mirror of
https://github.com/anyproto/anytype-heart.git
synced 2025-06-11 02:13:41 +09:00
commit
fef51f1aa5
5 changed files with 143 additions and 131 deletions
|
@ -39,7 +39,7 @@ const (
|
|||
CName = "indexer"
|
||||
|
||||
// increasing counters below will trigger existing account to reindex their data
|
||||
ForceThreadsObjectsReindexCounter int32 = 5 // reindex thread-based objects
|
||||
ForceThreadsObjectsReindexCounter int32 = 6 // reindex thread-based objects
|
||||
ForceFilesReindexCounter int32 = 5 // reindex ipfs-file-based objects
|
||||
ForceBundledObjectsReindexCounter int32 = 3 // reindex objects like anytypeProfile
|
||||
ForceIdxRebuildCounter int32 = 12 // erases localstore indexes and reindex all type of objects (no need to increase ForceThreadsObjectsReindexCounter & ForceFilesReindexCounter)
|
||||
|
@ -49,7 +49,8 @@ const (
|
|||
var log = logging.Logger("anytype-doc-indexer")
|
||||
|
||||
var (
	// ftIndexInterval is the period of the ticker that drives the regular
	// full-text indexing passes in ftLoop.
	ftIndexInterval = time.Minute
	// ftIndexForceMinInterval is the minimum time that must have passed since
	// the previous forced pass before ftLoop honours another ForceFTIndex
	// request; requests arriving sooner are ignored.
	ftIndexForceMinInterval = 10 * time.Second
)
|
||||
|
||||
func New() Indexer {
|
||||
|
@ -57,6 +58,7 @@ func New() Indexer {
|
|||
}
|
||||
|
||||
type Indexer interface {
|
||||
ForceFTIndex()
|
||||
app.ComponentRunnable
|
||||
}
|
||||
|
||||
|
@ -96,6 +98,7 @@ type indexer struct {
|
|||
archivedMap map[string]struct{}
|
||||
favoriteMap map[string]struct{}
|
||||
newAccount bool
|
||||
forceFt chan struct{}
|
||||
}
|
||||
|
||||
func (i *indexer) Init(a *app.App) (err error) {
|
||||
|
@ -112,7 +115,7 @@ func (i *indexer) Init(a *app.App) (err error) {
|
|||
i.quit = make(chan struct{})
|
||||
i.archivedMap = make(map[string]struct{}, 100)
|
||||
i.favoriteMap = make(map[string]struct{}, 100)
|
||||
|
||||
i.forceFt = make(chan struct{})
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -165,6 +168,13 @@ func (i *indexer) Run() (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
func (i *indexer) ForceFTIndex() {
|
||||
select {
|
||||
case i.forceFt <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (i *indexer) migrateRemoveNonindexableObjects() {
|
||||
ids, err := i.getIdsForTypes(
|
||||
smartblock.SmartblockTypeMarketplaceType, smartblock.SmartblockTypeMarketplaceRelation,
|
||||
|
@ -728,7 +738,7 @@ func (i *indexer) index(ctx context.Context, info doc.DocInfo) error {
|
|||
func (i *indexer) ftLoop() {
|
||||
ticker := time.NewTicker(ftIndexInterval)
|
||||
i.ftIndex()
|
||||
|
||||
var lastForceIndex time.Time
|
||||
i.mu.Lock()
|
||||
quit := i.quit
|
||||
i.mu.Unlock()
|
||||
|
@ -738,6 +748,11 @@ func (i *indexer) ftLoop() {
|
|||
return
|
||||
case <-ticker.C:
|
||||
i.ftIndex()
|
||||
case <-i.forceFt:
|
||||
if time.Since(lastForceIndex) > ftIndexForceMinInterval {
|
||||
i.ftIndex()
|
||||
lastForceIndex = time.Now()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ package core
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/anytypeio/go-anytype-middleware/core/indexer"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -110,7 +111,9 @@ func (mw *Middleware) ObjectSearch(req *pb.RpcObjectSearchRequest) *pb.RpcObject
|
|||
}
|
||||
|
||||
at := mw.app.MustComponent(core.CName).(core.Service)
|
||||
|
||||
if req.FullText != "" {
|
||||
mw.app.MustComponent(indexer.CName).(indexer.Indexer).ForceFTIndex()
|
||||
}
|
||||
records, _, err := at.ObjectStore().Query(nil, database.Query{
|
||||
Filters: req.Filters,
|
||||
Sorts: req.Sorts,
|
||||
|
|
1
go.mod
1
go.mod
|
@ -11,6 +11,7 @@ require (
|
|||
github.com/anytypeio/go-slip21 v0.0.0-20200218204727-e2e51e20ab51
|
||||
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de
|
||||
github.com/blevesearch/bleve/v2 v2.3.0 // indirect
|
||||
github.com/blevesearch/bleve_index_api v1.0.1 // indirect
|
||||
github.com/cheggaaa/mb v1.0.3
|
||||
github.com/dave/jennifer v1.4.1
|
||||
github.com/dgraph-io/badger v1.6.2
|
||||
|
|
|
@ -1,26 +1,34 @@
|
|||
package ftsearch
|
||||
|
||||
import (
|
||||
"github.com/anytypeio/go-anytype-middleware/app"
|
||||
"github.com/anytypeio/go-anytype-middleware/core/wallet"
|
||||
"github.com/anytypeio/go-anytype-middleware/metrics"
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||
"github.com/blevesearch/bleve/v2/analysis/lang/en"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
|
||||
"github.com/anytypeio/go-anytype-middleware/app"
|
||||
"github.com/anytypeio/go-anytype-middleware/core/wallet"
|
||||
"github.com/anytypeio/go-anytype-middleware/metrics"
|
||||
)
|
||||
|
||||
const (
	// CName is the name of the full-text search component.
	CName = "fts"

	// ftsDir is the directory, relative to the wallet repo path, that holds
	// every version of the full-text index.
	ftsDir = "fts"

	// ftsVer names the sub-directory of ftsDir that holds the current index.
	// Run deletes every other entry of ftsDir when it creates a fresh index.
	ftsVer = "1"
)
|
||||
|
||||
// SearchDoc is the document shape stored in the full-text index.
type SearchDoc struct {
	// Id is the key the document is indexed under.
	Id string
	// Title is mapped with the standard analyzer.
	Title string
	// TitleNoTerms is overwritten with a copy of Title by Index and mapped
	// with the custom "noTerms" analyzer; callers do not need to set it.
	TitleNoTerms string
	// Text is mapped with the standard analyzer.
	Text string
}
|
||||
|
||||
func New() FTSearch {
|
||||
|
@ -37,13 +45,17 @@ type FTSearch interface {
|
|||
}
|
||||
|
||||
type ftSearch struct {
|
||||
path string
|
||||
index bleve.Index
|
||||
rootPath string
|
||||
ftsPath string
|
||||
index bleve.Index
|
||||
enStopWordsMap map[string]bool
|
||||
}
|
||||
|
||||
func (f *ftSearch) Init(a *app.App) (err error) {
|
||||
repoPath := a.MustComponent(wallet.CName).(wallet.Wallet).RepoPath()
|
||||
f.path = filepath.Join(repoPath, ftsDir)
|
||||
f.rootPath = filepath.Join(repoPath, ftsDir)
|
||||
f.ftsPath = filepath.Join(repoPath, ftsDir, ftsVer)
|
||||
f.enStopWordsMap, _ = en.TokenMapConstructor(nil, nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -52,60 +64,131 @@ func (f *ftSearch) Name() (name string) {
|
|||
}
|
||||
|
||||
func (f *ftSearch) Run() (err error) {
|
||||
f.index, err = bleve.Open(f.path)
|
||||
f.index, err = bleve.Open(f.ftsPath)
|
||||
if err == bleve.ErrorIndexPathDoesNotExist || err == bleve.ErrorIndexMetaMissing {
|
||||
mapping := bleve.NewIndexMapping()
|
||||
if f.index, err = bleve.New(f.path, mapping); err != nil {
|
||||
if f.index, err = bleve.New(f.ftsPath, f.makeMapping()); err != nil {
|
||||
return
|
||||
}
|
||||
// cleanup old indexes
|
||||
if strings.HasSuffix(f.rootPath, ftsDir) {
|
||||
de, e := os.ReadDir(f.rootPath)
|
||||
if e == nil {
|
||||
// cleanup old index versions
|
||||
for _, d := range de {
|
||||
if d.Name() != ftsVer {
|
||||
os.RemoveAll(filepath.Join(f.rootPath, d.Name()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if err != nil {
|
||||
return
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *ftSearch) makeMapping() mapping.IndexMapping {
|
||||
mapping := bleve.NewIndexMapping()
|
||||
|
||||
keywordMapping := bleve.NewTextFieldMapping()
|
||||
keywordMapping.Analyzer = "noTerms"
|
||||
|
||||
mapping.DefaultMapping.AddFieldMappingsAt("TitleNoTerms", keywordMapping)
|
||||
mapping.DefaultMapping.AddFieldMappingsAt("Id", keywordMapping)
|
||||
|
||||
standardMapping := bleve.NewTextFieldMapping()
|
||||
standardMapping.Analyzer = standard.Name
|
||||
mapping.DefaultMapping.AddFieldMappingsAt("Title", standardMapping)
|
||||
mapping.DefaultMapping.AddFieldMappingsAt("Text", standardMapping)
|
||||
|
||||
mapping.AddCustomAnalyzer("noTerms",
|
||||
map[string]interface{}{
|
||||
"type": custom.Name,
|
||||
"tokenizer": single.Name,
|
||||
"token_filters": []string{
|
||||
lowercase.Name,
|
||||
},
|
||||
})
|
||||
return mapping
|
||||
}
|
||||
|
||||
func (f *ftSearch) Index(d SearchDoc) (err error) {
|
||||
metrics.ObjectFTUpdatedCounter.Inc()
|
||||
d.TitleNoTerms = d.Title
|
||||
return f.index.Index(d.Id, d)
|
||||
}
|
||||
|
||||
func (f *ftSearch) Search(text string) (results []string, err error) {
|
||||
text = strings.ToLower(strings.TrimSpace(text))
|
||||
var queries = make([]query.Query, 0, 4)
|
||||
terms := append([]string{text}, strings.Split(text, " ")...)
|
||||
termsFiltered := terms[:0]
|
||||
|
||||
// id match
|
||||
if len(text) > 10 {
|
||||
im := bleve.NewMatchQuery(text)
|
||||
im.SetField("Id")
|
||||
im.SetBoost(30)
|
||||
queries = append(queries, im)
|
||||
for _, t := range terms {
|
||||
t = strings.TrimSpace(t)
|
||||
if t != "" && !f.enStopWordsMap[t] {
|
||||
termsFiltered = append(termsFiltered, t)
|
||||
}
|
||||
}
|
||||
terms = termsFiltered
|
||||
|
||||
var exactQueries = make([]query.Query, 0, 4)
|
||||
// id match
|
||||
if len(text) > 5 {
|
||||
im := bleve.NewDocIDQuery([]string{text})
|
||||
im.SetBoost(30)
|
||||
exactQueries = append(exactQueries, im)
|
||||
}
|
||||
// title prefix
|
||||
tp := bleve.NewPrefixQuery(text)
|
||||
tp.SetField("Title")
|
||||
tp.SetField("TitleNoTerms")
|
||||
tp.SetBoost(40)
|
||||
queries = append(queries, tp)
|
||||
exactQueries = append(exactQueries, tp)
|
||||
|
||||
// title substr
|
||||
tss := bleve.NewWildcardQuery("*" + strings.ReplaceAll(text, "*", `\*`) + "*")
|
||||
tss.SetField("Title")
|
||||
tss.SetField("TitleNoTerms")
|
||||
tss.SetBoost(8)
|
||||
queries = append(queries, tss)
|
||||
// title match
|
||||
tm := bleve.NewMatchQuery(text)
|
||||
tm.SetFuzziness(1)
|
||||
tm.SetField("Title")
|
||||
tm.SetBoost(7)
|
||||
queries = append(queries, tm)
|
||||
// text match
|
||||
txtm := bleve.NewMatchQuery(text)
|
||||
txtm.SetFuzziness(0)
|
||||
txtm.SetField("Text")
|
||||
queries = append(queries, txtm)
|
||||
exactQueries = append(exactQueries, tss)
|
||||
|
||||
sr := bleve.NewSearchRequest(bleve.NewDisjunctionQuery(queries...))
|
||||
var notExactQueriesGroup = make([]query.Query, 0, 5)
|
||||
for i, t := range terms {
|
||||
// fulltext queries
|
||||
var notExactQueries = make([]query.Query, 0, 3)
|
||||
tp = bleve.NewPrefixQuery(t)
|
||||
tp.SetField("Title")
|
||||
if i == 0 {
|
||||
tp.SetBoost(8)
|
||||
}
|
||||
notExactQueries = append(notExactQueries, tp)
|
||||
|
||||
// title match
|
||||
tm := bleve.NewMatchQuery(t)
|
||||
tm.SetFuzziness(1)
|
||||
tm.SetField("Title")
|
||||
if i == 0 {
|
||||
tm.SetBoost(7)
|
||||
}
|
||||
notExactQueries = append(notExactQueries, tm)
|
||||
|
||||
// text match
|
||||
txtm := bleve.NewMatchQuery(t)
|
||||
txtm.SetFuzziness(0)
|
||||
txtm.SetField("Text")
|
||||
if i == 0 {
|
||||
txtm.SetBoost(2)
|
||||
}
|
||||
notExactQueries = append(notExactQueries, txtm)
|
||||
notExactQueriesGroup = append(notExactQueriesGroup, bleve.NewDisjunctionQuery(notExactQueries...))
|
||||
}
|
||||
|
||||
//exactQueries = []query.Query{bleve.NewDisjunctionQuery(notExactQueriesGroup...)}
|
||||
exactQueries = append(exactQueries, bleve.NewConjunctionQuery(notExactQueriesGroup...))
|
||||
|
||||
sr := bleve.NewSearchRequest(bleve.NewDisjunctionQuery(exactQueries...))
|
||||
sr.Size = 100
|
||||
sr.Explain = true
|
||||
res, err := f.index.Search(sr)
|
||||
//fmt.Println(res.String())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
|
|
@ -52,93 +52,3 @@ func TestNewFTSearch(t *testing.T) {
|
|||
assert.Len(t, res, 1)
|
||||
ft.Close()
|
||||
}
|
||||
|
||||
func TestFtSearch_Search(t *testing.T) {
|
||||
tmpDir, _ := ioutil.TempDir("", "")
|
||||
fixture := newFixture(tmpDir, t)
|
||||
ft := fixture.ft
|
||||
defer ft.Close()
|
||||
var docs = [...]SearchDoc{
|
||||
{
|
||||
Id: "1",
|
||||
Title: "First one",
|
||||
Text: "one two two",
|
||||
},
|
||||
{
|
||||
Id: "2",
|
||||
Title: "Second two",
|
||||
Text: "one two three",
|
||||
},
|
||||
{
|
||||
Id: "3",
|
||||
Title: "Third three",
|
||||
Text: "some text with 3",
|
||||
},
|
||||
{
|
||||
Id: "4",
|
||||
Title: "Fours four",
|
||||
Text: "some text with four and some text five",
|
||||
},
|
||||
{
|
||||
Id: "5",
|
||||
Title: "Fives five",
|
||||
Text: "some text with five and one and two ans rs",
|
||||
},
|
||||
{
|
||||
Id: "6",
|
||||
Title: "Rs six some",
|
||||
Text: "some text with six",
|
||||
},
|
||||
{
|
||||
Id: "somelongidentifier",
|
||||
},
|
||||
{
|
||||
Id: "eczq5t",
|
||||
Title: "FERRARI styling CENter with somethinglong ",
|
||||
},
|
||||
}
|
||||
for _, d := range docs {
|
||||
require.NoError(t, ft.Index(d))
|
||||
}
|
||||
|
||||
searches := [...]struct {
|
||||
Query string
|
||||
Result []string
|
||||
}{
|
||||
{
|
||||
"one",
|
||||
[]string{"1", "2", "5"},
|
||||
},
|
||||
{
|
||||
"rs",
|
||||
[]string{"6", "1", "4", "5"},
|
||||
},
|
||||
{
|
||||
"two",
|
||||
[]string{"2", "1", "5"},
|
||||
},
|
||||
{
|
||||
"six",
|
||||
[]string{"6"},
|
||||
},
|
||||
{
|
||||
"some text",
|
||||
[]string{"4", "3", "6", "5"},
|
||||
},
|
||||
{
|
||||
"somelongidentifier",
|
||||
[]string{"somelongidentifier"},
|
||||
},
|
||||
{
|
||||
"FeRRa",
|
||||
[]string{"eczq5t"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, st := range searches {
|
||||
ids, err := ft.Search(st.Query)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, st.Result, ids, st.Query)
|
||||
t.Logf("%s:\t%v\t%v", st.Query, ids, st.Result)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue