feat: multiple search indexes (#2514)

* refactor: abstract search interface

* wip: ~

* fix cycle import

* objs update hook

* wip: ~

* Delete search/none

* auto update index when cache changes

* db searcher

TODO: bleve init issue

cannot open index, metadata missing

* fix size type

why float64??

* fix typo

* fix nil pointer usage

* api adapt ui

* bleve: fix clear & change struct
This commit is contained in:
Noah Hsu
2022-11-28 13:45:25 +08:00
committed by GitHub
parent bb969d8dc6
commit ddcba93eea
43 changed files with 855 additions and 350 deletions

View File

@ -0,0 +1,38 @@
package bleve
import (
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/internal/search/searcher"
"github.com/blevesearch/bleve/v2"
log "github.com/sirupsen/logrus"
)
// config is the static configuration of the bleve searcher. Only Name is set,
// so AutoUpdate keeps its zero value (false).
var config = searcher.Config{
Name: "bleve",
}
// Init opens the bleve index located at *indexPath. When bleve reports that
// the path does not exist yet, a new index with the default index mapping is
// created there instead. Any other open error, or a failure to create the
// index, is returned to the caller.
func Init(indexPath *string) (bleve.Index, error) {
	dir := *indexPath
	log.Debugf("bleve path: %s", dir)

	idx, openErr := bleve.Open(dir)
	switch {
	case openErr == nil:
		return idx, nil
	case openErr != bleve.ErrorIndexPathDoesNotExist:
		return nil, openErr
	}

	// Nothing on disk yet: start from an empty index.
	log.Infof("Creating new index...")
	created, createErr := bleve.New(dir, bleve.NewIndexMapping())
	if createErr != nil {
		return nil, createErr
	}
	return created, nil
}
// init registers the bleve searcher factory. The factory opens (or creates)
// the index under conf.Conf.BleveDir each time it is invoked.
func init() {
	newBleve := func() (searcher.Searcher, error) {
		index, err := Init(&conf.Conf.BleveDir)
		if err != nil {
			return nil, err
		}
		return &Bleve{BIndex: index}, nil
	}
	searcher.RegisterSearcher(config, newBleve)
}

View File

@ -0,0 +1,90 @@
package bleve
import (
"context"
"os"
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/internal/errs"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/internal/search/searcher"
"github.com/alist-org/alist/v3/pkg/utils"
"github.com/blevesearch/bleve/v2"
search2 "github.com/blevesearch/bleve/v2/search"
"github.com/google/uuid"
log "github.com/sirupsen/logrus"
)
// Bleve is a searcher.Searcher implementation backed by a bleve index.
type Bleve struct {
// BIndex is the underlying bleve index handle; Release closes it and Clear
// replaces it with a freshly initialised one.
BIndex bleve.Index
}
// Config returns the package-level configuration of the bleve searcher.
func (b *Bleve) Config() searcher.Config {
	cfg := config
	return cfg
}
// Search runs a match query for req.Keywords against the "name" field of the
// bleve index and converts every hit into a model.SearchNode.
//
// It returns the nodes of the fetched page together with the total number of
// documents matching the query as reported by bleve (not merely the size of
// the returned page). Errors from the index and from converting the hits are
// both propagated to the caller.
//
// NOTE(review): only Size is set on the request; From is left at its default,
// so the first page is always returned. Confirm whether req carries a page
// number that should be applied here.
func (b *Bleve) Search(ctx context.Context, req model.SearchReq) ([]model.SearchNode, int64, error) {
	query := bleve.NewMatchQuery(req.Keywords)
	query.SetField("name")
	search := bleve.NewSearchRequest(query)
	search.Size = req.PerPage
	// "*" requests all stored fields so the node can be rebuilt from the hit.
	search.Fields = []string{"*"}
	searchResults, err := b.BIndex.Search(search)
	if err != nil {
		log.Errorf("search error: %+v", err)
		return nil, 0, err
	}
	res, err := utils.SliceConvert(searchResults.Hits, func(src *search2.DocumentMatch) (model.SearchNode, error) {
		// The field names and types mirror what Index stores; the size is
		// read back as float64 and converted to int64 here.
		return model.SearchNode{
			Parent: src.Fields["parent"].(string),
			Name:   src.Fields["name"].(string),
			IsDir:  src.Fields["is_dir"].(bool),
			Size:   int64(src.Fields["size"].(float64)),
		}, nil
	})
	if err != nil {
		return nil, 0, err
	}
	return res, int64(searchResults.Total), nil
}
// Index stores obj as a model.SearchNode under parent in the bleve index.
// Each call uses a freshly generated UUID as the document ID.
func (b *Bleve) Index(ctx context.Context, parent string, obj model.Obj) error {
	docID := uuid.NewString()
	node := model.SearchNode{
		Parent: parent,
		Name:   obj.GetName(),
		IsDir:  obj.IsDir(),
		Size:   obj.GetSize(),
	}
	return b.BIndex.Index(docID, node)
}
// Get is not implemented by the bleve searcher; it always returns a nil slice
// and errs.NotSupport.
func (b *Bleve) Get(ctx context.Context, parent string) ([]model.SearchNode, error) {
	var nodes []model.SearchNode
	return nodes, errs.NotSupport
}
// Del is not implemented by the bleve searcher; it always returns
// errs.NotSupport.
func (b *Bleve) Del(ctx context.Context, prefix string) error {
	unsupported := errs.NotSupport
	return unsupported
}
// Release closes the underlying bleve index if one is set and returns the
// result of Close. With no index set it does nothing and returns nil.
func (b *Bleve) Release(ctx context.Context) error {
	if b.BIndex == nil {
		return nil
	}
	return b.BIndex.Close()
}
// Clear drops the whole bleve index: it releases the current index handle,
// removes conf.Conf.BleveDir from disk and then re-initialises the index at
// the same location.
//
// If removing the directory fails, the index is still re-opened so that the
// searcher keeps a usable handle, but the removal error is returned so the
// caller learns that the index was not actually cleared.
func (b *Bleve) Clear(ctx context.Context) error {
	if err := b.Release(ctx); err != nil {
		return err
	}
	log.Infof("Removing old index...")
	removeErr := os.RemoveAll(conf.Conf.BleveDir)
	if removeErr != nil {
		log.Errorf("clear bleve error: %+v", removeErr)
	}
	// Re-open (or re-create) the index regardless of the removal outcome;
	// the previous handle has already been closed by Release.
	bIndex, err := Init(&conf.Conf.BleveDir)
	if err != nil {
		return err
	}
	b.BIndex = bIndex
	return removeErr
}
// Compile-time check that *Bleve implements searcher.Searcher.
var _ searcher.Searcher = (*Bleve)(nil)

100
internal/search/build.go Normal file
View File

@ -0,0 +1,100 @@
package search
import (
"context"
"path"
"path/filepath"
"time"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/internal/fs"
"github.com/alist-org/alist/v3/internal/model"
log "github.com/sirupsen/logrus"
)
var (
// Running is set to true when BuildIndex starts and reset to false when it
// returns. It is a plain bool; no synchronization is applied to it here.
Running = false
)
// BuildIndex walks every path in indexPaths with fs.WalkFS and adds each
// visited object to the active searcher instance.
//
//   - ignorePaths: a visited path equal to one of these makes the walk
//     function return filepath.SkipDir.
//   - maxDepth: forwarded unchanged to fs.WalkFS.
//   - count: when true, progress is persisted through WriteProgress at the
//     start, after every 100 indexed objects, and when the function returns.
//
// The walk runs with the admin user stored in the context under the key
// "user". Running is set to true for the duration of the call. The first
// error from looking up the admin, resolving an index path, indexing an
// object or walking is returned; an error that the walker reports for a
// single path is logged and that path is skipped instead of being indexed.
func BuildIndex(ctx context.Context, indexPaths, ignorePaths []string, maxDepth int, count bool) error {
	var objCount uint64
	Running = true
	var (
		err error
		fi  model.Obj
	)
	// The deferred function reads err, so every return below goes through
	// this same variable.
	defer func() {
		Running = false
		now := time.Now()
		eMsg := ""
		if err != nil {
			log.Errorf("build index error: %+v", err)
			eMsg = err.Error()
		} else {
			log.Infof("success build index, count: %d", objCount)
		}
		if count {
			WriteProgress(&model.IndexProgress{
				ObjCount:     objCount,
				IsDone:       err == nil,
				LastDoneTime: &now,
				Error:        eMsg,
			})
		}
	}()
	admin, err := db.GetAdmin()
	if err != nil {
		return err
	}
	if count {
		WriteProgress(&model.IndexProgress{
			ObjCount: 0,
			IsDone:   false,
		})
	}
	for _, indexPath := range indexPaths {
		walkFn := func(indexPath string, info model.Obj, walkErr error) error {
			for _, avoidPath := range ignorePaths {
				if indexPath == avoidPath {
					return filepath.SkipDir
				}
			}
			// The walker reported a problem for this path: do not index it
			// as if nothing happened, but keep the rest of the build going.
			if walkErr != nil {
				log.Errorf("skip indexing %s: %+v", indexPath, walkErr)
				return nil
			}
			// ignore root
			if indexPath == "/" {
				return nil
			}
			if indexErr := instance.Index(ctx, path.Dir(indexPath), info); indexErr != nil {
				return indexErr
			}
			objCount++
			if objCount%100 == 0 {
				log.Infof("index obj count: %d", objCount)
				log.Debugf("current success index: %s", indexPath)
				if count {
					WriteProgress(&model.IndexProgress{
						ObjCount:     objCount,
						IsDone:       false,
						LastDoneTime: nil,
					})
				}
			}
			return nil
		}
		fi, err = fs.Get(ctx, indexPath)
		if err != nil {
			return err
		}
		// TODO: run walkFS concurrently
		err = fs.WalkFS(context.WithValue(ctx, "user", admin), maxDepth, indexPath, fi, walkFn)
		if err != nil {
			return err
		}
	}
	return nil
}
// Clear removes everything from the active searcher's index by delegating to
// its Clear method.
func Clear(ctx context.Context) error {
	err := instance.Clear(ctx)
	return err
}

View File

@ -0,0 +1,16 @@
package db
import (
"github.com/alist-org/alist/v3/internal/search/searcher"
)
// config is the static configuration of the database searcher: it is named
// "database" and has AutoUpdate enabled.
var config = searcher.Config{
Name: "database",
AutoUpdate: true,
}
// init registers the database searcher factory, which hands out a new empty
// DB value and never fails.
func init() {
	newDB := func() (searcher.Searcher, error) {
		return &DB{}, nil
	}
	searcher.RegisterSearcher(config, newDB)
}

View File

@ -0,0 +1,46 @@
package db
import (
"context"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/internal/search/searcher"
)
// DB is a searcher.Searcher implementation that delegates every operation to
// the search-node helpers of the internal db package. It holds no state.
type DB struct{}

// Config returns the package-level configuration of the database searcher.
func (d DB) Config() searcher.Config {
	return config
}

// Search forwards req to db.SearchNode and returns its result unchanged.
func (d DB) Search(ctx context.Context, req model.SearchReq) ([]model.SearchNode, int64, error) {
	return db.SearchNode(req)
}

// Index stores obj under parent as a new search node in the database.
func (d DB) Index(ctx context.Context, parent string, obj model.Obj) error {
	return db.CreateSearchNode(&model.SearchNode{
		Parent: parent,
		Name:   obj.GetName(),
		IsDir:  obj.IsDir(),
		Size:   obj.GetSize(),
	})
}

// Get returns the search nodes stored for parent.
func (d DB) Get(ctx context.Context, parent string) ([]model.SearchNode, error) {
	return db.GetSearchNodesByParent(parent)
}

// Del passes prefix to db.DeleteSearchNodesByParent.
func (d DB) Del(ctx context.Context, prefix string) error {
	return db.DeleteSearchNodesByParent(prefix)
}

// Release has nothing to free for the database searcher and always returns nil.
func (d DB) Release(ctx context.Context) error {
	return nil
}

// Clear removes all search nodes via db.ClearSearchNodes.
func (d DB) Clear(ctx context.Context) error {
	return db.ClearSearchNodes()
}

// Compile-time check that *DB implements searcher.Searcher.
var _ searcher.Searcher = (*DB)(nil)

View File

@ -0,0 +1,6 @@
package search
import (
_ "github.com/alist-org/alist/v3/internal/search/bleve"
_ "github.com/alist-org/alist/v3/internal/search/db"
)

View File

@ -0,0 +1,36 @@
package search
import (
"context"
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/internal/setting"
"github.com/alist-org/alist/v3/pkg/utils"
log "github.com/sirupsen/logrus"
)
// Progress reads the index progress stored under the conf.IndexProgress
// setting and decodes it from JSON. The decoded value is returned even when
// decoding fails, together with the decoding error.
func Progress(ctx context.Context) (*model.IndexProgress, error) {
	raw := setting.GetStr(conf.IndexProgress)
	progress := new(model.IndexProgress)
	err := utils.Json.UnmarshalFromString(raw, progress)
	return progress, err
}
// WriteProgress serialises progress to JSON and saves it as the
// conf.IndexProgress setting item. Failures are logged and not returned.
//
// If the progress cannot be marshalled, nothing is saved, so the previously
// stored progress is left in place rather than being overwritten with an
// empty value.
func WriteProgress(progress *model.IndexProgress) {
	p, err := utils.Json.MarshalToString(progress)
	if err != nil {
		log.Errorf("marshal progress error: %+v", err)
		return
	}
	err = db.SaveSettingItem(model.SettingItem{
		Key:   conf.IndexProgress,
		Value: p,
		Type:  conf.TypeText,
		Group: model.SINGLE,
		Flag:  model.PRIVATE,
	})
	if err != nil {
		log.Errorf("save progress error: %+v", err)
	}
}

54
internal/search/search.go Normal file
View File

@ -0,0 +1,54 @@
package search
import (
"context"
"fmt"
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/internal/search/searcher"
log "github.com/sirupsen/logrus"
)
// instance is the active searcher. It is nil until Init installs one and is
// set back to nil by Init when the previous searcher is released.
var instance searcher.Searcher = nil
// Init or reset index
func Init(mode string) error {
if instance != nil {
err := instance.Release(context.Background())
if err != nil {
log.Errorf("release instance err: %+v", err)
}
instance = nil
}
if Running {
return fmt.Errorf("index is running")
}
if mode == "none" {
log.Warnf("not enable search")
return nil
}
s, ok := searcher.NewMap[mode]
if !ok {
return fmt.Errorf("not support index: %s", mode)
}
i, err := s()
if err != nil {
log.Errorf("init searcher error: %+v", err)
} else {
instance = i
}
return err
}
// Search forwards req to the active searcher and returns its result.
// When no searcher is active (search disabled or initialisation failed) it
// returns an error instead of dereferencing a nil searcher.
func Search(ctx context.Context, req model.SearchReq) ([]model.SearchNode, int64, error) {
	if instance == nil {
		return nil, 0, fmt.Errorf("search is not enabled")
	}
	return instance.Search(ctx, req)
}
func init() {
db.RegisterSettingItemHook(conf.SearchIndex, func(item *model.SettingItem) error {
log.Debugf("searcher init, mode: %s", item.Value)
return Init(item.Value)
})
}

View File

@ -0,0 +1,9 @@
package searcher
// New is a factory that creates a Searcher or reports why it could not.
type New func() (Searcher, error)
// NewMap holds the registered searcher factories keyed by Config.Name.
var NewMap = map[string]New{}
// RegisterSearcher stores the factory in NewMap under config.Name, replacing
// any factory previously registered under the same name.
func RegisterSearcher(config Config, searcher New) {
	name := config.Name
	NewMap[name] = searcher
}

View File

@ -0,0 +1,29 @@
package searcher
import (
"context"
"github.com/alist-org/alist/v3/internal/model"
)
// Config is the static description of a searcher implementation.
type Config struct {
// Name identifies the searcher; RegisterSearcher uses it as the registry key.
Name string
// AutoUpdate marks whether the searcher wants automatic index updates.
AutoUpdate bool
}
// Searcher is the interface a search index backend has to implement.
type Searcher interface {
// Config returns the static configuration of the searcher.
Config() Config
// Search looks up nodes matching req and returns them together with an
// int64 count (presumably the number of matches; confirm against the
// implementations).
Search(ctx context.Context, req model.SearchReq) ([]model.SearchNode, int64, error)
// Index adds obj to the index as a child of parent.
Index(ctx context.Context, parent string, obj model.Obj) error
// Get returns the indexed nodes for the given parent.
Get(ctx context.Context, parent string) ([]model.SearchNode, error)
// Del removes indexed nodes identified by prefix.
Del(ctx context.Context, prefix string) error
// Release frees the resources held by the searcher.
Release(ctx context.Context) error
// Clear removes everything from the index.
Clear(ctx context.Context) error
}

73
internal/search/update.go Normal file
View File

@ -0,0 +1,73 @@
package search
import (
"context"
"path"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/internal/op"
mapset "github.com/deckarep/golang-set/v2"
log "github.com/sirupsen/logrus"
)
// Update synchronises the search index of the directory parent with objs, the
// current listing of that directory.
//
// It does nothing when no searcher is active, when the active searcher does
// not have AutoUpdate set, or when the stored index progress is not marked as
// done. Otherwise the names already indexed under parent are compared with
// the names in objs: entries that are indexed but no longer listed are
// deleted, and entries that are listed but not indexed are added. A newly
// added directory is additionally indexed through BuildIndex.
//
// Errors are logged and abort the update; nothing is returned to the caller.
func Update(parent string, objs []model.Obj) {
	// A nil instance must return here as well, otherwise the calls on
	// instance further down would dereference a nil searcher.
	if instance == nil || !instance.Config().AutoUpdate {
		return
	}
	ctx := context.Background()
	// only update when index have built
	progress, err := Progress(ctx)
	if err != nil {
		log.Errorf("update search index error while get progress: %+v", err)
		return
	}
	if !progress.IsDone {
		return
	}
	nodes, err := instance.Get(ctx, parent)
	if err != nil {
		log.Errorf("update search index error while get nodes: %+v", err)
		return
	}
	now := mapset.NewSet[string]()
	for i := range objs {
		now.Add(objs[i].GetName())
	}
	old := mapset.NewSet[string]()
	for i := range nodes {
		old.Add(nodes[i].Name)
	}
	// Names only in the index must be deleted; names only in the listing
	// must be added.
	toDelete := old.Difference(now)
	toAdd := now.Difference(old)
	// delete data that no longer exists
	for i := range nodes {
		if !toDelete.Contains(nodes[i].Name) {
			continue
		}
		err = instance.Del(ctx, path.Join(parent, nodes[i].Name))
		if err != nil {
			log.Errorf("update search index error while del old node: %+v", err)
			return
		}
	}
	for i := range objs {
		if !toAdd.Contains(objs[i].GetName()) {
			continue
		}
		err = instance.Index(ctx, parent, objs[i])
		if err != nil {
			log.Errorf("update search index error while index new node: %+v", err)
			return
		}
		// build index if it's a folder
		if objs[i].IsDir() {
			err = BuildIndex(ctx, []string{path.Join(parent, objs[i].GetName())}, nil, -1, false)
			if err != nil {
				log.Errorf("update search index error while build index: %+v", err)
				return
			}
		}
	}
}
func init() {
op.RegisterObjsUpdateHook(Update)
}