feat: build index & search with bleve (close #1740 pr #2386)

* feat: build index & search with bleve (#1740)

* delete unused struct

Co-authored-by: Noah Hsu <i@nn.ci>
This commit is contained in:
BoYanZh
2022-11-24 11:46:47 +08:00
committed by GitHub
parent 2b902de6fd
commit 330a767fd7
11 changed files with 346 additions and 0 deletions

View File

@ -0,0 +1,10 @@
package bootstrap
import (
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/internal/index"
)
func InitIndex() {
index.Init(&conf.Conf.IndexDir)
}

View File

@ -45,11 +45,13 @@ type Config struct {
Database Database `json:"database"`
Scheme Scheme `json:"scheme"`
TempDir string `json:"temp_dir" env:"TEMP_DIR"`
IndexDir string `json:"index_dir" env:"INDEX_DIR"`
Log LogConfig `json:"log"`
}
func DefaultConfig() *Config {
tempDir := filepath.Join(flags.DataDir, "temp")
indexDir := filepath.Join(flags.DataDir, "index")
logPath := filepath.Join(flags.DataDir, "log/log.log")
dbPath := filepath.Join(flags.DataDir, "data.db")
return &Config{
@ -64,6 +66,7 @@ func DefaultConfig() *Config {
TablePrefix: "x_",
DBFile: dbPath,
},
IndexDir: indexDir,
Log: LogConfig{
Enable: true,
Name: logPath,

106
internal/index/build.go Normal file
View File

@ -0,0 +1,106 @@
package index
import (
"context"
"path"
"path/filepath"
"time"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/internal/fs"
"github.com/alist-org/alist/v3/internal/model"
"github.com/blevesearch/bleve/v2"
"github.com/google/uuid"
)
// walkFS traverses the file system starting at name, descending at most
// depth levels below it. A negative depth never reaches zero, so the walk is
// then bounded only by the tree itself.
//
// walkFn is called for every visited node. If walkFn returns
// filepath.SkipDir for a directory, that directory is not descended into and
// the walk continues. If it returns filepath.SkipDir for a non-directory, the
// remaining entries of the containing directory are skipped. Any other
// non-nil error aborts the walk and is returned to the caller.
//
// If listing a directory fails, walkFn is called a second time for that
// directory with the listing error so the callback can decide whether the
// failure aborts the walk: returning nil or filepath.SkipDir skips the
// directory, any other error is propagated.
func walkFS(ctx context.Context, depth int, name string, info model.Obj, walkFn func(reqPath string, info model.Obj, err error) error) error {
	// This implementation is based on Walk's code in the standard path/filepath package.
	if err := walkFn(name, info, nil); err != nil {
		if info.IsDir() && err == filepath.SkipDir {
			return nil
		}
		return err
	}
	if !info.IsDir() || depth == 0 {
		return nil
	}
	// NOTE(review): the lookup error is discarded; presumably listing without
	// a meta is acceptable — confirm.
	meta, _ := db.GetNearestMeta(name)
	// Read directory entries.
	objs, err := fs.List(context.WithValue(ctx, "meta", meta), name)
	if err != nil {
		// Report the failure to the callback instead of silently dropping it.
		// info is a directory here, so SkipDir simply means "skip it".
		if cbErr := walkFn(name, info, err); cbErr != nil && cbErr != filepath.SkipDir {
			return cbErr
		}
		return nil
	}
	for _, child := range objs {
		childPath := path.Join(name, child.GetName())
		if err := walkFS(ctx, depth-1, childPath, child, walkFn); err != nil {
			if err == filepath.SkipDir {
				// A non-directory child asked to skip the rest of this directory.
				break
			}
			return err
		}
	}
	return nil
}
// Data is the document that BuildIndex adds to the index for each
// non-directory entry it visits.
type Data struct {
	// Path is the path of the indexed entry; Search requests this field by name.
	Path string
}
// BuildIndex walks every path in indexPaths, descending at most maxDepth
// levels, and adds one Data document per non-directory entry to the index.
//
// Entries whose path equals one of ignorePaths are left out: an ignored
// directory is not descended into, and an ignored file is skipped on its own
// without affecting its siblings.
//
// Progress is persisted through WriteProgress: it is reset at the start,
// refreshed every 100 indexed files and finalized at the end. The build is
// only recorded as done (IsDone true with a LastDoneTime) when every document
// was queued and every batch was committed without error; otherwise the
// progress is left with IsDone false.
//
// Index paths that cannot be resolved are skipped (best effort).
//
// NOTE(review): document IDs are random UUIDs, so rebuilding without
// clearing the index presumably leaves earlier documents in place — confirm.
func BuildIndex(ctx context.Context, indexPaths, ignorePaths []string, maxDepth int) {
	WriteProgress(&Progress{
		FileCount:    0,
		IsDone:       false,
		LastDoneTime: nil,
	})
	var (
		fileCount uint64
		failed    bool
	)
	batches := make([]*bleve.Batch, 0, len(indexPaths))
	for _, indexPath := range indexPaths {
		root, err := fs.Get(ctx, indexPath)
		if err != nil {
			// Best effort: an unresolvable index path contributes nothing.
			continue
		}
		batch := index.NewBatch()
		// TODO: cache unchanged part
		// TODO: store current progress
		walkFn := func(reqPath string, info model.Obj, err error) error {
			if err != nil {
				// The node could not be read; skip it and keep walking.
				return nil
			}
			for _, ignorePath := range ignorePaths {
				if reqPath != ignorePath {
					continue
				}
				if info.IsDir() {
					return filepath.SkipDir
				}
				// Returning SkipDir for a file would also drop its remaining
				// siblings, so an ignored file is just left out.
				return nil
			}
			if info.IsDir() {
				return nil
			}
			if indexErr := batch.Index(uuid.NewString(), Data{Path: reqPath}); indexErr != nil {
				// The document was not queued: do not count it, and remember
				// that this build is incomplete.
				failed = true
				return nil
			}
			fileCount++
			if fileCount%100 == 0 {
				WriteProgress(&Progress{
					FileCount:    fileCount,
					IsDone:       false,
					LastDoneTime: nil,
				})
			}
			return nil
		}
		// TODO: run walkFS concurrently
		// walkFn only ever returns nil or SkipDir for a directory, so the
		// walk result carries no error to handle.
		_ = walkFS(ctx, maxDepth, indexPath, root, walkFn)
		batches = append(batches, batch)
	}
	for _, batch := range batches {
		if err := index.Batch(batch); err != nil {
			failed = true
		}
	}
	if failed {
		WriteProgress(&Progress{
			FileCount:    fileCount,
			IsDone:       false,
			LastDoneTime: nil,
		})
		return
	}
	now := time.Now()
	WriteProgress(&Progress{
		FileCount:    fileCount,
		IsDone:       true,
		LastDoneTime: &now,
	})
}

30
internal/index/index.go Normal file
View File

@ -0,0 +1,30 @@
package index
import (
"github.com/blevesearch/bleve/v2"
log "github.com/sirupsen/logrus"
)
// index is the package-wide bleve index. It is assigned by Init and used by
// BuildIndex and Search.
var index bleve.Index
// Init opens the bleve index stored at *indexPath and assigns it to the
// package-level index. If no index exists at that path, a new one is created
// with the default index mapping.
//
// Any failure to open or create the index is fatal: continuing would leave
// the package-level index nil and make every later use of it panic.
//
// After the index is ready, the persisted progress is read; if the last
// build is not marked as done a warning is logged and the progress is
// written back with IsDone false.
func Init(indexPath *string) {
	fileIndex, err := bleve.Open(*indexPath)
	switch {
	case err == bleve.ErrorIndexPathDoesNotExist:
		log.Infof("Creating new index...")
		indexMapping := bleve.NewIndexMapping()
		fileIndex, err = bleve.New(*indexPath, indexMapping)
		if err != nil {
			log.Fatal(err)
		}
	case err != nil:
		// Previously ignored, which left index nil for e.g. a corrupted or
		// unreadable index directory.
		log.Fatalf("opening index %q: %+v", *indexPath, err)
	}
	index = fileIndex
	progress := ReadProgress()
	if !progress.IsDone {
		log.Warnf("Last index build does not succeed!")
		WriteProgress(&Progress{
			FileCount:    progress.FileCount,
			IsDone:       false,
			LastDoneTime: nil,
		})
	}
}

19
internal/index/search.go Normal file
View File

@ -0,0 +1,19 @@
package index
import (
"github.com/blevesearch/bleve/v2"
log "github.com/sirupsen/logrus"
)
// Search runs queryString as a bleve match query against the package index.
// The request's Size is set to size and the "Path" field is requested for
// each hit. On failure the error is logged and returned with a nil result.
func Search(queryString string, size int) (*bleve.SearchResult, error) {
	req := bleve.NewSearchRequest(bleve.NewMatchQuery(queryString))
	req.Size = size
	req.Fields = []string{"Path"}
	res, err := index.Search(req)
	if err == nil {
		return res, nil
	}
	log.Errorf("search error: %+v", err)
	return nil, err
}

46
internal/index/util.go Normal file
View File

@ -0,0 +1,46 @@
package index
import (
"errors"
"os"
"path/filepath"
"time"
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/pkg/utils"
log "github.com/sirupsen/logrus"
)
// Progress is the state of an index build. It is persisted as JSON in
// progress.json inside the configured index directory.
type Progress struct {
	// FileCount is the number of files counted by the build so far.
	FileCount uint64 `json:"file_count"`
	// IsDone is set to true by BuildIndex once a build has finished.
	IsDone bool `json:"is_done"`
	// LastDoneTime is the time the build finished; nil while IsDone is false.
	LastDoneTime *time.Time `json:"last_done_time"`
}
// ReadProgress loads the index build progress from progress.json in the
// configured index directory. When the file does not exist it is first
// created with a zero-valued Progress. Failing to create, read or decode the
// file terminates the process via log.Fatalf.
func ReadProgress() Progress {
	var progress Progress
	progressFilePath := filepath.Join(conf.Conf.IndexDir, "progress.json")
	if _, statErr := os.Stat(progressFilePath); errors.Is(statErr, os.ErrNotExist) {
		if ok := utils.WriteJsonToFile(progressFilePath, progress); !ok {
			log.Fatalf("failed to create index progress file")
		}
	}
	raw, err := os.ReadFile(progressFilePath)
	if err != nil {
		log.Fatalf("reading index progress file error: %+v", err)
	}
	if err = utils.Json.Unmarshal(raw, &progress); err != nil {
		log.Fatalf("load index progress error: %+v", err)
	}
	return progress
}
// WriteProgress logs progress and stores it as JSON in progress.json inside
// the configured index directory. A failed write terminates the process via
// log.Fatalf.
func WriteProgress(progress *Progress) {
	log.Infof("write index progress: %v", progress)
	target := filepath.Join(conf.Conf.IndexDir, "progress.json")
	if ok := utils.WriteJsonToFile(target, progress); !ok {
		log.Fatalf("failed to write to index progress file")
	}
}