* feat: build index & search with bleve (#1740) * delete unused struct Co-authored-by: Noah Hsu <i@nn.ci>
This commit is contained in:
10
internal/bootstrap/index.go
Normal file
10
internal/bootstrap/index.go
Normal file
@ -0,0 +1,10 @@
|
||||
package bootstrap
|
||||
|
||||
import (
|
||||
"github.com/alist-org/alist/v3/internal/conf"
|
||||
"github.com/alist-org/alist/v3/internal/index"
|
||||
)
|
||||
|
||||
func InitIndex() {
|
||||
index.Init(&conf.Conf.IndexDir)
|
||||
}
|
@ -45,11 +45,13 @@ type Config struct {
|
||||
Database Database `json:"database"`
|
||||
Scheme Scheme `json:"scheme"`
|
||||
TempDir string `json:"temp_dir" env:"TEMP_DIR"`
|
||||
IndexDir string `json:"index_dir" env:"INDEX_DIR"`
|
||||
Log LogConfig `json:"log"`
|
||||
}
|
||||
|
||||
func DefaultConfig() *Config {
|
||||
tempDir := filepath.Join(flags.DataDir, "temp")
|
||||
indexDir := filepath.Join(flags.DataDir, "index")
|
||||
logPath := filepath.Join(flags.DataDir, "log/log.log")
|
||||
dbPath := filepath.Join(flags.DataDir, "data.db")
|
||||
return &Config{
|
||||
@ -64,6 +66,7 @@ func DefaultConfig() *Config {
|
||||
TablePrefix: "x_",
|
||||
DBFile: dbPath,
|
||||
},
|
||||
IndexDir: indexDir,
|
||||
Log: LogConfig{
|
||||
Enable: true,
|
||||
Name: logPath,
|
||||
|
106
internal/index/build.go
Normal file
106
internal/index/build.go
Normal file
@ -0,0 +1,106 @@
|
||||
package index
|
||||
|
||||
import (
	"context"
	"path"
	"path/filepath"
	"time"

	"github.com/alist-org/alist/v3/internal/db"
	"github.com/alist-org/alist/v3/internal/fs"
	"github.com/alist-org/alist/v3/internal/model"
	"github.com/blevesearch/bleve/v2"
	"github.com/google/uuid"
	log "github.com/sirupsen/logrus"
)
|
||||
|
||||
// walkFS traverses filesystem fs starting at name up to depth levels.
|
||||
//
|
||||
// walkFS will stop when current depth > `depth`. For each visited node,
|
||||
// walkFS calls walkFn. If a visited file system node is a directory and
|
||||
// walkFn returns path.SkipDir, walkFS will skip traversal of this node.
|
||||
func walkFS(ctx context.Context, depth int, name string, info model.Obj, walkFn func(reqPath string, info model.Obj, err error) error) error {
|
||||
// This implementation is based on Walk's code in the standard path/path package.
|
||||
walkFnErr := walkFn(name, info, nil)
|
||||
if walkFnErr != nil {
|
||||
if info.IsDir() && walkFnErr == filepath.SkipDir {
|
||||
return nil
|
||||
}
|
||||
return walkFnErr
|
||||
}
|
||||
if !info.IsDir() || depth == 0 {
|
||||
return nil
|
||||
}
|
||||
meta, _ := db.GetNearestMeta(name)
|
||||
// Read directory names.
|
||||
objs, err := fs.List(context.WithValue(ctx, "meta", meta), name)
|
||||
if err != nil {
|
||||
return walkFnErr
|
||||
}
|
||||
for _, fileInfo := range objs {
|
||||
filename := path.Join(name, fileInfo.GetName())
|
||||
if err := walkFS(ctx, depth-1, filename, fileInfo, walkFn); err != nil {
|
||||
if err == filepath.SkipDir {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Data is the document stored in the search index for a single file.
type Data struct {
	// Path is the file's path as reported by the walk callback; Search
	// requests this field by the name "Path".
	Path string
}
|
||||
|
||||
func BuildIndex(ctx context.Context, indexPaths, ignorePaths []string, maxDepth int) {
|
||||
WriteProgress(&Progress{
|
||||
FileCount: 0,
|
||||
IsDone: false,
|
||||
LastDoneTime: nil,
|
||||
})
|
||||
var batchs []*bleve.Batch
|
||||
var fileCount uint64 = 0
|
||||
for _, indexPath := range indexPaths {
|
||||
batch := func() *bleve.Batch {
|
||||
batch := index.NewBatch()
|
||||
// TODO: cache unchanged part
|
||||
// TODO: store current progress
|
||||
walkFn := func(indexPath string, info model.Obj, err error) error {
|
||||
for _, avoidPath := range ignorePaths {
|
||||
if indexPath == avoidPath {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
}
|
||||
if !info.IsDir() {
|
||||
batch.Index(uuid.NewString(), Data{Path: indexPath})
|
||||
fileCount += 1
|
||||
if fileCount%100 == 0 {
|
||||
WriteProgress(&Progress{
|
||||
FileCount: fileCount,
|
||||
IsDone: false,
|
||||
LastDoneTime: nil,
|
||||
})
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
fi, err := fs.Get(ctx, indexPath)
|
||||
if err != nil {
|
||||
return batch
|
||||
}
|
||||
// TODO: run walkFS concurrently
|
||||
walkFS(ctx, maxDepth, indexPath, fi, walkFn)
|
||||
return batch
|
||||
}()
|
||||
batchs = append(batchs, batch)
|
||||
}
|
||||
for _, batch := range batchs {
|
||||
index.Batch(batch)
|
||||
}
|
||||
now := time.Now()
|
||||
WriteProgress(&Progress{
|
||||
FileCount: fileCount,
|
||||
IsDone: true,
|
||||
LastDoneTime: &now,
|
||||
})
|
||||
}
|
30
internal/index/index.go
Normal file
30
internal/index/index.go
Normal file
@ -0,0 +1,30 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// index is the package-wide bleve index handle. It is assigned by Init and
// used by BuildIndex and Search.
var index bleve.Index
|
||||
|
||||
func Init(indexPath *string) {
|
||||
fileIndex, err := bleve.Open(*indexPath)
|
||||
if err == bleve.ErrorIndexPathDoesNotExist {
|
||||
log.Infof("Creating new index...")
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
fileIndex, err = bleve.New(*indexPath, indexMapping)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
index = fileIndex
|
||||
progress := ReadProgress()
|
||||
if !progress.IsDone {
|
||||
log.Warnf("Last index build does not succeed!")
|
||||
WriteProgress(&Progress{
|
||||
FileCount: progress.FileCount,
|
||||
IsDone: false,
|
||||
LastDoneTime: nil,
|
||||
})
|
||||
}
|
||||
}
|
19
internal/index/search.go
Normal file
19
internal/index/search.go
Normal file
@ -0,0 +1,19 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func Search(queryString string, size int) (*bleve.SearchResult, error) {
|
||||
query := bleve.NewMatchQuery(queryString)
|
||||
search := bleve.NewSearchRequest(query)
|
||||
search.Size = size
|
||||
search.Fields = []string{"Path"}
|
||||
searchResults, err := index.Search(search)
|
||||
if err != nil {
|
||||
log.Errorf("search error: %+v", err)
|
||||
return nil, err
|
||||
}
|
||||
return searchResults, nil
|
||||
}
|
46
internal/index/util.go
Normal file
46
internal/index/util.go
Normal file
@ -0,0 +1,46 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/alist-org/alist/v3/internal/conf"
|
||||
"github.com/alist-org/alist/v3/pkg/utils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Progress describes the state of an index build as persisted in the
// progress.json file inside the index directory.
//
// The field order is significant: ReadProgress builds a value with an unkeyed
// composite literal.
type Progress struct {
	// FileCount is the number of files indexed so far.
	FileCount uint64 `json:"file_count"`
	// IsDone reports whether the build ran to completion.
	IsDone bool `json:"is_done"`
	// LastDoneTime is when the build finished; nil while it has not.
	LastDoneTime *time.Time `json:"last_done_time"`
}
|
||||
|
||||
func ReadProgress() Progress {
|
||||
progressFilePath := filepath.Join(conf.Conf.IndexDir, "progress.json")
|
||||
_, err := os.Stat(progressFilePath)
|
||||
progress := Progress{0, false, nil}
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
if !utils.WriteJsonToFile(progressFilePath, progress) {
|
||||
log.Fatalf("failed to create index progress file")
|
||||
}
|
||||
}
|
||||
progressBytes, err := os.ReadFile(progressFilePath)
|
||||
if err != nil {
|
||||
log.Fatalf("reading index progress file error: %+v", err)
|
||||
}
|
||||
err = utils.Json.Unmarshal(progressBytes, &progress)
|
||||
if err != nil {
|
||||
log.Fatalf("load index progress error: %+v", err)
|
||||
}
|
||||
return progress
|
||||
}
|
||||
|
||||
func WriteProgress(progress *Progress) {
|
||||
progressFilePath := filepath.Join(conf.Conf.IndexDir, "progress.json")
|
||||
log.Infof("write index progress: %v", progress)
|
||||
if !utils.WriteJsonToFile(progressFilePath, progress) {
|
||||
log.Fatalf("failed to write to index progress file")
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user