Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-11 23:25:41 +00:00)

Compare commits (31 commits)
| SHA1 |
|---|
| 63845ad211 |
| 4fad5e5d42 |
| 726eae5e97 |
| 2d85baa960 |
| fe2577f0dc |
| 3c0313626d |
| 8cd1c82ebf |
| fdef327abb |
| 979cae3221 |
| da802ab9e4 |
| e754d1b514 |
| 694df94ead |
| f0ba223c26 |
| 31c6e20ab2 |
| 67fa23d710 |
| 5dd45c4953 |
| 2772c06320 |
| 8e6eea3cc3 |
| c8ed1158c4 |
| 5d94d82d1f |
| 2148eca563 |
| 5f77e27853 |
| 4effc02b2d |
| f2918d421f |
| f5d53aabfd |
| b1081d6148 |
| d09566f751 |
| 1b1c396656 |
| 2da48cdd34 |
| da733135c8 |
| c3f14b1f00 |
.github/workflows/test-suite.yml (vendored, 3 lines changed)
@@ -53,6 +53,7 @@ jobs:
       matrix:
         os: [macos-14, windows-2022]
         features: ["", "--features enterprise"]
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
     steps:
       - uses: actions/checkout@v5
       - name: Cache dependencies
@@ -158,8 +159,6 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - uses: dtolnay/rust-toolchain@1.89
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
Cargo.lock (generated, 34 lines changed)
@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
 
 [[package]]
 name = "benchmarks"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "anyhow",
  "bumpalo",
@@ -790,7 +790,7 @@ dependencies = [
 
 [[package]]
 name = "build-info"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "anyhow",
  "time",
@@ -1786,7 +1786,7 @@ dependencies = [
 
 [[package]]
 name = "dump"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "anyhow",
  "big_s",
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "file-store"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "tempfile",
  "thiserror 2.0.17",
@@ -2040,7 +2040,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "insta",
  "levenshtein_automata",
@@ -2068,7 +2068,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2231,7 +2231,7 @@ dependencies = [
 
 [[package]]
 name = "fuzzers"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "arbitrary",
  "bumpalo",
@@ -3198,7 +3198,7 @@ dependencies = [
 
 [[package]]
 name = "index-scheduler"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "anyhow",
  "backoff",
@@ -3460,7 +3460,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "criterion",
  "serde_json",
@@ -3974,7 +3974,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
 
 [[package]]
 name = "meili-snap"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "insta",
  "md5 0.8.0",
@@ -3985,7 +3985,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -4083,7 +4083,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "base64 0.22.1",
  "enum-iterator",
@@ -4102,7 +4102,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -4137,7 +4137,7 @@ dependencies = [
 
 [[package]]
 name = "meilitool"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "anyhow",
  "clap",
@@ -4171,7 +4171,7 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "arroy",
  "bbqueue",
@@ -4750,7 +4750,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "big_s",
  "serde_json",
@@ -7783,7 +7783,7 @@ dependencies = [
 
 [[package]]
 name = "xtask"
-version = "1.28.1"
+version = "1.28.2"
 dependencies = [
  "anyhow",
  "build-info",
Cargo.toml
@@ -23,7 +23,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.28.1"
+version = "1.28.2"
 authors = [
     "Quentin de Quelen <quentin@dequelen.me>",
     "Clément Renault <clement@meilisearch.com>",
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:
@@ -37,7 +37,7 @@ catto [1,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 ----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
@@ -183,7 +183,11 @@ pub async fn get_metrics(
     crate::metrics::MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS.reset();
     let (batches, _total) = index_scheduler.get_batches_from_authorized_indexes(
         // Fetch the finished batches...
-        &Query { statuses: Some(vec![Status::Succeeded, Status::Failed]), ..Query::default() },
+        &Query {
+            statuses: Some(vec![Status::Succeeded, Status::Failed]),
+            limit: Some(1),
+            ..Query::default()
+        },
         auth_filters,
     )?;
     // ...and get the last batch only.
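Note on the hunk above: the metrics handler now asks the scheduler for at most one finished batch (`limit: Some(1)`) instead of every succeeded or failed batch, and then reads the last batch only. A minimal, self-contained sketch of that idea, with illustrative stand-in types rather than Meilisearch's actual API:

// Sketch: pick the most recent batch whose status matches the query, capped by `limit`.
#[derive(Default)]
struct Query {
    statuses: Option<Vec<&'static str>>,
    limit: Option<usize>,
}

fn last_finished_batch(all: &[(u32, &'static str)], query: &Query) -> Option<u32> {
    all.iter()
        .rev() // newest first
        .filter(|(_, status)| query.statuses.as_ref().map_or(true, |s| s.contains(status)))
        .map(|(uid, _)| *uid)
        .take(query.limit.unwrap_or(usize::MAX))
        .next()
}

fn main() {
    let batches = [(0, "succeeded"), (1, "failed"), (2, "processing")];
    let query = Query { statuses: Some(vec!["succeeded", "failed"]), limit: Some(1) };
    assert_eq!(last_finished_batch(&batches, &query), Some(1));
}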
@@ -2,6 +2,7 @@ mod chat;
 mod distinct;
 mod errors;
 mod get_settings;
+mod parent_seachable_fields;
 mod prefix_search_settings;
 mod proximity_settings;
 mod tokenizer_customization;
crates/meilisearch/tests/settings/parent_seachable_fields.rs (new file, 114 lines)
@@ -0,0 +1,114 @@
+use meili_snap::{json_string, snapshot};
+use once_cell::sync::Lazy;
+
+use crate::common::Server;
+use crate::json;
+
+static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
+    json!([
+        {
+            "id": 1,
+            "meta": {
+                "title": "Soup of the day",
+                "description": "many the fish",
+            }
+        },
+        {
+            "id": 2,
+            "meta": {
+                "title": "Soup of day",
+                "description": "many the lazy fish",
+            }
+        },
+        {
+            "id": 3,
+            "meta": {
+                "title": "the Soup of day",
+                "description": "many the fish",
+            }
+        },
+    ])
+});
+
+#[actix_rt::test]
+async fn nested_field_becomes_searchable() {
+    let server = Server::new_shared();
+    let index = server.unique_index();
+
+    let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await;
+    server.wait_task(task.uid()).await.succeeded();
+
+    let (response, code) = index
+        .update_settings(json!({
+            "searchableAttributes": ["meta.title"]
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{response:?}");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // We expect no documents when searching for
+    // a nested non-searchable field
+    index
+        .search(json!({"q": "many fish"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"[]"###);
+        })
+        .await;
+
+    let (response, code) = index
+        .update_settings(json!({
+            "searchableAttributes": ["meta.title", "meta.description"]
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{response:?}");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // We expect all the documents when the nested field becomes searchable
+    index
+        .search(json!({"q": "many fish"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "meta": {
+                  "title": "Soup of the day",
+                  "description": "many the fish"
+                }
+              },
+              {
+                "id": 3,
+                "meta": {
+                  "title": "the Soup of day",
+                  "description": "many the fish"
+                }
+              },
+              {
+                "id": 2,
+                "meta": {
+                  "title": "Soup of day",
+                  "description": "many the lazy fish"
+                }
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    let (response, code) = index
+        .update_settings(json!({
+            "searchableAttributes": ["meta.title"]
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{response:?}");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // We expect no documents when searching for
+    // a nested non-searchable field
+    index
+        .search(json!({"q": "many fish"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"[]"###);
+        })
+        .await;
+}
@@ -43,7 +43,7 @@ async fn version_too_old() {
     std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
     let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
     let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
-    snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.28.1");
+    snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.28.2");
 }
 
 #[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
     std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
     let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
     let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
-    snapshot!(err, @"Database version 1.28.2 is higher than the Meilisearch version 1.28.1. Downgrade is not supported");
+    snapshot!(err, @"Database version 1.28.3 is higher than the Meilisearch version 1.28.2. Downgrade is not supported");
 }
 
 #[actix_rt::test]
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "progress": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "stats": {
     "totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "progress": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "stats": {
     "totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "progress": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
  },
   "stats": {
     "totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "canceledBy": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "error": null,
   "duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "canceledBy": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "error": null,
   "duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "canceledBy": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "error": null,
   "duration": "[duration]",

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "progress": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "stats": {
     "totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
   "canceledBy": null,
   "details": {
     "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.28.1"
+    "upgradeTo": "v1.28.2"
   },
   "error": null,
   "duration": "[duration]",
@@ -18,6 +18,8 @@ use crate::{
 pub struct Metadata {
     /// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
     pub searchable: Option<Weight>,
+    /// The field is part of the exact attributes.
+    pub exact: bool,
     /// The field is part of the sortable attributes.
     pub sortable: bool,
     /// The field is defined as the distinct attribute.
@@ -209,6 +211,7 @@ impl Metadata {
 #[derive(Debug, Clone)]
 pub struct MetadataBuilder {
     searchable_attributes: Option<Vec<String>>,
+    exact_searchable_attributes: Vec<String>,
     filterable_attributes: Vec<FilterableAttributesRule>,
     sortable_attributes: HashSet<String>,
     localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -220,15 +223,18 @@ impl MetadataBuilder {
     pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
         let searchable_attributes = index
             .user_defined_searchable_fields(rtxn)?
-            .map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
+            .map(|fields| fields.into_iter().map(String::from).collect());
+        let exact_searchable_attributes =
+            index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
         let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
         let sortable_attributes = index.sortable_fields(rtxn)?;
         let localized_attributes = index.localized_attributes_rules(rtxn)?;
-        let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
+        let distinct_attribute = index.distinct_field(rtxn)?.map(String::from);
         let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
 
         Ok(Self::new(
             searchable_attributes,
+            exact_searchable_attributes,
             filterable_attributes,
             sortable_attributes,
             localized_attributes,
@@ -242,6 +248,7 @@ impl MetadataBuilder {
     /// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
     pub fn new(
         searchable_attributes: Option<Vec<String>>,
+        exact_searchable_attributes: Vec<String>,
         filterable_attributes: Vec<FilterableAttributesRule>,
         sortable_attributes: HashSet<String>,
         localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -256,6 +263,7 @@ impl MetadataBuilder {
 
         Self {
             searchable_attributes,
+            exact_searchable_attributes,
             filterable_attributes,
             sortable_attributes,
             localized_attributes,
@@ -269,6 +277,7 @@ impl MetadataBuilder {
             // Vectors fields are not searchable, filterable, distinct or asc_desc
             return Metadata {
                 searchable: None,
+                exact: false,
                 sortable: false,
                 distinct: false,
                 asc_desc: false,
@@ -296,6 +305,7 @@ impl MetadataBuilder {
             // Geo fields are not searchable, distinct or asc_desc
             return Metadata {
                 searchable: None,
+                exact: false,
                 sortable,
                 distinct: false,
                 asc_desc: false,
@@ -309,6 +319,7 @@ impl MetadataBuilder {
             debug_assert!(!sortable, "geojson fields should not be sortable");
             return Metadata {
                 searchable: None,
+                exact: false,
                 sortable,
                 distinct: false,
                 asc_desc: false,
@@ -329,6 +340,8 @@ impl MetadataBuilder {
             None => Some(0),
         };
 
+        let exact = self.exact_searchable_attributes.iter().any(|attr| is_faceted_by(field, attr));
+
         let distinct =
             self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
         let asc_desc = self.asc_desc_attributes.contains(field);
@@ -343,6 +356,7 @@ impl MetadataBuilder {
 
         Metadata {
             searchable,
+            exact,
             sortable,
             distinct,
             asc_desc,
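For context on the new `exact` flag computed above: a field is marked exact when it equals one of the exact searchable attributes or is nested under one of them. The snippet below is a rough, self-contained approximation of that check; `is_faceted_by` here is a simplified stand-in for the milli helper, not its actual implementation.

// Sketch: "meta.title" is considered faceted by "meta", but "metadata" is not.
fn is_faceted_by(field: &str, rule: &str) -> bool {
    field == rule || (field.starts_with(rule) && field[rule.len()..].starts_with('.'))
}

fn main() {
    let exact_searchable_attributes = vec!["meta".to_string()];
    let exact = exact_searchable_attributes.iter().any(|attr| is_faceted_by("meta.title", attr));
    assert!(exact);
    assert!(!is_faceted_by("metadata", "meta"));
}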
@@ -8,17 +8,26 @@ use bumpalo::Bump;
 
 use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
+use crate::fields_ids_map::metadata::Metadata;
 use crate::update::new::document::DocumentContext;
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
+use crate::update::new::extract::searchable::has_searchable_children;
 use crate::update::new::indexer::document_changes::{
     extract, DocumentChanges, Extractor, IndexingContext,
 };
+use crate::update::new::indexer::settings_changes::{
+    settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
+};
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
-use crate::update::new::DocumentChange;
-use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
+use crate::update::new::{DocumentChange, DocumentIdentifiers};
+use crate::update::settings::SettingsDelta;
+use crate::{
+    bucketed_position, DocumentId, FieldId, PatternMatch, Result, UserError,
+    MAX_POSITION_PER_ATTRIBUTE,
+};
 
 const MAX_COUNTED_WORDS: usize = 30;
 
@@ -34,6 +43,15 @@ pub struct WordDocidsBalancedCaches<'extractor> {
 
 unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
 
+/// Whether to extract or skip fields during word extraction.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum FieldDbExtraction {
+    /// Extract the word and put it in to the fid-based databases.
+    Extract,
+    /// Do not store the word in the fid-based databases.
+    Skip,
+}
+
 impl<'extractor> WordDocidsBalancedCaches<'extractor> {
     pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
         Self {
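A short, self-contained sketch of how the new FieldDbExtraction flag is used by the insert_* methods in the hunks below: word-level entries are always recorded, while the fid-based entries (word_fid_docids, fid_word_count) are only written when the caller passes Extract. The cache types here are simplified stand-ins, not the real milli structures.

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldDbExtraction {
    Extract,
    Skip,
}

#[derive(Default)]
struct Caches {
    // stand-in for the word-level databases, always filled
    word_docids: Vec<String>,
    // stand-in for the fid-based databases, filled only on Extract
    word_fid_docids: Vec<(String, u16)>,
}

impl Caches {
    fn insert_add(&mut self, word: &str, field_id: u16, extraction: FieldDbExtraction) {
        self.word_docids.push(word.to_string());
        if extraction == FieldDbExtraction::Extract {
            self.word_fid_docids.push((word.to_string(), field_id));
        }
    }
}

fn main() {
    let mut caches = Caches::default();
    caches.insert_add("soup", 0, FieldDbExtraction::Extract);
    caches.insert_add("soup", 1, FieldDbExtraction::Skip);
    assert_eq!(caches.word_docids.len(), 2);
    assert_eq!(caches.word_fid_docids.len(), 1);
}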
@@ -47,12 +65,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
         }
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn insert_add_u32(
         &mut self,
         field_id: FieldId,
         position: u16,
         word: &str,
         exact: bool,
+        field_db_extraction: FieldDbExtraction,
         docid: u32,
         bump: &Bump,
     ) -> Result<()> {
@@ -66,11 +86,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
         let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
         let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
 
-        buffer.clear();
-        buffer.extend_from_slice(word_bytes);
-        buffer.push(0);
-        buffer.extend_from_slice(&field_id.to_be_bytes());
-        self.word_fid_docids.insert_add_u32(&buffer, docid)?;
+        if field_db_extraction == FieldDbExtraction::Extract {
+            buffer.clear();
+            buffer.extend_from_slice(word_bytes);
+            buffer.push(0);
+            buffer.extend_from_slice(&field_id.to_be_bytes());
+            self.word_fid_docids.insert_add_u32(&buffer, docid)?;
+        }
 
         let position = bucketed_position(position);
         buffer.clear();
@@ -83,21 +105,26 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
             self.flush_fid_word_count(&mut buffer)?;
         }
 
-        self.fid_word_count
-            .entry(field_id)
-            .and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
-            .or_insert((None, Some(1)));
+        if field_db_extraction == FieldDbExtraction::Extract {
+            self.fid_word_count
+                .entry(field_id)
+                .and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
+                .or_insert((None, Some(1)));
+        }
 
         self.current_docid = Some(docid);
 
         Ok(())
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn insert_del_u32(
         &mut self,
         field_id: FieldId,
         position: u16,
         word: &str,
         exact: bool,
+        field_db_extraction: FieldDbExtraction,
         docid: u32,
         bump: &Bump,
     ) -> Result<()> {
@@ -111,11 +138,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
         let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
         let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
 
-        buffer.clear();
-        buffer.extend_from_slice(word_bytes);
-        buffer.push(0);
-        buffer.extend_from_slice(&field_id.to_be_bytes());
-        self.word_fid_docids.insert_del_u32(&buffer, docid)?;
+        if field_db_extraction == FieldDbExtraction::Extract {
+            buffer.clear();
+            buffer.extend_from_slice(word_bytes);
+            buffer.push(0);
+            buffer.extend_from_slice(&field_id.to_be_bytes());
+            self.word_fid_docids.insert_del_u32(&buffer, docid)?;
+        }
 
         let position = bucketed_position(position);
         buffer.clear();
@@ -128,10 +157,12 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
             self.flush_fid_word_count(&mut buffer)?;
         }
 
-        self.fid_word_count
-            .entry(field_id)
-            .and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
-            .or_insert((Some(1), None));
+        if field_db_extraction == FieldDbExtraction::Extract {
+            self.fid_word_count
+                .entry(field_id)
+                .and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
+                .or_insert((Some(1), None));
+        }
 
         self.current_docid = Some(docid);
 
@@ -325,6 +356,24 @@ impl WordDocidsExtractors {
             exact_attributes.iter().any(|attr| contained_in(fname, attr))
                 || disabled_typos_terms.is_exact(word)
         };
+
+        let mut should_tokenize = |field_name: &str| {
+            let Some((field_id, meta)) = new_fields_ids_map.id_with_metadata_or_insert(field_name)
+            else {
+                return Err(UserError::AttributeLimitReached.into());
+            };
+
+            let pattern_match = if meta.is_searchable() {
+                PatternMatch::Match
+            } else {
+                // TODO: should be a match on the field_name using `match_field_legacy` function,
+                // but for legacy reasons we iterate over all the fields to fill the field_id_map.
+                PatternMatch::Parent
+            };
+
+            Ok((field_id, pattern_match))
+        };
+
         match document_change {
             DocumentChange::Deletion(inner) => {
                 let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -333,13 +382,14 @@ impl WordDocidsExtractors {
                         pos,
                         word,
                         is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                         inner.docid(),
                         doc_alloc,
                     )
                 };
                 document_tokenizer.tokenize_document(
                     inner.current(rtxn, index, context.db_fields_ids_map)?,
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                     &mut token_fn,
                 )?;
             }
@@ -361,13 +411,14 @@ impl WordDocidsExtractors {
                         pos,
                         word,
                         is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                         inner.docid(),
                         doc_alloc,
                     )
                 };
                 document_tokenizer.tokenize_document(
                     inner.current(rtxn, index, context.db_fields_ids_map)?,
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                     &mut token_fn,
                 )?;
 
@@ -377,13 +428,14 @@ impl WordDocidsExtractors {
                         pos,
                         word,
                         is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                         inner.docid(),
                         doc_alloc,
                     )
                 };
                 document_tokenizer.tokenize_document(
                     inner.merged(rtxn, index, context.db_fields_ids_map)?,
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                     &mut token_fn,
                 )?;
             }
@@ -394,13 +446,14 @@ impl WordDocidsExtractors {
                         pos,
                         word,
                         is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                         inner.docid(),
                         doc_alloc,
                     )
                 };
                 document_tokenizer.tokenize_document(
                     inner.inserted(),
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                     &mut token_fn,
                 )?;
             }
@@ -411,3 +464,292 @@ impl WordDocidsExtractors {
         cached_sorter.flush_fid_word_count(&mut buffer)
     }
 }
+
+pub struct WordDocidsSettingsExtractorsData<'a, SD> {
+    tokenizer: DocumentTokenizer<'a>,
+    max_memory_by_thread: Option<usize>,
+    buckets: usize,
+    settings_delta: &'a SD,
+}
+
+impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
+    for WordDocidsSettingsExtractorsData<'_, SD>
+{
+    type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
+
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
+        Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
+            self.buckets,
+            self.max_memory_by_thread,
+            extractor_alloc,
+        ))))
+    }
+
+    fn process<'doc>(
+        &'doc self,
+        documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
+        context: &'doc DocumentContext<Self::Data>,
+    ) -> crate::Result<()> {
+        for document in documents {
+            let document = document?;
+            SettingsChangeWordDocidsExtractors::extract_document_from_settings_change(
+                document,
+                context,
+                &self.tokenizer,
+                self.settings_delta,
+            )?;
+        }
+        Ok(())
+    }
+}
+
+pub struct SettingsChangeWordDocidsExtractors;
+
+impl SettingsChangeWordDocidsExtractors {
+    pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
+        settings_delta: &SD,
+        documents: &'indexer DocumentsIndentifiers<'indexer>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+        step: IndexingStep,
+    ) -> Result<WordDocidsCaches<'extractor>>
+    where
+        SD: SettingsDelta + Sync,
+        MSP: Fn() -> bool + Sync,
+    {
+        // Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
+        // TODO we need to read the new AND old settings to support changing global parameters
+        let rtxn = indexing_context.index.read_txn()?;
+        let stop_words = indexing_context.index.stop_words(&rtxn)?;
+        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
+        let allowed_separators: Option<Vec<_>> =
+            allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let dictionary = indexing_context.index.dictionary(&rtxn)?;
+        let dictionary: Option<Vec<_>> =
+            dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let mut builder = tokenizer_builder(
+            stop_words.as_ref(),
+            allowed_separators.as_deref(),
+            dictionary.as_deref(),
+        );
+        let tokenizer = builder.build();
+        let localized_attributes_rules =
+            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
+        let document_tokenizer = DocumentTokenizer {
+            tokenizer: &tokenizer,
+            localized_attributes_rules: &localized_attributes_rules,
+            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
+        };
+        let extractor_data = WordDocidsSettingsExtractorsData {
+            tokenizer: document_tokenizer,
+            max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
+            buckets: rayon::current_num_threads(),
+            settings_delta,
+        };
+        let datastore = ThreadLocal::new();
+        {
+            let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
+            let _entered = span.enter();
+
+            settings_change_extract(
+                documents,
+                &extractor_data,
+                indexing_context,
+                extractor_allocs,
+                &datastore,
+                step,
+            )?;
+        }
+
+        let mut merger = WordDocidsCaches::new();
+        for cache in datastore.into_iter().flat_map(RefCell::into_inner) {
+            merger.push(cache)?;
+        }
+
+        Ok(merger)
+    }
+
+    /// Extracts document words from a settings change.
+    fn extract_document_from_settings_change<SD: SettingsDelta>(
+        document: DocumentIdentifiers<'_>,
+        context: &DocumentContext<RefCell<Option<WordDocidsBalancedCaches>>>,
+        document_tokenizer: &DocumentTokenizer,
+        settings_delta: &SD,
+    ) -> Result<()> {
+        let mut cached_sorter_ref = context.data.borrow_mut_or_yield();
+        let cached_sorter = cached_sorter_ref.as_mut().unwrap();
+        let doc_alloc = &context.doc_alloc;
+
+        let new_fields_ids_map = settings_delta.new_fields_ids_map();
+        let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
+        let old_searchable = settings_delta.old_searchable_attributes().as_ref();
+        let new_searchable = settings_delta.new_searchable_attributes().as_ref();
+
+        let current_document = document.current(
+            &context.rtxn,
+            context.index,
+            old_fields_ids_map.as_fields_ids_map(),
+        )?;
+
+        #[derive(Debug, Clone, Copy, PartialEq)]
+        enum ActionToOperate {
+            ReindexAllFields,
+            // TODO improve by listing field prefixes
+            IndexAddedFields,
+            SkipDocument,
+        }
+
+        let mut action = ActionToOperate::SkipDocument;
+        // Here we do a preliminary check to determine the action to take.
+        // This check doesn't trigger the tokenizer as we never return
+        // PatternMatch::Match.
+        document_tokenizer.tokenize_document(
+            current_document,
+            &mut |field_name| {
+                let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
+
+                // If the document must be reindexed, early return NoMatch to stop the scanning process.
+                if action == ActionToOperate::ReindexAllFields {
+                    return Ok((fid, PatternMatch::NoMatch));
+                }
+
+                let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
+                let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
+
+                action = match (old_field_metadata, new_field_metadata) {
+                    // At least one field is added or removed from the exact fields => ReindexAllFields
+                    (Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. })
+                        if old_exact != new_exact =>
+                    {
+                        ActionToOperate::ReindexAllFields
+                    }
+                    // At least one field is removed from the searchable fields => ReindexAllFields
+                    (Metadata { searchable: Some(_), .. }, Metadata { searchable: None, .. }) => {
+                        ActionToOperate::ReindexAllFields
+                    }
+                    // At least one field is added in the searchable fields => IndexAddedFields
+                    (Metadata { searchable: None, .. }, Metadata { searchable: Some(_), .. }) => {
+                        // We can safely overwrite the action, because we early return when action is ReindexAllFields.
+                        ActionToOperate::IndexAddedFields
+                    }
+                    _ => action,
+                };
+
+                Ok((fid, PatternMatch::Parent))
+            },
+            &mut |_, _, _, _| Ok(()),
+        )?;
+
+        // Early return when we don't need to index the document
+        if action == ActionToOperate::SkipDocument {
+            return Ok(());
+        }
+
+        let mut should_tokenize = |field_name: &str| {
+            let field_id = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
+            let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
+            let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
+
+            let pattern_match = match action {
+                ActionToOperate::ReindexAllFields => {
+                    if old_field_metadata.is_searchable() || new_field_metadata.is_searchable() {
+                        PatternMatch::Match
+                    // If any old or new field is searchable then we need to iterate over all fields
+                    // else if any field matches we need to iterate over all fields
+                    } else if has_searchable_children(
+                        field_name,
+                        old_searchable.zip(new_searchable).map(|(old, new)| old.iter().chain(new)),
+                    ) {
+                        PatternMatch::Parent
+                    } else {
+                        PatternMatch::NoMatch
+                    }
+                }
+                ActionToOperate::IndexAddedFields => {
+                    // Was not searchable but now is
+                    if !old_field_metadata.is_searchable() && new_field_metadata.is_searchable() {
+                        PatternMatch::Match
+                    // If the field is now a parent of a searchable field
+                    } else if has_searchable_children(field_name, new_searchable) {
+                        PatternMatch::Parent
+                    } else {
+                        PatternMatch::NoMatch
+                    }
+                }
+                ActionToOperate::SkipDocument => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((field_id, pattern_match))
|
||||||
|
};
|
||||||
|
|
||||||
|
let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
|
||||||
|
let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
|
||||||
|
let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
|
||||||
|
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
|
||||||
|
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
|
||||||
|
|
||||||
|
match (old_field_metadata, new_field_metadata) {
|
||||||
|
(
|
||||||
|
Metadata { searchable: Some(_), exact: old_exact, .. },
|
||||||
|
Metadata { searchable: None, .. },
|
||||||
|
) => cached_sorter.insert_del_u32(
|
||||||
|
field_id,
|
||||||
|
pos,
|
||||||
|
word,
|
||||||
|
old_exact || old_disabled_typos_terms.is_exact(word),
|
||||||
|
// We deleted the field globally
|
||||||
|
FieldDbExtraction::Skip,
|
||||||
|
document.docid(),
|
||||||
|
doc_alloc,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Metadata { searchable: None, .. },
|
||||||
|
Metadata { searchable: Some(_), exact: new_exact, .. },
|
||||||
|
) => cached_sorter.insert_add_u32(
|
||||||
|
field_id,
|
||||||
|
pos,
|
||||||
|
word,
|
||||||
|
new_exact || new_disabled_typos_terms.is_exact(word),
|
||||||
|
FieldDbExtraction::Extract,
|
||||||
|
document.docid(),
|
||||||
|
doc_alloc,
|
||||||
|
),
|
||||||
|
(Metadata { searchable: None, .. }, Metadata { searchable: None, .. }) => {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. }) => {
|
||||||
|
cached_sorter.insert_del_u32(
|
||||||
|
field_id,
|
||||||
|
pos,
|
||||||
|
word,
|
||||||
|
old_exact || old_disabled_typos_terms.is_exact(word),
|
||||||
|
// The field has already been extracted
|
||||||
|
FieldDbExtraction::Skip,
|
||||||
|
document.docid(),
|
||||||
|
doc_alloc,
|
||||||
|
)?;
|
||||||
|
cached_sorter.insert_add_u32(
|
||||||
|
field_id,
|
||||||
|
pos,
|
||||||
|
word,
|
||||||
|
new_exact || new_disabled_typos_terms.is_exact(word),
|
||||||
|
// The field has already been extracted
|
||||||
|
FieldDbExtraction::Skip,
|
||||||
|
document.docid(),
|
||||||
|
doc_alloc,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO we must tokenize twice when we change global parameters like stop words,
|
||||||
|
// the language settings, dictionary, separators, non-separators...
|
||||||
|
document_tokenizer.tokenize_document(
|
||||||
|
current_document,
|
||||||
|
&mut should_tokenize,
|
||||||
|
&mut token_fn,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
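The action-selection logic above is easier to see in isolation. The following is a minimal, self-contained sketch of that decision only, not the commit's code: `FieldState` is a hypothetical stand-in for the crate's field `Metadata`, and the priorities mirror the match arms above (an exact-flag change or a no-longer-searchable field forces a full reindex, a newly searchable field only triggers indexing of the added fields).

// Hypothetical stand-in for the real field `Metadata`; names are illustrative only.
#[derive(Clone, Copy, PartialEq)]
struct FieldState {
    searchable: bool,
    exact: bool,
}

#[derive(Debug, Clone, Copy, PartialEq)]
enum Action {
    ReindexAllFields,
    IndexAddedFields,
    SkipDocument,
}

// Mirrors the priority used above: once a full reindex is decided it sticks,
// otherwise the old/new flags of each field may upgrade the action.
fn update_action(action: Action, old: FieldState, new: FieldState) -> Action {
    if action == Action::ReindexAllFields {
        return action;
    }
    match (old, new) {
        (o, n) if o.exact != n.exact => Action::ReindexAllFields,
        (o, n) if o.searchable && !n.searchable => Action::ReindexAllFields,
        (o, n) if !o.searchable && n.searchable => Action::IndexAddedFields,
        _ => action,
    }
}

fn main() {
    let old = FieldState { searchable: false, exact: false };
    let new = FieldState { searchable: true, exact: false };
    // A field that becomes searchable only requires indexing the added fields.
    assert_eq!(update_action(Action::SkipDocument, old, new), Action::IndexAddedFields);
}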
@@ -6,17 +6,24 @@ use bumpalo::Bump;
 use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
+use crate::fields_ids_map::metadata::Metadata;
+use crate::proximity::ProximityPrecision::*;
 use crate::proximity::{index_proximity, MAX_DISTANCE};
 use crate::update::new::document::{Document, DocumentContext};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::indexer::document_changes::{
     extract, DocumentChanges, Extractor, IndexingContext,
 };
+use crate::update::new::indexer::settings_change_extract;
+use crate::update::new::indexer::settings_changes::{
+    DocumentsIndentifiers, SettingsChangeExtractor,
+};
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
-use crate::update::new::DocumentChange;
-use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
+use crate::update::new::{DocumentChange, DocumentIdentifiers};
+use crate::update::settings::SettingsDelta;
+use crate::{FieldId, PatternMatch, Result, UserError, MAX_POSITION_PER_ATTRIBUTE};
 
 pub struct WordPairProximityDocidsExtractorData<'a> {
     tokenizer: DocumentTokenizer<'a>,
@@ -116,7 +123,7 @@ impl WordPairProximityDocidsExtractor {
     // and to store the docids of the documents that have a number of words in a given field
     // equal to or under than MAX_COUNTED_WORDS.
     fn extract_document_change(
-        context: &DocumentContext<RefCell<BalancedCaches>>,
+        context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
        document_tokenizer: &DocumentTokenizer,
        searchable_attributes: Option<&[&str]>,
        document_change: DocumentChange,
@@ -147,8 +154,12 @@ impl WordPairProximityDocidsExtractor {
                 process_document_tokens(
                     document,
                     document_tokenizer,
-                    new_fields_ids_map,
                     &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                     &mut |(w1, w2), prox| {
                         del_word_pair_proximity.push(((w1, w2), prox));
                     },
@@ -170,8 +181,12 @@ impl WordPairProximityDocidsExtractor {
                 process_document_tokens(
                     document,
                     document_tokenizer,
-                    new_fields_ids_map,
                     &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                     &mut |(w1, w2), prox| {
                         del_word_pair_proximity.push(((w1, w2), prox));
                     },
@@ -180,8 +195,12 @@ impl WordPairProximityDocidsExtractor {
                 process_document_tokens(
                     document,
                     document_tokenizer,
-                    new_fields_ids_map,
                     &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                     &mut |(w1, w2), prox| {
                         add_word_pair_proximity.push(((w1, w2), prox));
                     },
@@ -192,8 +211,12 @@ impl WordPairProximityDocidsExtractor {
                 process_document_tokens(
                     document,
                     document_tokenizer,
-                    new_fields_ids_map,
                     &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                     &mut |(w1, w2), prox| {
                         add_word_pair_proximity.push(((w1, w2), prox));
                     },
@@ -257,8 +280,8 @@ fn drain_word_positions(
 fn process_document_tokens<'doc>(
     document: impl Document<'doc>,
     document_tokenizer: &DocumentTokenizer,
-    fields_ids_map: &mut GlobalFieldsIdsMap,
     word_positions: &mut VecDeque<(Rc<str>, u16)>,
+    field_id_and_metadata: &mut impl FnMut(&str) -> Result<(FieldId, Metadata)>,
     word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
 ) -> Result<()> {
     let mut field_id = None;
@@ -279,8 +302,248 @@ fn process_document_tokens<'doc>(
         word_positions.push_back((Rc::from(word), pos));
         Ok(())
     };
-    document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
+
+    let mut should_tokenize = |field_name: &str| {
+        let (field_id, meta) = field_id_and_metadata(field_name)?;
+
+        let pattern_match = if meta.is_searchable() {
+            PatternMatch::Match
+        } else {
+            // TODO: should be a match on the field_name using `match_field_legacy` function,
+            // but for legacy reasons we iterate over all the fields to fill the field_id_map.
+            PatternMatch::Parent
+        };
+
+        Ok((field_id, pattern_match))
+    };
+
+    document_tokenizer.tokenize_document(document, &mut should_tokenize, &mut token_fn)?;
+
     drain_word_positions(word_positions, word_pair_proximity);
     Ok(())
 }
+
+pub struct WordPairProximityDocidsSettingsExtractorsData<'a, SD> {
+    tokenizer: DocumentTokenizer<'a>,
+    max_memory_by_thread: Option<usize>,
+    buckets: usize,
+    settings_delta: &'a SD,
+}
+
+impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
+    for WordPairProximityDocidsSettingsExtractorsData<'_, SD>
+{
+    type Data = RefCell<BalancedCaches<'extractor>>;
+
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
+        Ok(RefCell::new(BalancedCaches::new_in(
+            self.buckets,
+            self.max_memory_by_thread,
+            extractor_alloc,
+        )))
+    }
+
+    fn process<'doc>(
+        &'doc self,
+        documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
+        context: &'doc DocumentContext<Self::Data>,
+    ) -> crate::Result<()> {
+        for document in documents {
+            let document = document?;
+            SettingsChangeWordPairProximityDocidsExtractors::extract_document_from_settings_change(
+                document,
+                context,
+                &self.tokenizer,
+                self.settings_delta,
+            )?;
+        }
+        Ok(())
+    }
+}
+
+pub struct SettingsChangeWordPairProximityDocidsExtractors;
+
+impl SettingsChangeWordPairProximityDocidsExtractors {
+    pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
+        settings_delta: &SD,
+        documents: &'indexer DocumentsIndentifiers<'indexer>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+        step: IndexingStep,
+    ) -> Result<Vec<BalancedCaches<'extractor>>>
+    where
+        SD: SettingsDelta + Sync,
+        MSP: Fn() -> bool + Sync,
+    {
+        // Warning: this is duplicated code from extract_word_docids.rs
+        let rtxn = indexing_context.index.read_txn()?;
+        let stop_words = indexing_context.index.stop_words(&rtxn)?;
+        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
+        let allowed_separators: Option<Vec<_>> =
+            allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let dictionary = indexing_context.index.dictionary(&rtxn)?;
+        let dictionary: Option<Vec<_>> =
+            dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let mut builder = tokenizer_builder(
+            stop_words.as_ref(),
+            allowed_separators.as_deref(),
+            dictionary.as_deref(),
+        );
+        let tokenizer = builder.build();
+        let localized_attributes_rules =
+            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
+        let document_tokenizer = DocumentTokenizer {
+            tokenizer: &tokenizer,
+            localized_attributes_rules: &localized_attributes_rules,
+            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
+        };
+        let extractor_data = WordPairProximityDocidsSettingsExtractorsData {
+            tokenizer: document_tokenizer,
+            max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
+            buckets: rayon::current_num_threads(),
+            settings_delta,
+        };
+        let datastore = ThreadLocal::new();
+        {
+            let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids_extraction");
+            let _entered = span.enter();
+
+            settings_change_extract(
+                documents,
+                &extractor_data,
+                indexing_context,
+                extractor_allocs,
+                &datastore,
+                step,
+            )?;
+        }
+
+        Ok(datastore.into_iter().map(RefCell::into_inner).collect())
+    }
+
+    /// Extracts document words from a settings change.
+    fn extract_document_from_settings_change<SD: SettingsDelta>(
+        document: DocumentIdentifiers<'_>,
+        context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
+        document_tokenizer: &DocumentTokenizer,
+        settings_delta: &SD,
+    ) -> Result<()> {
+        let mut cached_sorter = context.data.borrow_mut_or_yield();
+        let doc_alloc = &context.doc_alloc;
+
+        let new_fields_ids_map = settings_delta.new_fields_ids_map();
+        let old_fields_ids_map = settings_delta.old_fields_ids_map();
+        let old_proximity_precision = *settings_delta.old_proximity_precision();
+        let new_proximity_precision = *settings_delta.new_proximity_precision();
+
+        let current_document = document.current(
+            &context.rtxn,
+            context.index,
+            old_fields_ids_map.as_fields_ids_map(),
+        )?;
+
+        #[derive(Debug, Clone, Copy, PartialEq)]
+        enum ActionToOperate {
+            ReindexAllFields,
+            SkipDocument,
+        }
+
+        // TODO prefix_fid delete_old_fid_based_databases
+        let mut action = match (old_proximity_precision, new_proximity_precision) {
+            (ByAttribute, ByWord) => ActionToOperate::ReindexAllFields,
+            (_, _) => ActionToOperate::SkipDocument,
+        };
+
+        // Here we do a preliminary check to determine the action to take.
+        // This check doesn't trigger the tokenizer as we never return
+        // PatternMatch::Match.
+        if action != ActionToOperate::ReindexAllFields {
+            document_tokenizer.tokenize_document(
+                current_document,
+                &mut |field_name| {
+                    let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
+
+                    // If the document must be reindexed, early return NoMatch to stop the scanning process.
+                    if action == ActionToOperate::ReindexAllFields {
+                        return Ok((fid, PatternMatch::NoMatch));
+                    }
+
+                    let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
+                    let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
+
+                    action = match (old_field_metadata, new_field_metadata) {
+                        // At least one field is removed or added from the searchable fields
+                        (
+                            Metadata { searchable: Some(_), .. },
+                            Metadata { searchable: None, .. },
+                        )
+                        | (
+                            Metadata { searchable: None, .. },
+                            Metadata { searchable: Some(_), .. },
+                        ) => ActionToOperate::ReindexAllFields,
+                        _ => action,
+                    };
+
+                    Ok((fid, PatternMatch::Parent))
+                },
+                &mut |_, _, _, _| Ok(()),
+            )?;
+        }
+
+        // Early return when we don't need to index the document
+        if action == ActionToOperate::SkipDocument {
+            return Ok(());
+        }
+
+        let mut del_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
+        let mut add_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
+
+        // is a vecdequeue, and will be smol, so can stay on the heap for now
+        let mut word_positions: VecDeque<(Rc<str>, u16)> =
+            VecDeque::with_capacity(MAX_DISTANCE as usize);
+
+        process_document_tokens(
+            current_document,
+            // TODO Tokenize must be based on old settings
+            document_tokenizer,
+            &mut word_positions,
+            &mut |field_name| {
+                Ok(old_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
+            },
+            &mut |(w1, w2), prox| {
+                del_word_pair_proximity.push(((w1, w2), prox));
+            },
+        )?;
+
+        process_document_tokens(
+            current_document,
+            // TODO Tokenize must be based on new settings
+            document_tokenizer,
+            &mut word_positions,
+            &mut |field_name| {
+                Ok(new_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
+            },
+            &mut |(w1, w2), prox| {
+                add_word_pair_proximity.push(((w1, w2), prox));
+            },
+        )?;
+
+        let mut key_buffer = bumpalo::collections::Vec::new_in(doc_alloc);
+
+        del_word_pair_proximity.sort_unstable();
+        del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
+        for ((w1, w2), prox) in del_word_pair_proximity.iter() {
+            let key = build_key(*prox, w1, w2, &mut key_buffer);
+            cached_sorter.insert_del_u32(key, document.docid())?;
+        }
+
+        add_word_pair_proximity.sort_unstable();
+        add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
+        for ((w1, w2), prox) in add_word_pair_proximity.iter() {
+            let key = build_key(*prox, w1, w2, &mut key_buffer);
+            cached_sorter.insert_add_u32(key, document.docid())?;
+        }
+
+        Ok(())
+    }
+}
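The del/add split used by the settings-change extractor above boils down to two extraction passes (old settings, then new settings) whose results are sorted, deduplicated by key, and fed to the delete and add sides of the cache. This is a minimal sketch of that shape on plain std types, not the commit's code; the real extractor uses bumpalo-backed vectors and writes through `BalancedCaches`.

// One extraction pass produces ((w1, w2), proximity) entries.
type Pair = ((String, String), u8);

// Mirror of the del/add handling above: dedup each pass by key, then feed the
// old pass to the delete side and the new pass to the add side of the cache.
fn split_del_add(mut old_pass: Vec<Pair>, mut new_pass: Vec<Pair>) -> (Vec<Pair>, Vec<Pair>) {
    old_pass.sort_unstable();
    old_pass.dedup_by(|(k1, _), (k2, _)| k1 == k2);
    new_pass.sort_unstable();
    new_pass.dedup_by(|(k1, _), (k2, _)| k1 == k2);
    (old_pass, new_pass)
}

fn main() {
    let old_pass: Vec<Pair> = vec![
        (("hello".into(), "world".into()), 1),
        (("hello".into(), "world".into()), 2),
    ];
    let new_pass: Vec<Pair> = vec![(("hello".into(), "rust".into()), 1)];
    let (del, add) = split_del_add(old_pass, new_pass);
    assert_eq!(del.len(), 1); // duplicate keys collapse to a single deletion
    assert_eq!(add.len(), 1);
}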
@@ -2,8 +2,12 @@ mod extract_word_docids;
 mod extract_word_pair_proximity_docids;
 mod tokenize_document;
 
-pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
-pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
+pub use extract_word_docids::{
+    SettingsChangeWordDocidsExtractors, WordDocidsCaches, WordDocidsExtractors,
+};
+pub use extract_word_pair_proximity_docids::{
+    SettingsChangeWordPairProximityDocidsExtractors, WordPairProximityDocidsExtractor,
+};
 
 use crate::attribute_patterns::{match_field_legacy, PatternMatch};
 
@@ -27,3 +31,17 @@ pub fn match_searchable_field(
 
     selection
 }
+
+/// return `true` if the provided `field_name` is a parent of at least one of the fields contained in `searchable`,
+/// or if `searchable` is `None`.
+fn has_searchable_children<I, A>(field_name: &str, searchable: Option<I>) -> bool
+where
+    I: IntoIterator<Item = A>,
+    A: AsRef<str>,
+{
+    searchable.is_none_or(|fields| {
+        fields
+            .into_iter()
+            .any(|attr| match_field_legacy(attr.as_ref(), field_name) == PatternMatch::Parent)
+    })
+}
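`has_searchable_children` relies on the crate-internal `match_field_legacy`; the sketch below only illustrates the relation it checks, using a hypothetical dotted-path prefix rule as a stand-in, together with the `None`-means-everything default. It is an assumption-laden illustration, not the crate's matching rules.

// Illustrative stand-in for the crate's legacy pattern matching: `field` is a
// parent of `attr` when the attribute is nested under it (e.g. "a" and "a.b").
fn is_parent_of(field: &str, attr: &str) -> bool {
    attr.strip_prefix(field).is_some_and(|rest| rest.starts_with('.'))
}

// Same shape as `has_searchable_children`: with no explicit searchable list,
// every field may contain searchable children.
fn has_searchable_children<'a, I>(field_name: &str, searchable: Option<I>) -> bool
where
    I: IntoIterator<Item = &'a str>,
{
    searchable.is_none_or(|fields| fields.into_iter().any(|attr| is_parent_of(field_name, attr)))
}

fn main() {
    assert!(has_searchable_children("address", Some(["address.city"])));
    assert!(!has_searchable_children("title", Some(["address.city"])));
    assert!(has_searchable_children("anything", None::<[&str; 0]>));
}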
@@ -8,10 +8,7 @@ use crate::update::new::document::Document;
 use crate::update::new::extract::perm_json_p::{
     seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
 };
-use crate::{
-    FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
-    MAX_WORD_LENGTH,
-};
+use crate::{FieldId, InternalError, LocalizedAttributesRule, Result, MAX_WORD_LENGTH};
 
 // todo: should be crate::proximity::MAX_DISTANCE but it has been forgotten
 const MAX_DISTANCE: u32 = 8;
@@ -26,26 +23,25 @@ impl DocumentTokenizer<'_> {
     pub fn tokenize_document<'doc>(
         &self,
         document: impl Document<'doc>,
-        field_id_map: &mut GlobalFieldsIdsMap,
+        should_tokenize: &mut impl FnMut(&str) -> Result<(FieldId, PatternMatch)>,
         token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
     ) -> Result<()> {
         let mut field_position = HashMap::new();
-        let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
-            let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
-                return Err(UserError::AttributeLimitReached.into());
-            };
-
-            if meta.is_searchable() {
-                self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
-            }
-
-            // todo: should be a match on the field_name using `match_field_legacy` function,
-            // but for legacy reasons we iterate over all the fields to fill the field_id_map.
-            Ok(PatternMatch::Match)
-        };
-
         for entry in document.iter_top_level_fields() {
             let (field_name, value) = entry?;
+
+            if let (_, PatternMatch::NoMatch) = should_tokenize(field_name)? {
+                continue;
+            }
+
+            let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
+                let (fid, pattern_match) = should_tokenize(field_name)?;
+                if pattern_match == PatternMatch::Match {
+                    self.tokenize_field(fid, field_name, value, token_fn, &mut field_position)?;
+                }
+                Ok(pattern_match)
+            };
+
             // parse json.
             match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
                 Value::Object(object) => seek_leaf_values_in_object(
@@ -192,7 +188,7 @@ mod test {
     use super::*;
     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
     use crate::update::new::document::{DocumentFromVersions, Versions};
-    use crate::FieldsIdsMap;
+    use crate::{FieldsIdsMap, GlobalFieldsIdsMap, UserError};
 
     #[test]
     fn test_tokenize_document() {
@@ -231,6 +227,7 @@ mod test {
             Default::default(),
             Default::default(),
             Default::default(),
+            Default::default(),
             None,
             None,
             Default::default(),
@@ -251,15 +248,19 @@ mod test {
         let document = Versions::single(document);
         let document = DocumentFromVersions::new(&document);
 
+        let mut should_tokenize = |field_name: &str| {
+            let Some(field_id) = global_fields_ids_map.id_or_insert(field_name) else {
+                return Err(UserError::AttributeLimitReached.into());
+            };
+
+            Ok((field_id, PatternMatch::Match))
+        };
+
         document_tokenizer
-            .tokenize_document(
-                document,
-                &mut global_fields_ids_map,
-                &mut |_fname, fid, pos, word| {
-                    words.insert([fid, pos], word.to_string());
-                    Ok(())
-                },
-            )
+            .tokenize_document(document, &mut should_tokenize, &mut |_fname, fid, pos, word| {
+                words.insert([fid, pos], word.to_string());
+                Ok(())
+            })
             .unwrap();
 
         snapshot!(format!("{:#?}", words), @r###"
@@ -1,5 +1,6 @@
 use std::cell::RefCell;
 use std::fmt::Debug;
+use std::sync::RwLock;
 
 use bumpalo::collections::Vec as BVec;
 use bumpalo::Bump;
@@ -27,7 +28,10 @@ use crate::vector::extractor::{
 use crate::vector::session::{EmbedSession, Input, Metadata, OnEmbed};
 use crate::vector::settings::ReindexAction;
 use crate::vector::{Embedding, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
-use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
+use crate::{
+    DocumentId, FieldDistribution, GlobalFieldsIdsMap, InternalError, Result, ThreadPoolNoAbort,
+    UserError,
+};
 
 pub struct EmbeddingExtractor<'a, 'b> {
     embedders: &'a RuntimeEmbedders,
@@ -321,6 +325,15 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
         let old_embedders = self.settings_delta.old_embedders();
         let unused_vectors_distribution = UnusedVectorsDistributionBump::new_in(&context.doc_alloc);
 
+        // We get a reference to the new and old fields ids maps but
+        // note that those are local versions where updates to them
+        // will not be reflected in the database. It's not an issue
+        // because new settings do not generate new fields.
+        let new_fields_ids_map = RwLock::new(self.settings_delta.new_fields_ids_map().clone());
+        let new_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&new_fields_ids_map));
+        let old_fields_ids_map = RwLock::new(self.settings_delta.old_fields_ids_map().clone());
+        let old_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&old_fields_ids_map));
+
         let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
         let embedder_configs = context.index.embedding_configs();
         for (embedder_name, action) in self.settings_delta.embedder_actions().iter() {
@@ -396,6 +409,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                     if !must_regenerate {
                         continue;
                     }
+
                     // we need to regenerate the prompts for the document
                     chunks.settings_change_autogenerated(
                         document.docid(),
@@ -406,7 +420,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                             context.db_fields_ids_map,
                         )?,
                         self.settings_delta,
-                        context.new_fields_ids_map,
+                        &old_fields_ids_map,
+                        &new_fields_ids_map,
                         &unused_vectors_distribution,
                         old_is_user_provided,
                         fragments_changed,
@@ -442,7 +457,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                             context.db_fields_ids_map,
                         )?,
                         self.settings_delta,
-                        context.new_fields_ids_map,
+                        &old_fields_ids_map,
+                        &new_fields_ids_map,
                         &unused_vectors_distribution,
                         old_is_user_provided,
                         true,
@@ -638,7 +654,8 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
         external_docid: &'a str,
         document: D,
         settings_delta: &SD,
-        fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
+        old_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
+        new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
         unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
         old_is_user_provided: bool,
         full_reindex: bool,
@@ -733,10 +750,17 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
                 };
 
-                let extractor =
-                    DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
+                let extractor = DocumentTemplateExtractor::new(
+                    document_template,
+                    doc_alloc,
+                    new_fields_ids_map,
+                );
                 let old_extractor = old_document_template.map(|old_document_template| {
-                    DocumentTemplateExtractor::new(old_document_template, doc_alloc, fields_ids_map)
+                    DocumentTemplateExtractor::new(
+                        old_document_template,
+                        doc_alloc,
+                        old_fields_ids_map,
+                    )
                 });
                 let metadata =
                     Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
@@ -372,11 +372,10 @@ where
     SD: SettingsDelta + Sync,
 {
     // Create the list of document ids to extract
-    let rtxn = indexing_context.index.read_txn()?;
-    let all_document_ids =
-        indexing_context.index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
-    let primary_key =
-        primary_key_from_db(indexing_context.index, &rtxn, &indexing_context.db_fields_ids_map)?;
+    let index = indexing_context.index;
+    let rtxn = index.read_txn()?;
+    let all_document_ids = index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
+    let primary_key = primary_key_from_db(index, &rtxn, &indexing_context.db_fields_ids_map)?;
     let documents = DocumentsIndentifiers::new(&all_document_ids, primary_key);
 
     let span =
@@ -391,6 +390,133 @@ where
         extractor_allocs,
     )?;
 
+    {
+        let WordDocidsCaches {
+            word_docids,
+            word_fid_docids,
+            exact_word_docids,
+            word_position_docids,
+            fid_word_count_docids,
+        } = {
+            let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
+            let _entered = span.enter();
+            SettingsChangeWordDocidsExtractors::run_extraction(
+                settings_delta,
+                &documents,
+                indexing_context,
+                extractor_allocs,
+                IndexingStep::ExtractingWords,
+            )?
+        };
+
+        indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
+
+        {
+            let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::WordDocids);
+
+            merge_and_send_docids(
+                word_docids,
+                index.word_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
+
+            merge_and_send_docids(
+                word_fid_docids,
+                index.word_fid_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordFidDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
+
+            merge_and_send_docids(
+                exact_word_docids,
+                index.exact_word_docids.remap_types(),
+                index,
+                extractor_sender.docids::<ExactWordDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
+
+            merge_and_send_docids(
+                word_position_docids,
+                index.word_position_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordPositionDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
+
+            merge_and_send_docids(
+                fid_word_count_docids,
+                index.field_id_word_count_docids.remap_types(),
+                index,
+                extractor_sender.docids::<FidWordCountDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+    }
+
+    // Run the proximity extraction only if the precision is ByWord.
+    let new_proximity_precision = settings_delta.new_proximity_precision();
+    if *new_proximity_precision == ProximityPrecision::ByWord {
+        let caches = {
+            let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
+            let _entered = span.enter();
+
+            SettingsChangeWordPairProximityDocidsExtractors::run_extraction(
+                settings_delta,
+                &documents,
+                indexing_context,
+                extractor_allocs,
+                IndexingStep::ExtractingWordProximity,
+            )?
+        };
+
+        {
+            let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
+
+            merge_and_send_docids(
+                caches,
+                index.word_pair_proximity_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordPairProximityDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+    }
+
     'vectors: {
         if settings_delta.embedder_actions().is_empty() {
             break 'vectors;
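The proximity-precision handling in this changeset amounts to two small rules: word pairs are only re-extracted when the new precision is `ByWord`, and the stored pairs only need clearing on a `ByWord` to `ByAttribute` downgrade (done further down, next to `delete_old_fid_based_databases`). The enum below is a local stand-in for the crate's `ProximityPrecision`, so this is an illustrative sketch rather than the crate's own API.

// Local stand-in for the crate's ProximityPrecision setting.
#[derive(Clone, Copy, PartialEq)]
enum ProximityPrecision {
    ByWord,
    ByAttribute,
}

// Re-extract word pairs only when the new setting actually uses them.
fn must_extract_pairs(new: ProximityPrecision) -> bool {
    new == ProximityPrecision::ByWord
}

// Wipe the stored pairs only when downgrading from ByWord to ByAttribute.
fn must_clear_pairs(old: ProximityPrecision, new: ProximityPrecision) -> bool {
    old == ProximityPrecision::ByWord && new == ProximityPrecision::ByAttribute
}

fn main() {
    use ProximityPrecision::*;
    assert!(must_extract_pairs(ByWord));
    assert!(!must_extract_pairs(ByAttribute));
    assert!(must_clear_pairs(ByWord, ByAttribute));
    assert!(!must_clear_pairs(ByAttribute, ByWord));
}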
@@ -1,4 +1,4 @@
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, BTreeSet};
 use std::sync::atomic::AtomicBool;
 use std::sync::{Arc, Once, RwLock};
 use std::thread::{self, Builder};
@@ -8,9 +8,11 @@ use document_changes::{DocumentChanges, IndexingContext};
 pub use document_deletion::DocumentDeletion;
 pub use document_operation::{DocumentOperation, PayloadStats};
 use hashbrown::HashMap;
-use heed::{RoTxn, RwTxn};
+use heed::types::DecodeIgnore;
+use heed::{BytesDecode, Database, RoTxn, RwTxn};
 pub use partial_dump::PartialDump;
 pub use post_processing::recompute_word_fst_from_word_docids_database;
+pub use settings_changes::settings_change_extract;
 pub use update_by_function::UpdateByFunction;
 pub use write::ChannelCongestion;
 use write::{build_vectors, update_index, write_to_db};
@@ -20,12 +22,18 @@ use super::steps::IndexingStep;
 use super::thread_local::ThreadLocal;
 use crate::documents::PrimaryKey;
 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
+use crate::heed_codec::StrBEU16Codec;
 use crate::progress::{EmbedderStats, Progress};
+use crate::proximity::ProximityPrecision;
+use crate::update::new::steps::SettingsIndexerStep;
+use crate::update::new::FacetFieldIdsDelta;
 use crate::update::settings::SettingsDelta;
 use crate::update::GrenadParameters;
 use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
 use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
-use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
+use crate::{
+    Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
+};
 
 #[cfg(not(feature = "enterprise"))]
 pub mod community_edition;
@@ -242,6 +250,20 @@ where
     SD: SettingsDelta + Sync,
 {
     delete_old_embedders_and_fragments(wtxn, index, settings_delta)?;
+    delete_old_fid_based_databases(wtxn, index, settings_delta, must_stop_processing, progress)?;
+
+    // Clear word_pair_proximity if byWord to byAttribute
+    let old_proximity_precision = settings_delta.old_proximity_precision();
+    let new_proximity_precision = settings_delta.new_proximity_precision();
+    if *old_proximity_precision == ProximityPrecision::ByWord
+        && *new_proximity_precision == ProximityPrecision::ByAttribute
+    {
+        index.word_pair_proximity_docids.clear(wtxn)?;
+    }
+
+    // TODO delete useless searchable databases
+    // - Clear fid_prefix_* in the post processing
+    // - clear the prefix + fid_prefix if setting `PrefixSearch` is enabled
+
     let mut bbbuffers = Vec::new();
     let finished_extraction = AtomicBool::new(false);
@@ -300,6 +322,8 @@ where
             .unwrap()
     })?;
 
+    let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
+
     let new_embedders = settings_delta.new_embedders();
     let embedder_actions = settings_delta.embedder_actions();
     let index_embedder_category_ids = settings_delta.new_embedder_category_id();
@@ -334,6 +358,18 @@ where
     })
     .unwrap()?;
 
+    pool.install(|| {
+        // WARN When implementing the facets don't forget this
+        let facet_field_ids_delta = FacetFieldIdsDelta::new(0, 0);
+        post_processing::post_process(
+            indexing_context,
+            wtxn,
+            global_fields_ids_map,
+            facet_field_ids_delta,
+        )
+    })
+    .unwrap()?;
+
     indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
     index.cellulite.build(
         wtxn,
@@ -463,6 +499,106 @@ where
     Ok(())
 }
 
+/// Deletes entries referring the provided
+/// fids from the fid-based databases.
+fn delete_old_fid_based_databases<SD, MSP>(
+    wtxn: &mut RwTxn<'_>,
+    index: &Index,
+    settings_delta: &SD,
+    must_stop_processing: &MSP,
+    progress: &Progress,
+) -> Result<()>
+where
+    SD: SettingsDelta + Sync,
+    MSP: Fn() -> bool + Sync,
+{
+    let fids_to_delete: Option<BTreeSet<_>> = {
+        let rtxn = index.read_txn()?;
+        let fields_ids_map = index.fields_ids_map(&rtxn)?;
+        let old_searchable_attributes = settings_delta.old_searchable_attributes().as_ref();
+        let new_searchable_attributes = settings_delta.new_searchable_attributes().as_ref();
+        old_searchable_attributes.zip(new_searchable_attributes).map(|(old, new)| {
+            old.iter()
+                // Ignore the field if it is not searchable anymore
+                // or if it was never referenced in any document
+                .filter_map(|name| if new.contains(name) { None } else { fields_ids_map.id(name) })
+                .collect()
+        })
+    };
+
+    let Some(fids_to_delete) = fids_to_delete else {
+        return Ok(());
+    };
+
+    progress.update_progress(SettingsIndexerStep::DeletingOldWordFidDocids);
+    delete_old_word_fid_docids(wtxn, index.word_fid_docids, must_stop_processing, &fids_to_delete)?;
+
+    progress.update_progress(SettingsIndexerStep::DeletingOldFidWordCountDocids);
+    delete_old_fid_word_count_docids(wtxn, index, must_stop_processing, &fids_to_delete)?;
+
+    progress.update_progress(SettingsIndexerStep::DeletingOldWordPrefixFidDocids);
+    delete_old_word_fid_docids(
+        wtxn,
+        index.word_prefix_fid_docids,
+        must_stop_processing,
+        &fids_to_delete,
+    )?;
+
+    Ok(())
+}
+
+fn delete_old_word_fid_docids<'txn, MSP, DC>(
+    wtxn: &mut RwTxn<'txn>,
+    database: Database<StrBEU16Codec, DC>,
+    must_stop_processing: &MSP,
+    fids_to_delete: &BTreeSet<u16>,
+) -> Result<(), Error>
+where
+    MSP: Fn() -> bool + Sync,
+    DC: BytesDecode<'txn>,
+{
+    let mut iter = database.iter_mut(wtxn)?.remap_data_type::<DecodeIgnore>();
+    while let Some(((_word, fid), ())) = iter.next().transpose()? {
+        // TODO should I call it that often?
+        if must_stop_processing() {
+            return Err(Error::InternalError(InternalError::AbortedIndexation));
+        }
+
+        if fids_to_delete.contains(&fid) {
+            // safety: We don't keep any references to the data.
+            unsafe { iter.del_current()? };
+        }
+    }
+
+    Ok(())
+}
+
+fn delete_old_fid_word_count_docids<MSP>(
+    wtxn: &mut RwTxn<'_>,
+    index: &Index,
+    must_stop_processing: &MSP,
+    fids_to_delete: &BTreeSet<u16>,
+) -> Result<(), Error>
+where
+    MSP: Fn() -> bool + Sync,
+{
+    let db = index.field_id_word_count_docids.remap_data_type::<DecodeIgnore>();
+    for &fid_to_delete in fids_to_delete {
+        if must_stop_processing() {
+            return Err(Error::InternalError(InternalError::AbortedIndexation));
+        }
+
+        let mut iter = db.prefix_iter_mut(wtxn, &(fid_to_delete, 0))?;
+        while let Some(((fid, _word_count), ())) = iter.next().transpose()? {
+            debug_assert_eq!(fid, fid_to_delete);
+            // safety: We don't keep any references to the data.
+            unsafe { iter.del_current()? };
+        }
+    }
+
+    Ok(())
+}
+
 fn indexer_memory_settings(
     current_num_threads: usize,
     grenad_parameters: GrenadParameters,
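The `fids_to_delete` computation in `delete_old_fid_based_databases` above is essentially a difference between the old and new searchable lists, resolved to field ids. The sketch below shows that shape on plain std types; the `HashMap` lookup is a toy stand-in for the index's persistent fields-ids map and is not part of the commit.

use std::collections::{BTreeSet, HashMap};

// Fields that were searchable before but are no longer, resolved to their ids.
// `fields_ids_map` stands in for the index's persistent name -> fid mapping.
fn fids_to_delete(
    old_searchable: Option<&[String]>,
    new_searchable: Option<&[String]>,
    fields_ids_map: &HashMap<String, u16>,
) -> Option<BTreeSet<u16>> {
    old_searchable.zip(new_searchable).map(|(old, new)| {
        old.iter()
            .filter_map(|name| {
                if new.contains(name) {
                    None
                } else {
                    fields_ids_map.get(name).copied()
                }
            })
            .collect()
    })
}

fn main() {
    let old = vec!["title".to_string(), "overview".to_string()];
    let new = vec!["title".to_string()];
    let map = HashMap::from([("title".to_string(), 0u16), ("overview".to_string(), 1u16)]);
    // Only "overview" stopped being searchable, so only its fid is scheduled for deletion.
    assert_eq!(fids_to_delete(Some(old.as_slice()), Some(new.as_slice()), &map), Some(BTreeSet::from([1u16])));
}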
@@ -28,6 +28,9 @@ make_enum_progress! {
         ChangingVectorStore,
         UsingStableIndexer,
         UsingExperimentalIndexer,
+        DeletingOldWordFidDocids,
+        DeletingOldFidWordCountDocids,
+        DeletingOldWordPrefixFidDocids,
     }
 }
@@ -1589,33 +1589,33 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
|
|
||||||
// only use the new indexer when only the embedder possibly changed
|
// only use the new indexer when only the embedder possibly changed
|
||||||
if let Self {
|
if let Self {
|
||||||
searchable_fields: Setting::NotSet,
|
searchable_fields: _,
|
||||||
displayed_fields: Setting::NotSet,
|
displayed_fields: Setting::NotSet,
|
||||||
filterable_fields: Setting::NotSet,
|
filterable_fields: Setting::NotSet,
|
||||||
sortable_fields: Setting::NotSet,
|
sortable_fields: Setting::NotSet,
|
||||||
criteria: Setting::NotSet,
|
criteria: Setting::NotSet,
|
||||||
stop_words: Setting::NotSet,
|
stop_words: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||||
non_separator_tokens: Setting::NotSet,
|
non_separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||||
separator_tokens: Setting::NotSet,
|
separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||||
dictionary: Setting::NotSet,
|
dictionary: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||||
distinct_field: Setting::NotSet,
|
distinct_field: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
primary_key: Setting::NotSet,
|
primary_key: Setting::NotSet,
|
||||||
authorize_typos: Setting::NotSet,
|
authorize_typos: Setting::NotSet,
|
||||||
min_word_len_two_typos: Setting::NotSet,
|
min_word_len_two_typos: Setting::NotSet,
|
||||||
min_word_len_one_typo: Setting::NotSet,
|
min_word_len_one_typo: Setting::NotSet,
|
||||||
exact_words: Setting::NotSet,
|
exact_words: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||||
exact_attributes: Setting::NotSet,
|
exact_attributes: _,
|
||||||
max_values_per_facet: Setting::NotSet,
|
max_values_per_facet: Setting::NotSet,
|
||||||
sort_facet_values_by: Setting::NotSet,
|
sort_facet_values_by: Setting::NotSet,
|
||||||
pagination_max_total_hits: Setting::NotSet,
|
pagination_max_total_hits: Setting::NotSet,
|
||||||
proximity_precision: Setting::NotSet,
|
proximity_precision: _,
|
||||||
embedder_settings: _,
|
embedder_settings: _,
|
||||||
search_cutoff: Setting::NotSet,
|
search_cutoff: Setting::NotSet,
|
||||||
localized_attributes_rules: Setting::NotSet,
|
localized_attributes_rules: Setting::NotSet, // TODO to start with
|
||||||
prefix_search: Setting::NotSet,
|
prefix_search: Setting::NotSet, // TODO continue with this
|
||||||
facet_search: Setting::NotSet,
|
facet_search: Setting::NotSet,
|
||||||
disable_on_numbers: Setting::NotSet,
|
disable_on_numbers: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||||
chat: Setting::NotSet,
|
chat: Setting::NotSet,
|
||||||
vector_store: Setting::NotSet,
|
vector_store: Setting::NotSet,
|
||||||
wtxn: _,
|
wtxn: _,
|
||||||
@@ -1632,10 +1632,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
// Update index settings
|
// Update index settings
|
||||||
let embedding_config_updates = self.update_embedding_configs()?;
|
let embedding_config_updates = self.update_embedding_configs()?;
|
||||||
self.update_user_defined_searchable_attributes()?;
|
self.update_user_defined_searchable_attributes()?;
|
||||||
|
self.update_exact_attributes()?;
|
||||||
|
self.update_proximity_precision()?;
|
||||||
|
|
||||||
let mut new_inner_settings =
|
// Note that we don't need to update the searchables here,
|
||||||
InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
|
// as it will be done after the settings update.
|
||||||
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
|
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
|
||||||
|
|
||||||
let primary_key_id = self
|
let primary_key_id = self
|
||||||
.index
|
.index
|
||||||
@@ -2062,9 +2064,12 @@ impl InnerIndexSettings {
         let sortable_fields = index.sortable_fields(rtxn)?;
         let asc_desc_fields = index.asc_desc_fields(rtxn)?;
         let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
-        let user_defined_searchable_attributes = index
-            .user_defined_searchable_fields(rtxn)?
-            .map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
+        let user_defined_searchable_attributes = match index.user_defined_searchable_fields(rtxn)? {
+            Some(fields) if fields.contains(&"*") => None,
+            Some(fields) => Some(fields.into_iter().map(|f| f.to_string()).collect()),
+            None => None,
+        };
+
         let builder = MetadataBuilder::from_index(index, rtxn)?;
         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
         let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
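The new `match` above encodes that a user-defined searchable-fields list containing the `*` wildcard is equivalent to having no restriction at all (`None`). A self-contained sketch of the same mapping follows; the helper name `normalize_searchable_fields` is hypothetical and not part of the codebase.

// Mirrors the match above: `*` in the user-defined list means "no restriction",
// an explicit list is kept as owned strings, and an absent list stays absent.
fn normalize_searchable_fields(fields: Option<Vec<&str>>) -> Option<Vec<String>> {
    match fields {
        Some(fields) if fields.contains(&"*") => None,
        Some(fields) => Some(fields.into_iter().map(|f| f.to_string()).collect()),
        None => None,
    }
}

fn main() {
    // `["*"]` collapses to `None`, i.e. every field stays searchable.
    assert_eq!(normalize_searchable_fields(Some(vec!["*"])), None);
    // An explicit list is preserved.
    assert_eq!(
        normalize_searchable_fields(Some(vec!["title", "overview"])),
        Some(vec!["title".to_string(), "overview".to_string()])
    );
    // No list at all also maps to `None`.
    assert_eq!(normalize_searchable_fields(None), None);
}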
@@ -2578,8 +2583,20 @@ fn deserialize_sub_embedder(
 /// Implement this trait for the settings delta type.
 /// This is used in the new settings update flow and will allow to easily replace the old settings delta type: `InnerIndexSettingsDiff`.
 pub trait SettingsDelta {
-    fn new_embedders(&self) -> &RuntimeEmbedders;
+    fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
+    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
+
+    fn old_searchable_attributes(&self) -> &Option<Vec<String>>;
+    fn new_searchable_attributes(&self) -> &Option<Vec<String>>;
+
+    fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms;
+    fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms;
+
+    fn old_proximity_precision(&self) -> &ProximityPrecision;
+    fn new_proximity_precision(&self) -> &ProximityPrecision;
+
     fn old_embedders(&self) -> &RuntimeEmbedders;
+    fn new_embedders(&self) -> &RuntimeEmbedders;
     fn new_embedder_category_id(&self) -> &HashMap<String, u8>;
     fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction>;
     fn try_for_each_fragment_diff<F, E>(
@@ -2589,7 +2606,6 @@ pub trait SettingsDelta {
     ) -> std::result::Result<(), E>
     where
         F: FnMut(FragmentDiff) -> std::result::Result<(), E>;
-    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
 }

 pub struct FragmentDiff<'a> {
@@ -2598,26 +2614,47 @@ pub struct FragmentDiff<'a> {
 }

 impl SettingsDelta for InnerIndexSettingsDiff {
-    fn new_embedders(&self) -> &RuntimeEmbedders {
-        &self.new.runtime_embedders
+    fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
+        &self.old.fields_ids_map
+    }
+    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
+        &self.new.fields_ids_map
+    }
+
+    fn old_searchable_attributes(&self) -> &Option<Vec<String>> {
+        &self.old.user_defined_searchable_attributes
+    }
+    fn new_searchable_attributes(&self) -> &Option<Vec<String>> {
+        &self.new.user_defined_searchable_attributes
+    }
+
+    fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms {
+        &self.old.disabled_typos_terms
+    }
+    fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms {
+        &self.new.disabled_typos_terms
+    }
+
+    fn old_proximity_precision(&self) -> &ProximityPrecision {
+        &self.old.proximity_precision
+    }
+    fn new_proximity_precision(&self) -> &ProximityPrecision {
+        &self.new.proximity_precision
     }

     fn old_embedders(&self) -> &RuntimeEmbedders {
         &self.old.runtime_embedders
     }

+    fn new_embedders(&self) -> &RuntimeEmbedders {
+        &self.new.runtime_embedders
+    }
+
     fn new_embedder_category_id(&self) -> &HashMap<String, u8> {
         &self.new.embedder_category_id
     }

     fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction> {
         &self.embedding_config_updates
     }

-    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
-        &self.new.fields_ids_map
-    }
-
     fn try_for_each_fragment_diff<F, E>(
         &self,
         embedder_name: &str,
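As a rough illustration of how the paired old_/new_ accessors on `SettingsDelta` can be consumed, here is a pared-down, self-contained stand-in; `SettingsDeltaLike`, `ToySettingsDelta`, and `searchable_attributes_changed` are illustrative names only, not part of the Meilisearch codebase.

// A toy version of the trait above, reduced to one old/new accessor pair,
// just to show how a consumer compares the two sides of a settings delta.
trait SettingsDeltaLike {
    fn old_searchable_attributes(&self) -> &Option<Vec<String>>;
    fn new_searchable_attributes(&self) -> &Option<Vec<String>>;
}

struct ToySettingsDelta {
    old: Option<Vec<String>>,
    new: Option<Vec<String>>,
}

impl SettingsDeltaLike for ToySettingsDelta {
    fn old_searchable_attributes(&self) -> &Option<Vec<String>> {
        &self.old
    }
    fn new_searchable_attributes(&self) -> &Option<Vec<String>> {
        &self.new
    }
}

// A consumer only needs the delta to decide whether the searchable
// attributes changed between the old and the new settings.
fn searchable_attributes_changed<D: SettingsDeltaLike>(delta: &D) -> bool {
    delta.old_searchable_attributes() != delta.new_searchable_attributes()
}

fn main() {
    let delta = ToySettingsDelta { old: None, new: Some(vec!["name".to_string()]) };
    assert!(searchable_attributes_changed(&delta));
}

In the actual code, `InnerIndexSettingsDiff` plays the role of `ToySettingsDelta`, as the impl block above shows.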
@@ -14,28 +14,21 @@ fn set_and_reset_searchable_fields() {
     let index = TempIndex::new();

     // First we send 3 documents with ids from 1 to 3.
-    let mut wtxn = index.write_txn().unwrap();
-
     index
-        .add_documents_using_wtxn(
-            &mut wtxn,
-            documents!([
-                { "id": 1, "name": "kevin", "age": 23 },
-                { "id": 2, "name": "kevina", "age": 21},
-                { "id": 3, "name": "benoit", "age": 34 }
-            ]),
-        )
+        .add_documents(documents!([
+            { "id": 1, "name": "kevin", "age": 23 },
+            { "id": 2, "name": "kevina", "age": 21},
+            { "id": 3, "name": "benoit", "age": 34 }
+        ]))
         .unwrap();

     // We change the searchable fields to be the "name" field only.
     index
-        .update_settings_using_wtxn(&mut wtxn, |settings| {
+        .update_settings(|settings| {
             settings.set_searchable_fields(vec!["name".into()]);
         })
         .unwrap();

-    wtxn.commit().unwrap();
-
     db_snap!(index, fields_ids_map, @r###"
     0 id |
     1 name |