Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-12-12 23:47:00 +00:00

Compare commits: v1.12.7 ... try-merge-
3 commits

| Author | SHA1 | Date |
|---|---|---|
| | 14a980e54e | |
| | cbc453c6d1 | |
| | 2fb065b9fb | |
Cargo.lock (generated, 90 changes)
@@ -496,7 +496,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
 
 [[package]]
 name = "benchmarks"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "anyhow",
 "bumpalo",
@@ -689,7 +689,7 @@ dependencies = [
 
 [[package]]
 name = "build-info"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "anyhow",
 "time",
@@ -706,20 +706,6 @@ dependencies = [
 "serde",
 ]
 
-[[package]]
-name = "bumparaw-collections"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
-dependencies = [
-"allocator-api2",
-"bitpacking",
-"bumpalo",
-"hashbrown 0.15.1",
-"serde",
-"serde_json",
-]
-
 [[package]]
 name = "byte-unit"
 version = "5.1.4"
@@ -1664,7 +1650,7 @@ dependencies = [
 
 [[package]]
 name = "dump"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "anyhow",
 "big_s",
@@ -1876,7 +1862,7 @@ checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
 
 [[package]]
 name = "file-store"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "tempfile",
 "thiserror",
@@ -1898,7 +1884,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "insta",
 "nom",
@@ -1918,7 +1904,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "criterion",
 "serde_json",
@@ -2057,7 +2043,7 @@ dependencies = [
 
 [[package]]
 name = "fuzzers"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "arbitrary",
 "bumpalo",
@@ -2624,15 +2610,13 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
 
 [[package]]
 name = "index-scheduler"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "anyhow",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "big_s",
 "bincode",
 "bumpalo",
-"bumparaw-collections",
-"convert_case 0.6.0",
 "crossbeam-channel",
 "csv",
 "derive_builder 0.20.0",
@@ -2647,6 +2631,7 @@ dependencies = [
 "meilisearch-types",
 "memmap2",
 "page_size",
+"raw-collections",
 "rayon",
 "roaring",
 "serde",
@@ -2662,12 +2647,12 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.7.0"
+version = "2.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
 dependencies = [
 "equivalent",
-"hashbrown 0.15.1",
+"hashbrown 0.14.3",
 "serde",
 ]
 
@@ -2822,7 +2807,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "criterion",
 "serde_json",
@@ -3441,7 +3426,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meili-snap"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "insta",
 "md5",
@@ -3450,7 +3435,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "actix-cors",
 "actix-http",
@@ -3540,7 +3525,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "base64 0.22.1",
 "enum-iterator",
@@ -3559,12 +3544,11 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "actix-web",
 "anyhow",
 "bumpalo",
-"bumparaw-collections",
 "convert_case 0.6.0",
 "csv",
 "deserr",
@@ -3577,8 +3561,8 @@ dependencies = [
 "meili-snap",
 "memmap2",
 "milli",
+"raw-collections",
 "roaring",
-"rustc-hash 2.1.0",
 "serde",
 "serde-cs",
 "serde_json",
@@ -3592,19 +3576,16 @@ dependencies = [
 
 [[package]]
 name = "meilitool"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "anyhow",
 "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
 "clap",
 "dump",
 "file-store",
-"indexmap",
 "meilisearch-auth",
 "meilisearch-types",
 "serde",
-"serde_json",
-"tempfile",
 "time",
 "uuid",
 ]
@@ -3627,7 +3608,7 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "allocator-api2",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -3637,7 +3618,6 @@ dependencies = [
 "bincode",
 "bstr",
 "bumpalo",
-"bumparaw-collections",
 "bytemuck",
 "byteorder",
 "candle-core",
@@ -3676,12 +3656,13 @@ dependencies = [
 "once_cell",
 "ordered-float",
 "rand",
+"raw-collections",
 "rayon",
 "rayon-par-bridge",
 "rhai",
 "roaring",
 "rstar",
-"rustc-hash 2.1.0",
+"rustc-hash 2.0.0",
 "serde",
 "serde_json",
 "slice-group-by",
@@ -4083,7 +4064,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "big_s",
 "serde_json",
@@ -4430,7 +4411,7 @@ dependencies = [
 "bytes",
 "rand",
 "ring",
-"rustc-hash 2.1.0",
+"rustc-hash 2.0.0",
 "rustls",
 "slab",
 "thiserror",
@@ -4506,6 +4487,19 @@ dependencies = [
 "rand",
 ]
 
+[[package]]
+name = "raw-collections"
+version = "0.1.0"
+source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
+dependencies = [
+"allocator-api2",
+"bitpacking",
+"bumpalo",
+"hashbrown 0.15.1",
+"serde",
+"serde_json",
+]
+
 [[package]]
 name = "raw-cpuid"
 version = "10.7.0"
@@ -4803,9 +4797,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
 [[package]]
 name = "rustc-hash"
-version = "2.1.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
+checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
 
 [[package]]
 name = "rustc_version"
@@ -4974,9 +4968,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.133"
+version = "1.0.132"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
+checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
 dependencies = [
 "indexmap",
 "itoa",
@@ -6486,7 +6480,7 @@ dependencies = [
 
 [[package]]
 name = "xtask"
-version = "1.12.7"
+version = "1.12.0"
 dependencies = [
 "anyhow",
 "build-info",
@@ -22,7 +22,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.12.7"
+version = "1.12.0"
 authors = [
 "Quentin de Quelen <quentin@dequelen.me>",
 "Clément Renault <clement@meilisearch.com>",
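The source diffs that follow repeatedly toggle the same indexer arguments between the two sides of the compare: one side passes closure callbacks (`&|| false`, `&|_progress| ()`), the other passes a shared `Progress` value (`Progress::default()`) that the pipeline updates through `update_progress`. As a rough illustration of the difference between the two styles, here is a standalone sketch — not Meilisearch code; every type and function name below is an illustrative stand-in, only the `Progress`/`update_progress` naming mirrors the diff:

```rust
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

/// Callback style: the caller hands the indexer closures, and the indexer
/// invokes them to check for interruption and to report each step.
fn index_with_callback(must_stop: &dyn Fn() -> bool, on_step: &dyn Fn(u32)) {
    for step in 0..3 {
        if must_stop() {
            return;
        }
        on_step(step);
    }
}

/// Object style: the caller hands the indexer a cheaply clonable `Progress`
/// value that any stage of the pipeline can update.
#[derive(Default, Clone)]
struct Progress {
    current_step: Arc<AtomicU32>,
}

impl Progress {
    fn update_progress(&self, step: u32) {
        self.current_step.store(step, Ordering::Relaxed);
    }
}

fn index_with_progress(must_stop: &dyn Fn() -> bool, progress: Progress) {
    for step in 0..3 {
        if must_stop() {
            return;
        }
        progress.update_progress(step);
    }
}

fn main() {
    // Mirrors the `&|| false` and `&|_progress| ()` arguments seen on one side of the diff.
    index_with_callback(&|| false, &|_progress: u32| ());

    // Mirrors the `&|| false` and `Progress::default()` arguments on the other side.
    let progress = Progress::default();
    index_with_progress(&|| false, progress.clone());
    println!("last reported step: {}", progress.current_step.load(Ordering::Relaxed));
}
```

The object style lets deeply nested steps report progress without threading an extra closure parameter through every call, which is what the repeated `progress.update_progress(...)` lines in the scheduler diff at the end of this compare do.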
@@ -8,7 +8,6 @@ use bumpalo::Bump;
 use criterion::{criterion_group, criterion_main, Criterion};
 use milli::documents::PrimaryKey;
 use milli::heed::{EnvOpenOptions, RwTxn};
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -152,7 +151,7 @@ fn indexing_songs_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -167,7 +166,7 @@ fn indexing_songs_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -219,7 +218,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -234,7 +233,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -264,7 +263,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -279,7 +278,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -333,7 +332,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -348,7 +347,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -410,7 +409,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -425,7 +424,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -455,7 +454,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -470,7 +469,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -496,7 +495,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -511,7 +510,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -564,7 +563,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -579,7 +578,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -631,7 +630,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -646,7 +645,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -698,7 +697,7 @@ fn indexing_wiki(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -713,7 +712,7 @@ fn indexing_wiki(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -764,7 +763,7 @@ fn reindexing_wiki(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -779,7 +778,7 @@ fn reindexing_wiki(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -809,7 +808,7 @@ fn reindexing_wiki(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -824,7 +823,7 @@ fn reindexing_wiki(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -877,7 +876,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -892,7 +891,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -954,7 +953,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -969,7 +968,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1000,7 +999,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1015,7 +1014,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1042,7 +1041,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1057,7 +1056,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1109,7 +1108,7 @@ fn indexing_movies_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1124,7 +1123,7 @@ fn indexing_movies_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1175,7 +1174,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1190,7 +1189,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1220,7 +1219,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1235,7 +1234,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1288,7 +1287,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1303,7 +1302,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1351,7 +1350,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1401,7 +1400,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1416,7 +1415,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1446,7 +1445,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1461,7 +1460,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1487,7 +1486,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1502,7 +1501,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1577,7 +1576,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1592,7 +1591,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1668,7 +1667,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1683,7 +1682,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1751,7 +1750,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1766,7 +1765,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1818,7 +1817,7 @@ fn indexing_geo(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1833,7 +1832,7 @@ fn indexing_geo(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1884,7 +1883,7 @@ fn reindexing_geo(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1899,7 +1898,7 @@ fn reindexing_geo(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1929,7 +1928,7 @@ fn reindexing_geo(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -1944,7 +1943,7 @@ fn reindexing_geo(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -1997,7 +1996,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -2012,7 +2011,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -10,7 +10,6 @@ use bumpalo::Bump;
 use criterion::BenchmarkId;
 use memmap2::Mmap;
 use milli::heed::EnvOpenOptions;
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -111,7 +110,7 @@ pub fn base_setup(conf: &Conf) -> Index {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -126,7 +125,7 @@ pub fn base_setup(conf: &Conf) -> Index {
 &document_changes,
 EmbeddingConfigs::default(),
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -136,14 +136,6 @@ pub struct File {
 }
 
 impl File {
-pub fn from_parts(path: PathBuf, file: Option<NamedTempFile>) -> Self {
-Self { path, file }
-}
-
-pub fn into_parts(self) -> (PathBuf, Option<NamedTempFile>) {
-(self.path, self.file)
-}
-
 pub fn dry_file() -> Result<Self> {
 Ok(Self { path: PathBuf::new(), file: None })
 }
@@ -10,7 +10,6 @@ use either::Either;
 use fuzzers::Operation;
 use milli::documents::mmap_from_objects;
 use milli::heed::EnvOpenOptions;
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig};
 use milli::vector::EmbeddingConfigs;
@@ -129,7 +128,7 @@ fn main() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();
 
@@ -144,7 +143,7 @@ fn main() {
 &document_changes,
 embedders,
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();
 
@@ -13,9 +13,6 @@ license.workspace = true
 [dependencies]
 anyhow = "1.0.86"
 bincode = "1.3.3"
-bumpalo = "3.16.0"
-bumparaw-collections = "0.1.2"
-convert_case = "0.6.0"
 csv = "1.3.0"
 derive_builder = "0.20.0"
 dump = { path = "../dump" }
@@ -24,8 +21,8 @@ file-store = { path = "../file-store" }
 flate2 = "1.0.30"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-memmap2 = "0.9.4"
 page_size = "0.6.0"
+raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 rayon = "1.10.0"
 roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
@@ -33,6 +30,7 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
 synchronoise = "1.0.1"
 tempfile = "3.10.1"
 thiserror = "1.0.61"
+memmap2 = "0.9.4"
 time = { version = "0.3.36", features = [
 "serde-well-known",
 "formatting",
@@ -42,6 +40,7 @@ time = { version = "0.3.36", features = [
 tracing = "0.1.40"
 ureq = "2.10.0"
 uuid = { version = "1.10.0", features = ["serde", "v4"] }
+bumpalo = "3.16.0"
 
 [dev-dependencies]
 arroy = "0.5.0"
@@ -22,26 +22,26 @@ use std::ffi::OsStr;
 use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;
-use std::sync::atomic::Ordering;
+use std::sync::atomic::{self, AtomicU64};
+use std::time::Duration;
 
 use bumpalo::collections::CollectIn;
 use bumpalo::Bump;
 use dump::IndexMetadata;
 use meilisearch_types::batches::BatchId;
 use meilisearch_types::heed::{RoTxn, RwTxn};
-use meilisearch_types::milli::documents::PrimaryKey;
+use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
 use meilisearch_types::milli::heed::CompactionOption;
-use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
-use meilisearch_types::milli::update::{
-DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings,
-};
+use meilisearch_types::milli::update::{IndexDocumentsMethod, Settings as MilliSettings};
 use meilisearch_types::milli::vector::parsed_vectors::{
 ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
 };
 use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
-use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
+use meilisearch_types::tasks::{
+Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress,
+};
 use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
 use roaring::RoaringBitmap;
 use time::macros::format_description;
@@ -49,13 +49,6 @@ use time::OffsetDateTime;
 use uuid::Uuid;
 
 use crate::autobatcher::{self, BatchKind};
-use crate::processing::{
-AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress,
-DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress,
-DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress,
-SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
-UpdateIndexProgress, VariableNameStep,
-};
 use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
 use crate::{Error, IndexScheduler, Result, TaskId};
 
@@ -566,12 +559,11 @@ impl IndexScheduler {
 /// The list of tasks that were processed. The metadata of each task in the returned
 /// list is updated accordingly, with the exception of the its date fields
 /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
-#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
+#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
 pub(crate) fn process_batch(
 &self,
 batch: Batch,
 current_batch: &mut ProcessingBatch,
-progress: Progress,
 ) -> Result<Vec<Task>> {
 #[cfg(test)]
 {
@@ -591,13 +583,8 @@ impl IndexScheduler {
 };
 
 let rtxn = self.env.read_txn()?;
-let mut canceled_tasks = self.cancel_matched_tasks(
-&rtxn,
-task.uid,
-current_batch,
-matched_tasks,
-&progress,
-)?;
+let mut canceled_tasks =
+self.cancel_matched_tasks(&rtxn, task.uid, current_batch, matched_tasks)?;
 
 task.status = Status::Succeeded;
 match &mut task.details {
@@ -628,8 +615,7 @@ impl IndexScheduler {
 }
 
 let mut wtxn = self.env.write_txn()?;
-let mut deleted_tasks =
-self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
+let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
 wtxn.commit()?;
 
 for task in tasks.iter_mut() {
@@ -655,8 +641,6 @@ impl IndexScheduler {
 Ok(tasks)
 }
 Batch::SnapshotCreation(mut tasks) => {
-progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
-
 fs::create_dir_all(&self.snapshots_path)?;
 let temp_snapshot_dir = tempfile::tempdir()?;
 
@@ -677,7 +661,6 @@ impl IndexScheduler {
 // two read operations as the task processing is synchronous.
 
 // 2.1 First copy the LMDB env of the index-scheduler
-progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
 let dst = temp_snapshot_dir.path().join("tasks");
 fs::create_dir_all(&dst)?;
 self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
@@ -690,29 +673,18 @@ impl IndexScheduler {
 fs::create_dir_all(&update_files_dir)?;
 
 // 2.4 Only copy the update files of the enqueued tasks
-progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
-let enqueued = self.get_status(&rtxn, Status::Enqueued)?;
-let (atomic, update_file_progress) =
-AtomicUpdateFileStep::new(enqueued.len() as u32);
-progress.update_progress(update_file_progress);
-for task_id in enqueued {
+for task_id in self.get_status(&rtxn, Status::Enqueued)? {
 let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
 if let Some(content_uuid) = task.content_uuid() {
 let src = self.file_store.get_update_path(content_uuid);
 let dst = update_files_dir.join(content_uuid.to_string());
 fs::copy(src, dst)?;
 }
-atomic.fetch_add(1, Ordering::Relaxed);
 }
 
 // 3. Snapshot every indexes
-progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
-let index_mapping = self.index_mapper.index_mapping;
-let nb_indexes = index_mapping.len(&rtxn)? as u32;
-
-for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
+for result in self.index_mapper.index_mapping.iter(&rtxn)? {
 let (name, uuid) = result?;
-progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes));
 let index = self.index_mapper.index(&rtxn, name)?;
 let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
 fs::create_dir_all(&dst)?;
@@ -724,7 +696,6 @@ impl IndexScheduler {
 drop(rtxn);
 
 // 4. Snapshot the auth LMDB env
-progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
 let dst = temp_snapshot_dir.path().join("auth");
 fs::create_dir_all(&dst)?;
 // TODO We can't use the open_auth_store_env function here but we should
@@ -737,7 +708,6 @@ impl IndexScheduler {
 auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
 
 // 5. Copy and tarball the flat snapshot
-progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
 // 5.1 Find the original name of the database
 // TODO find a better way to get this path
 let mut base_path = self.env.path().to_owned();
@@ -770,7 +740,6 @@ impl IndexScheduler {
 Ok(tasks)
 }
 Batch::Dump(mut task) => {
-progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
 let started_at = OffsetDateTime::now_utc();
 let (keys, instance_uid) =
 if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
@@ -781,7 +750,6 @@ impl IndexScheduler {
 let dump = dump::DumpWriter::new(*instance_uid)?;
 
 // 1. dump the keys
-progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
 let mut dump_keys = dump.create_keys()?;
 for key in keys {
 dump_keys.push_key(key)?;
@@ -791,13 +759,7 @@ impl IndexScheduler {
 let rtxn = self.env.read_txn()?;
 
 // 2. dump the tasks
-progress.update_progress(DumpCreationProgress::DumpTheTasks);
 let mut dump_tasks = dump.create_tasks_queue()?;
-
-let (atomic, update_task_progress) =
-AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32);
-progress.update_progress(update_task_progress);
-
 for ret in self.all_tasks.iter(&rtxn)? {
 if self.must_stop_processing.get() {
 return Err(Error::AbortedTask);
@@ -819,13 +781,6 @@ impl IndexScheduler {
 t.started_at = Some(started_at);
 t.finished_at = Some(finished_at);
 }
-
-// Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
-// This prevent from referencing *future* batches not actually associated with the task.
-//
-// See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
-t.batch_uid = None;
-
 let mut dump_content_file = dump_tasks.push_task(&t.into())?;
 
 // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
@@ -836,37 +791,29 @@ impl IndexScheduler {
 if status == Status::Enqueued {
 let content_file = self.file_store.get_update(content_file)?;
 
-for document in
-serde_json::de::Deserializer::from_reader(content_file).into_iter()
-{
-let document = document.map_err(|e| {
-Error::from_milli(
-milli::InternalError::SerdeJson(e).into(),
-None,
-)
-})?;
-dump_content_file.push_document(&document)?;
-}
+let reader = DocumentsBatchReader::from_reader(content_file)
+.map_err(|e| Error::from_milli(e.into(), None))?;
 
+let (mut cursor, documents_batch_index) =
+reader.into_cursor_and_fields_index();
+
+while let Some(doc) = cursor
+.next_document()
+.map_err(|e| Error::from_milli(e.into(), None))?
+{
+dump_content_file.push_document(
+&obkv_to_object(doc, &documents_batch_index)
+.map_err(|e| Error::from_milli(e, None))?,
+)?;
+}
 dump_content_file.flush()?;
 }
 }
-atomic.fetch_add(1, Ordering::Relaxed);
 }
 dump_tasks.flush()?;
 
 // 3. Dump the indexes
-progress.update_progress(DumpCreationProgress::DumpTheIndexes);
-let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
-let mut count = 0;
 self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
-progress.update_progress(VariableNameStep::new(
-uid.to_string(),
-count,
-nb_indexes,
-));
-count += 1;
-
 let rtxn = index.read_txn()?;
 let metadata = IndexMetadata {
 uid: uid.to_owned(),
@@ -886,12 +833,6 @@ impl IndexScheduler {
 .embedding_configs(&rtxn)
 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
 
-let nb_documents = index
-.number_of_documents(&rtxn)
-.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
-as u32;
-let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
-progress.update_progress(update_document_progress);
 let documents = index
 .all_documents(&rtxn)
 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
@@ -961,7 +902,6 @@ impl IndexScheduler {
 }
 
 index_dumper.push_document(&document)?;
-atomic.fetch_add(1, Ordering::Relaxed);
 }
 
 // 3.2. Dump the settings
@@ -976,7 +916,6 @@ impl IndexScheduler {
 })?;
 
 // 4. Dump experimental feature settings
-progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
 let features = self.features().runtime_features();
 dump.create_experimental_features(features)?;
 
@@ -987,7 +926,6 @@ impl IndexScheduler {
 if self.must_stop_processing.get() {
 return Err(Error::AbortedTask);
 }
-progress.update_progress(DumpCreationProgress::CompressTheDump);
 let path = self.dumps_path.join(format!("{}.dump", dump_uid));
 let file = File::create(path)?;
 dump.persist_to(BufWriter::new(file))?;
@@ -1013,7 +951,7 @@ impl IndexScheduler {
 .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
 
 let mut index_wtxn = index.write_txn()?;
-let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
+let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
 
 {
 let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
@@ -1047,8 +985,6 @@ impl IndexScheduler {
 Ok(tasks)
 }
 Batch::IndexCreation { index_uid, primary_key, task } => {
-progress.update_progress(CreateIndexProgress::CreatingTheIndex);
-
 let wtxn = self.env.write_txn()?;
 if self.index_mapper.exists(&wtxn, &index_uid)? {
 return Err(Error::IndexAlreadyExists(index_uid));
@@ -1058,11 +994,9 @@ impl IndexScheduler {
 self.process_batch(
 Batch::IndexUpdate { index_uid, primary_key, task },
 current_batch,
-progress,
 )
 }
 Batch::IndexUpdate { index_uid, primary_key, mut task } => {
-progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
 let rtxn = self.env.read_txn()?;
 let index = self.index_mapper.index(&rtxn, &index_uid)?;
 
@@ -1115,7 +1049,6 @@ impl IndexScheduler {
 Ok(vec![task])
 }
 Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
-progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
 let wtxn = self.env.write_txn()?;
 
 // it's possible that the index doesn't exist
@@ -1149,8 +1082,6 @@ impl IndexScheduler {
 Ok(tasks)
 }
 Batch::IndexSwap { mut task } => {
-progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
-
 let mut wtxn = self.env.write_txn()?;
 let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
 swaps
@@ -1177,20 +1108,8 @@ impl IndexScheduler {
 ));
 }
 }
-progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
-for (step, swap) in swaps.iter().enumerate() {
-progress.update_progress(VariableNameStep::new(
-format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
-step as u32,
-swaps.len() as u32,
-));
-self.apply_index_swap(
-&mut wtxn,
-&progress,
-task.uid,
-&swap.indexes.0,
-&swap.indexes.1,
-)?;
+for swap in swaps {
+self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
 }
 wtxn.commit()?;
 task.status = Status::Succeeded;
@@ -1200,15 +1119,7 @@ impl IndexScheduler {
 }
 
 /// Swap the index `lhs` with the index `rhs`.
-fn apply_index_swap(
+fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
progress: &Progress,
|
|
||||||
task_id: u32,
|
|
||||||
lhs: &str,
|
|
||||||
rhs: &str,
|
|
||||||
) -> Result<()> {
|
|
||||||
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
|
|
||||||
// 1. Verify that both lhs and rhs are existing indexes
|
// 1. Verify that both lhs and rhs are existing indexes
|
||||||
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
||||||
if !index_lhs_exists {
|
if !index_lhs_exists {
|
||||||
@@ -1226,21 +1137,14 @@ impl IndexScheduler {
|
|||||||
index_rhs_task_ids.remove_range(task_id..);
|
index_rhs_task_ids.remove_range(task_id..);
|
||||||
|
|
||||||
// 3. before_name -> new_name in the task's KindWithContent
|
// 3. before_name -> new_name in the task's KindWithContent
|
||||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
|
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
|
||||||
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
|
|
||||||
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
|
|
||||||
progress.update_progress(task_progress);
|
|
||||||
|
|
||||||
for task_id in tasks_to_update {
|
|
||||||
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||||
self.all_tasks.put(wtxn, &task_id, &task)?;
|
self.all_tasks.put(wtxn, &task_id, &task)?;
|
||||||
atomic.fetch_add(1, Ordering::Relaxed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. remove the task from indexuid = before_name
|
// 4. remove the task from indexuid = before_name
|
||||||
// 5. add the task to indexuid = after_name
|
// 5. add the task to indexuid = after_name
|
||||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
|
|
||||||
self.update_index(wtxn, lhs, |lhs_tasks| {
|
self.update_index(wtxn, lhs, |lhs_tasks| {
|
||||||
*lhs_tasks -= &index_lhs_task_ids;
|
*lhs_tasks -= &index_lhs_task_ids;
|
||||||
*lhs_tasks |= &index_rhs_task_ids;
|
*lhs_tasks |= &index_rhs_task_ids;
|
||||||
@@ -1262,7 +1166,7 @@ impl IndexScheduler {
|
|||||||
/// The list of processed tasks.
|
/// The list of processed tasks.
|
||||||
#[tracing::instrument(
|
#[tracing::instrument(
|
||||||
level = "trace",
|
level = "trace",
|
||||||
skip(self, index_wtxn, index, progress),
|
skip(self, index_wtxn, index),
|
||||||
target = "indexing::scheduler"
|
target = "indexing::scheduler"
|
||||||
)]
|
)]
|
||||||
fn apply_index_operation<'i>(
|
fn apply_index_operation<'i>(
|
||||||
@@ -1270,12 +1174,44 @@ impl IndexScheduler {
|
|||||||
index_wtxn: &mut RwTxn<'i>,
|
index_wtxn: &mut RwTxn<'i>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
operation: IndexOperation,
|
operation: IndexOperation,
|
||||||
progress: Progress,
|
|
||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
let indexer_alloc = Bump::new();
|
let indexer_alloc = Bump::new();
|
||||||
|
|
||||||
let started_processing_at = std::time::Instant::now();
|
let started_processing_at = std::time::Instant::now();
|
||||||
|
let secs_since_started_processing_at = AtomicU64::new(0);
|
||||||
|
const PRINT_SECS_DELTA: u64 = 5;
|
||||||
|
|
||||||
|
let processing_tasks = self.processing_tasks.clone();
|
||||||
let must_stop_processing = self.must_stop_processing.clone();
|
let must_stop_processing = self.must_stop_processing.clone();
|
||||||
|
let send_progress = |progress| {
|
||||||
|
let now = std::time::Instant::now();
|
||||||
|
let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed);
|
||||||
|
let previous = started_processing_at + Duration::from_secs(elapsed);
|
||||||
|
let elapsed = now - previous;
|
||||||
|
|
||||||
|
if elapsed.as_secs() < PRINT_SECS_DELTA {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
secs_since_started_processing_at
|
||||||
|
.store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed);
|
||||||
|
|
||||||
|
let TaskProgress {
|
||||||
|
current_step,
|
||||||
|
finished_steps,
|
||||||
|
total_steps,
|
||||||
|
finished_substeps,
|
||||||
|
total_substeps,
|
||||||
|
} = processing_tasks.write().unwrap().update_progress(progress);
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
current_step,
|
||||||
|
finished_steps,
|
||||||
|
total_steps,
|
||||||
|
finished_substeps,
|
||||||
|
total_substeps
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
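The block added on the right-hand side replaces the structured `Progress` object with a `send_progress` closure that forwards an update at most once every `PRINT_SECS_DELTA` seconds. A minimal, self-contained sketch of that throttling pattern (illustrative only; `make_throttled_reporter` and the `&str` payload are stand-ins, not code from this diff):

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{Duration, Instant};

    // Forward a progress update at most once per `delta`; extra updates are dropped.
    fn make_throttled_reporter(delta: Duration) -> impl Fn(&str) {
        let started = Instant::now();
        let last_report_secs = AtomicU64::new(0);
        move |step: &str| {
            let elapsed = started.elapsed().as_secs();
            if elapsed.saturating_sub(last_report_secs.load(Ordering::Relaxed)) < delta.as_secs() {
                return; // too soon since the last report, skip this update
            }
            last_report_secs.store(elapsed, Ordering::Relaxed);
            println!("progress: {step}");
        }
    }

    // let report = make_throttled_reporter(Duration::from_secs(5));
    // report("indexing documents"); // nothing prints until 5 s have elapsed,
    // report("indexing documents"); // then at most one line per 5 s window.

As in the diff, the very first updates are swallowed until the delta has elapsed; the point is to bound log volume, not to report every step.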
match operation {
IndexOperation::DocumentClear { index_uid, mut tasks } => {
@@ -1307,7 +1243,6 @@ impl IndexScheduler {
operations,
mut tasks,
} => {
-progress.update_progress(DocumentOperationProgress::RetrievingConfig);
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
// to a fresh thread.
@@ -1316,7 +1251,9 @@ impl IndexScheduler {
if let DocumentOperation::Add(content_uuid) = operation {
let content_file = self.file_store.get_update(*content_uuid)?;
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
-content_files.push(mmap);
+if !mmap.is_empty() {
+content_files.push(mmap);
+}
}
}

@@ -1361,7 +1298,6 @@ impl IndexScheduler {
}
};

-progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
let (document_changes, operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
@@ -1370,13 +1306,13 @@ impl IndexScheduler {
primary_key.as_deref(),
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
-progress.clone(),
+&send_progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;

-let mut candidates_count = 0;
+let mut addition = 0;
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
-candidates_count += stats.document_count;
+addition += stats.document_count;
match stats.error {
Some(error) => {
task.status = Status::Failed;
@@ -1406,7 +1342,6 @@ impl IndexScheduler {
}
}

-progress.update_progress(DocumentOperationProgress::Indexing);
if tasks.iter().any(|res| res.error.is_none()) {
indexer::index(
index_wtxn,
@@ -1419,25 +1354,16 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
-&progress,
+&send_progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;

-let addition = DocumentAdditionResult {
-indexed_documents: candidates_count,
-number_of_documents: index
-.number_of_documents(index_wtxn)
-.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
-};
-
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}

Ok(tasks)
}
IndexOperation::DocumentEdition { index_uid, mut task } => {
-progress.update_progress(DocumentEditionProgress::RetrievingConfig);

let (filter, code) = if let KindWithContent::DocumentEdition {
filter_expr,
context: _,
@@ -1510,8 +1436,6 @@ impl IndexScheduler {
}
};

-let candidates_count = candidates.len();
-progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes = pool
.install(|| {
@@ -1525,7 +1449,6 @@ impl IndexScheduler {
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;

-progress.update_progress(DocumentEditionProgress::Indexing);
indexer::index(
index_wtxn,
index,
@@ -1537,18 +1460,11 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
-&progress,
+&send_progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

-let addition = DocumentAdditionResult {
-indexed_documents: candidates_count,
-number_of_documents: index
-.number_of_documents(index_wtxn)
-.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
-};
-
-tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
+// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}

match result_count {
@@ -1578,8 +1494,6 @@ impl IndexScheduler {
Ok(vec![task])
}
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
-progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
-
let mut to_delete = RoaringBitmap::new();
let external_documents_ids = index.external_documents_ids();

@@ -1670,9 +1584,7 @@ impl IndexScheduler {
}
};

-progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
let mut indexer = indexer::DocumentDeletion::new();
-let candidates_count = to_delete.len();
indexer.delete_documents_by_docids(to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
@@ -1680,7 +1592,6 @@ impl IndexScheduler {
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;

-progress.update_progress(DocumentDeletionProgress::Indexing);
indexer::index(
index_wtxn,
index,
@@ -1692,24 +1603,16 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
-&progress,
+&send_progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

-let addition = DocumentAdditionResult {
-indexed_documents: candidates_count,
-number_of_documents: index
-.number_of_documents(index_wtxn)
-.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
-};
-
-tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
+// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}

Ok(tasks)
}
IndexOperation::Settings { index_uid, settings, mut tasks } => {
-progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);

@@ -1723,7 +1626,6 @@ impl IndexScheduler {
task.status = Status::Succeeded;
}

-progress.update_progress(SettingsProgress::ApplyTheSettings);
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
@@ -1746,14 +1648,12 @@ impl IndexScheduler {
index_uid: index_uid.clone(),
tasks: cleared_tasks,
},
-progress.clone(),
)?;

let settings_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
-progress,
)?;

let mut tasks = settings_tasks;
@@ -1770,18 +1670,15 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
-progress: &Progress,
) -> Result<RoaringBitmap> {
-progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);

// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();

let all_task_ids = self.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;
-to_delete_tasks -= &**processing_tasks;
-to_delete_tasks -= &enqueued_tasks;
+to_delete_tasks -= processing_tasks;
+to_delete_tasks -= enqueued_tasks;

// 2. We now have a list of tasks to delete, delete them

@@ -1792,8 +1689,6 @@ impl IndexScheduler {
// The tasks that have been removed *per batches*.
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();

-let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
-progress.update_progress(task_progress);
for task_id in to_delete_tasks.iter() {
let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

@@ -1817,35 +1712,22 @@ impl IndexScheduler {
if let Some(batch_uid) = task.batch_uid {
affected_batches.entry(batch_uid).or_default().insert(task_id);
}
-atomic_progress.fetch_add(1, Ordering::Relaxed);
}

-progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
-let (atomic_progress, task_progress) = AtomicTaskStep::new(
-(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
-);
-progress.update_progress(task_progress);
for index in affected_indexes.iter() {
self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
-atomic_progress.fetch_add(1, Ordering::Relaxed);
}

for status in affected_statuses.iter() {
self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
-atomic_progress.fetch_add(1, Ordering::Relaxed);
}

for kind in affected_kinds.iter() {
self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
-atomic_progress.fetch_add(1, Ordering::Relaxed);
}

-progress.update_progress(TaskDeletionProgress::DeletingTasks);
-let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
-progress.update_progress(task_progress);
for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &task)?;
-atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for canceled_by in affected_canceled_by {
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
@@ -1857,9 +1739,6 @@ impl IndexScheduler {
}
}
}
-progress.update_progress(TaskDeletionProgress::DeletingBatches);
-let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
-progress.update_progress(batch_progress);
for (batch_id, to_delete_tasks) in affected_batches {
if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
tasks -= &to_delete_tasks;
@@ -1901,7 +1780,6 @@ impl IndexScheduler {
}
}
}
-atomic_progress.fetch_add(1, Ordering::Relaxed);
}

Ok(to_delete_tasks)
@@ -1916,36 +1794,21 @@ impl IndexScheduler {
cancel_task_id: TaskId,
current_batch: &mut ProcessingBatch,
matched_tasks: &RoaringBitmap,
-progress: &Progress,
) -> Result<Vec<Task>> {
-progress.update_progress(TaskCancelationProgress::RetrievingTasks);

// 1. Remove from this list the tasks that we are not allowed to cancel
// Notice that only the _enqueued_ ones are cancelable and we should
// have already aborted the indexation of the _processing_ ones
let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?;
let tasks_to_cancel = cancelable_tasks & matched_tasks;

-let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
-progress.update_progress(progress_obj);
-
// 2. We now have a list of tasks to cancel, cancel them
-let mut tasks = self.get_existing_tasks(
-rtxn,
-tasks_to_cancel.iter().inspect(|_| {
-task_progress.fetch_add(1, Ordering::Relaxed);
-}),
-)?;
+let mut tasks = self.get_existing_tasks(rtxn, tasks_to_cancel.iter())?;

-progress.update_progress(TaskCancelationProgress::UpdatingTasks);
-let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
-progress.update_progress(progress_obj);
for task in tasks.iter_mut() {
task.status = Status::Canceled;
task.canceled_by = Some(cancel_task_id);
task.details = task.details.as_ref().map(|d| d.to_failed());
current_batch.processing(Some(task));
-task_progress.fetch_add(1, Ordering::Relaxed);
}

Ok(tasks)
@@ -1,13 +1,12 @@
use std::fmt::Display;

+use crate::TaskId;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use thiserror::Error;

-use crate::TaskId;
-
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DateField {
BeforeEnqueuedAt,
@@ -104,7 +103,7 @@ pub enum Error {
)]
InvalidTaskCanceledBy { canceled_by: String },
#[error(
-"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
+"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
)]
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]
@@ -3,6 +3,10 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};

+use self::index_map::IndexMap;
+use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
+use crate::uuid_codec::UuidCodec;
+use crate::{Error, Result};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli;
@@ -13,11 +17,6 @@ use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;

-use self::index_map::IndexMap;
-use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
-use crate::uuid_codec::UuidCodec;
-use crate::{Error, Result};
-
mod index_map;

const INDEX_MAPPING: &str = "index-mapping";
@@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec

pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
-let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
+let Batch { uid, details, stats, started_at, finished_at } = batch;
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}
@@ -26,7 +26,6 @@ mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
-mod processing;
mod utils;
pub mod uuid_codec;

@@ -55,13 +54,14 @@ use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFea
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
+use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
+use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
-use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
-use processing::ProcessingTasks;
+use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
use rayon::current_num_threads;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@@ -72,8 +72,7 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map
use uuid::Uuid;

use crate::index_mapper::IndexMapper;
-use crate::processing::{AtomicTaskStep, BatchProgress};
-use crate::utils::{check_index_swap_validity, clamp_to_page_size};
+use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch};

pub(crate) type BEI128 = I128<BE>;

@@ -164,6 +163,48 @@ impl Query {
}
}

+#[derive(Debug, Clone)]
+pub struct ProcessingTasks {
+batch: Option<ProcessingBatch>,
+/// The list of tasks ids that are currently running.
+processing: RoaringBitmap,
+/// The progress on processing tasks
+progress: Option<TaskProgress>,
+}
+
+impl ProcessingTasks {
+/// Creates an empty `ProcessingAt` struct.
+fn new() -> ProcessingTasks {
+ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None }
+}
+
+/// Stores the currently processing tasks, and the date time at which it started.
+fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) {
+self.batch = Some(processing_batch);
+self.processing = processing;
+}
+
+fn update_progress(&mut self, progress: Progress) -> TaskProgress {
+self.progress.get_or_insert_with(TaskProgress::default).update(progress)
+}
+
+/// Set the processing tasks to an empty list
+fn stop_processing(&mut self) -> Self {
+self.progress = None;
+
+Self {
+batch: std::mem::take(&mut self.batch),
+processing: std::mem::take(&mut self.processing),
+progress: None,
+}
+}
+
+/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
+fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
+!self.processing.is_disjoint(canceled_tasks)
+}
+}

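The `must_cancel_processing_tasks` method added above reduces cancellation to a bitmap intersection test: work must be aborted as soon as the canceled task ids overlap the ids currently being processed. A small illustrative sketch of that check with the roaring crate (the ids below are made up):

    use roaring::RoaringBitmap;

    // There is something to abort only if at least one canceled task is still in flight.
    fn must_cancel(processing: &RoaringBitmap, canceled: &RoaringBitmap) -> bool {
        !processing.is_disjoint(canceled)
    }

    fn main() {
        let processing: RoaringBitmap = (10..15).collect(); // tasks 10..=14 are running
        let canceled: RoaringBitmap = [12, 40].into_iter().collect();
        assert!(must_cancel(&processing, &canceled)); // task 12 is both running and canceled
        let unrelated: RoaringBitmap = [40, 41].into_iter().collect();
        assert!(!must_cancel(&processing, &unrelated));
    }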
#[derive(Default, Clone, Debug)]
struct MustStopProcessing(Arc<AtomicBool>);

@@ -772,7 +813,7 @@ impl IndexScheduler {
let mut batch_tasks = RoaringBitmap::new();
for batch_uid in batch_uids {
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
-batch_tasks |= &*processing_tasks;
+batch_tasks |= &processing_tasks;
} else {
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
}
@@ -786,13 +827,13 @@ impl IndexScheduler {
match status {
// special case for Processing tasks
Status::Processing => {
-status_tasks |= &*processing_tasks;
+status_tasks |= &processing_tasks;
}
status => status_tasks |= &self.get_status(rtxn, *status)?,
};
}
if !status.contains(&Status::Processing) {
-tasks -= &*processing_tasks;
+tasks -= &processing_tasks;
}
tasks &= status_tasks;
}
@@ -841,7 +882,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
tasks = {
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
-(&tasks - &*processing_tasks, &tasks & &*processing_tasks);
+(&tasks - &processing_tasks, &tasks & &processing_tasks);

// special case for Processing tasks
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
@@ -1049,7 +1090,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
batches = {
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
-(&batches - &*processing.processing, &batches & &*processing.processing);
+(&batches - &processing.processing, &batches & &processing.processing);

// special case for Processing batches
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
@@ -1565,8 +1606,7 @@ impl IndexScheduler {

// We reset the must_stop flag to be sure that we don't stop processing tasks
self.must_stop_processing.reset();
-let progress = self
-.processing_tasks
+self.processing_tasks
.write()
.unwrap()
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
@@ -1579,12 +1619,11 @@ impl IndexScheduler {
let res = {
let cloned_index_scheduler = self.private_clone();
let processing_batch = &mut processing_batch;
-let progress = progress.clone();
std::thread::scope(|s| {
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
-cloned_index_scheduler.process_batch(batch, processing_batch, progress)
+cloned_index_scheduler.process_batch(batch, processing_batch)
})
.unwrap();
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
@@ -1597,7 +1636,6 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

-progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
@@ -1607,15 +1645,12 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchSucceeded);

-let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
-progress.update_progress(task_progress_obj);
let mut success = 0;
let mut failure = 0;
let mut canceled_by = None;

#[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() {
-task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update(&mut task);
if task.status == Status::Canceled {
canceled.insert(task.uid);
@@ -1683,12 +1718,8 @@ impl IndexScheduler {
Err(err) => {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchFailed);
-let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
-progress.update_progress(task_progress_obj);
-
let error: ResponseError = err.into();
for id in ids.iter() {
-task_progress.fetch_add(1, Ordering::Relaxed);
let mut task = self
.get_task(&wtxn, id)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
@@ -2016,19 +2047,14 @@ impl<'a> Dump<'a> {
task: TaskDump,
content_file: Option<Box<UpdateFile>>,
) -> Result<Task> {
-let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
-
let content_uuid = match content_file {
Some(content_file) if task.status == Status::Enqueued => {
-let (uuid, file) = self.index_scheduler.create_update_file(false)?;
-let mut writer = io::BufWriter::new(file);
+let (uuid, mut file) = self.index_scheduler.create_update_file(false)?;
+let mut builder = DocumentsBatchBuilder::new(&mut file);
for doc in content_file {
-let doc = doc?;
-serde_json::to_writer(&mut writer, &doc).map_err(|e| {
-Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
-})?;
+builder.append_json_object(&doc?)?;
}
-let file = writer.into_inner().map_err(|e| e.into_error())?;
+builder.into_inner()?;
file.persist()?;

Some(uuid)
@@ -2036,12 +2062,6 @@ impl<'a> Dump<'a> {
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
// in case we try to open it later.
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
-None if task.status == Status::Enqueued && task_has_no_docs => {
-let (uuid, file) = self.index_scheduler.create_update_file(false)?;
-file.persist()?;
-
-Some(uuid)
-}
_ => None,
};

@@ -1,316 +0,0 @@
-use std::borrow::Cow;
-use std::sync::Arc;
-
-use enum_iterator::Sequence;
-use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
-use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
-use roaring::RoaringBitmap;
-
-use crate::utils::ProcessingBatch;
-
-#[derive(Clone)]
-pub struct ProcessingTasks {
-pub batch: Option<Arc<ProcessingBatch>>,
-/// The list of tasks ids that are currently running.
-pub processing: Arc<RoaringBitmap>,
-/// The progress on processing tasks
-pub progress: Option<Progress>,
-}
-
-impl ProcessingTasks {
-/// Creates an empty `ProcessingAt` struct.
-pub fn new() -> ProcessingTasks {
-ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
-}
-
-pub fn get_progress_view(&self) -> Option<ProgressView> {
-Some(self.progress.as_ref()?.as_progress_view())
-}
-
-/// Stores the currently processing tasks, and the date time at which it started.
-pub fn start_processing(
-&mut self,
-processing_batch: ProcessingBatch,
-processing: RoaringBitmap,
-) -> Progress {
-self.batch = Some(Arc::new(processing_batch));
-self.processing = Arc::new(processing);
-let progress = Progress::default();
-progress.update_progress(BatchProgress::ProcessingTasks);
-self.progress = Some(progress.clone());
-
-progress
-}
-
-/// Set the processing tasks to an empty list
-pub fn stop_processing(&mut self) -> Self {
-self.progress = None;
-
-Self {
-batch: std::mem::take(&mut self.batch),
-processing: std::mem::take(&mut self.processing),
-progress: None,
-}
-}
-
-/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
-pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
-!self.processing.is_disjoint(canceled_tasks)
-}
-}
-
-make_enum_progress! {
-pub enum BatchProgress {
-ProcessingTasks,
-WritingTasksToDisk,
-}
-}
-
-make_enum_progress! {
-pub enum TaskCancelationProgress {
-RetrievingTasks,
-UpdatingTasks,
-}
-}
-
-make_enum_progress! {
-pub enum TaskDeletionProgress {
-DeletingTasksDateTime,
-DeletingTasksMetadata,
-DeletingTasks,
-DeletingBatches,
-}
-}
-
-make_enum_progress! {
-pub enum SnapshotCreationProgress {
-StartTheSnapshotCreation,
-SnapshotTheIndexScheduler,
-SnapshotTheUpdateFiles,
-SnapshotTheIndexes,
-SnapshotTheApiKeys,
-CreateTheTarball,
-}
-}
-
-make_enum_progress! {
-pub enum DumpCreationProgress {
-StartTheDumpCreation,
-DumpTheApiKeys,
-DumpTheTasks,
-DumpTheIndexes,
-DumpTheExperimentalFeatures,
-CompressTheDump,
-}
-}
-
-make_enum_progress! {
-pub enum CreateIndexProgress {
-CreatingTheIndex,
-}
-}
-
-make_enum_progress! {
-pub enum UpdateIndexProgress {
-UpdatingTheIndex,
-}
-}
-
-make_enum_progress! {
-pub enum DeleteIndexProgress {
-DeletingTheIndex,
-}
-}
-
-make_enum_progress! {
-pub enum SwappingTheIndexes {
-EnsuringCorrectnessOfTheSwap,
-SwappingTheIndexes,
-}
-}
-
-make_enum_progress! {
-pub enum InnerSwappingTwoIndexes {
-RetrieveTheTasks,
-UpdateTheTasks,
-UpdateTheIndexesMetadata,
-}
-}
-
-make_enum_progress! {
-pub enum DocumentOperationProgress {
-RetrievingConfig,
-ComputingDocumentChanges,
-Indexing,
-}
-}
-
-make_enum_progress! {
-pub enum DocumentEditionProgress {
-RetrievingConfig,
-ComputingDocumentChanges,
-Indexing,
-}
-}
-
-make_enum_progress! {
-pub enum DocumentDeletionProgress {
-RetrievingConfig,
-DeleteDocuments,
-Indexing,
-}
-}
-
-make_enum_progress! {
-pub enum SettingsProgress {
-RetrievingAndMergingTheSettings,
-ApplyTheSettings,
-}
-}
-
-make_atomic_progress!(Task alias AtomicTaskStep => "task" );
-make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
-make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
-make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
-
-pub struct VariableNameStep {
-name: String,
-current: u32,
-total: u32,
-}
-
-impl VariableNameStep {
-pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
-Self { name: name.into(), current, total }
-}
-}
-
-impl Step for VariableNameStep {
-fn name(&self) -> Cow<'static, str> {
-self.name.clone().into()
-}
-
-fn current(&self) -> u32 {
-self.current
-}
-
-fn total(&self) -> u32 {
-self.total
-}
-}
-
-#[cfg(test)]
-mod test {
-use std::sync::atomic::Ordering;
-
-use meili_snap::{json_string, snapshot};
-
-use super::*;
-
-#[test]
-fn one_level() {
-let mut processing = ProcessingTasks::new();
-processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
-snapshot!(json_string!(processing.get_progress_view()), @r#"
-{
-"steps": [
-{
-"currentStep": "processing tasks",
-"finished": 0,
-"total": 2
-}
-],
-"percentage": 0.0
-}
-"#);
-processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
-snapshot!(json_string!(processing.get_progress_view()), @r#"
-{
-"steps": [
-{
-"currentStep": "writing tasks to disk",
-"finished": 1,
-"total": 2
-}
-],
-"percentage": 50.0
-}
-"#);
-}
-
-#[test]
-fn task_progress() {
-let mut processing = ProcessingTasks::new();
-processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
-let (atomic, tasks) = AtomicTaskStep::new(10);
-processing.progress.as_ref().unwrap().update_progress(tasks);
-snapshot!(json_string!(processing.get_progress_view()), @r#"
-{
-"steps": [
-{
-"currentStep": "processing tasks",
-"finished": 0,
-"total": 2
-},
-{
-"currentStep": "task",
-"finished": 0,
-"total": 10
-}
-],
-"percentage": 0.0
-}
-"#);
-atomic.fetch_add(6, Ordering::Relaxed);
-snapshot!(json_string!(processing.get_progress_view()), @r#"
-{
-"steps": [
-{
-"currentStep": "processing tasks",
-"finished": 0,
-"total": 2
-},
-{
-"currentStep": "task",
-"finished": 6,
-"total": 10
-}
-],
-"percentage": 30.000002
-}
-"#);
-processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
-snapshot!(json_string!(processing.get_progress_view()), @r#"
-{
-"steps": [
-{
-"currentStep": "writing tasks to disk",
-"finished": 1,
-"total": 2
-}
-],
-"percentage": 50.0
-}
-"#);
-let (atomic, tasks) = AtomicTaskStep::new(5);
-processing.progress.as_ref().unwrap().update_progress(tasks);
-atomic.fetch_add(4, Ordering::Relaxed);
-snapshot!(json_string!(processing.get_progress_view()), @r#"
-{
-"steps": [
-{
-"currentStep": "writing tasks to disk",
-"finished": 1,
-"total": 2
-},
-{
-"currentStep": "task",
-"finished": 4,
-"total": 5
-}
-],
-"percentage": 90.0
-}
-"#);
-}
-}

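The deleted `VariableNameStep` above illustrates the shape of a progress step in this design: a display name plus `current`/`total` counters from which a percentage is derived (the snapshot tests check exactly those numbers). A self-contained sketch of the same idea, using a hypothetical local `Step` trait rather than the milli one:

    use std::borrow::Cow;

    // Hypothetical stand-in for the milli `Step` trait used above.
    trait Step {
        fn name(&self) -> Cow<'static, str>;
        fn current(&self) -> u32;
        fn total(&self) -> u32;
        // Percentage of this step that is done, as shown in a progress view.
        fn percentage(&self) -> f32 {
            100.0 * self.current() as f32 / self.total() as f32
        }
    }

    struct VariableNameStep {
        name: String,
        current: u32,
        total: u32,
    }

    impl Step for VariableNameStep {
        fn name(&self) -> Cow<'static, str> {
            self.name.clone().into()
        }
        fn current(&self) -> u32 {
            self.current
        }
        fn total(&self) -> u32 {
            self.total
        }
    }

    fn main() {
        let step = VariableNameStep { name: "swapping index a and b".into(), current: 1, total: 4 };
        println!("{}: {:.1}%", step.name(), step.percentage()); // swapping index a and b: 25.0%
    }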
@@ -134,7 +134,6 @@ impl ProcessingBatch {
     pub fn to_batch(&self) -> Batch {
         Batch {
             uid: self.uid,
-            progress: None,
             details: self.details.clone(),
             stats: self.stats.clone(),
             started_at: self.started_at,
@@ -188,7 +187,6 @@ impl IndexScheduler {
             &batch.uid,
             &Batch {
                 uid: batch.uid,
-                progress: None,
                 details: batch.details,
                 stats: batch.stats,
                 started_at: batch.started_at,
@@ -275,9 +273,7 @@ impl IndexScheduler {
             .into_iter()
             .map(|batch_id| {
                 if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
-                    let mut batch = processing.batch.as_ref().unwrap().to_batch();
-                    batch.progress = processing.get_progress_view();
-                    Ok(batch)
+                    Ok(processing.batch.as_ref().unwrap().to_batch())
                 } else {
                     self.get_batch(rtxn, batch_id)
                         .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
@@ -291,10 +287,7 @@ impl IndexScheduler {
 
             debug_assert!(old_task != *task);
             debug_assert_eq!(old_task.uid, task.uid);
-            debug_assert!(
-                old_task.batch_uid.is_none() && task.batch_uid.is_some(),
-                "\n==> old: {old_task:?}\n==> new: {task:?}"
-            );
+            debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some());
 
             if old_task.status != task.status {
                 self.update_status(wtxn, old_task.status, |bitmap| {
@@ -24,9 +24,8 @@ flate2 = "1.0.30"
 fst = "0.4.7"
 memmap2 = "0.9.4"
 milli = { path = "../milli" }
-bumparaw-collections = "0.1.2"
+raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 roaring = { version = "0.10.7", features = ["serde"] }
-rustc-hash = "2.1.0"
 serde = { version = "1.0.204", features = ["derive"] }
 serde-cs = "0.2.4"
 serde_json = "1.0.120"
@@ -1,16 +1,16 @@
-use milli::progress::ProgressView;
 use serde::Serialize;
 use time::{Duration, OffsetDateTime};
 
-use crate::batches::{Batch, BatchId, BatchStats};
-use crate::task_view::DetailsView;
-use crate::tasks::serialize_duration;
+use crate::{
+    batches::{Batch, BatchId, BatchStats},
+    task_view::DetailsView,
+    tasks::serialize_duration,
+};
 
 #[derive(Debug, Clone, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct BatchView {
     pub uid: BatchId,
-    pub progress: Option<ProgressView>,
     pub details: DetailsView,
     pub stats: BatchStats,
     #[serde(serialize_with = "serialize_duration", default)]
@@ -25,7 +25,6 @@ impl BatchView {
     pub fn from_batch(batch: &Batch) -> Self {
         Self {
             uid: batch.uid,
-            progress: batch.progress.clone(),
             details: batch.details.clone(),
             stats: batch.stats.clone(),
             duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
@@ -1,11 +1,12 @@
 use std::collections::BTreeMap;
 
-use milli::progress::ProgressView;
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
 
-use crate::task_view::DetailsView;
-use crate::tasks::{Kind, Status};
+use crate::{
+    task_view::DetailsView,
+    tasks::{Kind, Status},
+};
 
 pub type BatchId = u32;
 
@@ -14,8 +15,6 @@ pub type BatchId = u32;
 pub struct Batch {
     pub uid: BatchId,
 
-    #[serde(skip)]
-    pub progress: Option<ProgressView>,
     pub details: DetailsView,
     pub stats: BatchStats,
 
@@ -4,11 +4,10 @@ use std::io::{self, BufWriter};
 use std::marker::PhantomData;
 
 use bumpalo::Bump;
-use bumparaw_collections::RawMap;
 use memmap2::Mmap;
 use milli::documents::Error;
 use milli::Object;
-use rustc_hash::FxBuildHasher;
+use raw_collections::RawMap;
 use serde::de::{SeqAccess, Visitor};
 use serde::{Deserialize, Deserializer};
 use serde_json::error::Category;
@@ -221,7 +220,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
     let mut deserializer = serde_json::Deserializer::from_slice(&input);
     let res = array_each(&mut deserializer, |obj: &RawValue| {
         doc_alloc.reset();
-        let map = RawMap::from_raw_value_and_hasher(obj, FxBuildHasher, &doc_alloc)?;
+        let map = RawMap::from_raw_value(obj, &doc_alloc)?;
         to_writer(&mut out, &map)
     });
     let count = match res {
@@ -251,25 +250,26 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
     }
 }
 
-/// Reads NDJSON from file and checks it.
-pub fn read_ndjson(input: &File) -> Result<u64> {
+/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
+pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
     // We memory map to be able to deserialize into a RawMap that
     // does not allocate when possible and only materialize the first/top level.
     let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
+    let mut output = BufWriter::new(output);
 
     let mut bump = Bump::with_capacity(1024 * 1024);
 
     let mut count = 0;
    for result in serde_json::Deserializer::from_slice(&input).into_iter() {
         bump.reset();
-        match result {
-            Ok(raw) => {
-                // try to deserialize as a map
-                RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, &bump)
-                    .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
-                count += 1;
-            }
-            Err(e) => return Err(DocumentFormatError::from((PayloadType::Ndjson, e))),
-        }
+        count += 1;
+        result
+            .and_then(|raw: &RawValue| {
+                // try to deserialize as a map
+                let map = RawMap::from_raw_value(raw, &bump)?;
+                to_writer(&mut output, &map)
+            })
+            .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
     }
 
     Ok(count)
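Note on the `read_ndjson` hunk above: on the v1.12.7 side the function only validates and counts the memory-mapped NDJSON payload (the body has already been written to the update file upstream), while on the other branch it also re-serializes every document into the `output` writer. A minimal sketch of the two call shapes, reusing the variable names that appear in the later `document_addition` hunks:

    // v1.12.7 side: validate and count only
    let count = read_ndjson(ntf.as_file())?;

    // other branch: validate and rewrite into the update file in one pass
    let count = read_ndjson(&read_file, &mut update_file)?;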
@@ -550,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
             "the value of `id` is invalid. \
             A document identifier can be of type integer or string, \
             only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
-            and can not be more than 511 bytes."
+            and can not be more than 512 bytes."
         )
     }
 }
@@ -4,6 +4,7 @@ use std::fmt::{Display, Write};
 use std::str::FromStr;
 
 use enum_iterator::Sequence;
+use milli::update::new::indexer::document_changes::Progress;
 use milli::update::IndexDocumentsMethod;
 use milli::Object;
 use roaring::RoaringBitmap;
@@ -40,6 +41,62 @@ pub struct Task {
     pub kind: KindWithContent,
 }
 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskProgress {
+    pub current_step: &'static str,
+    pub finished_steps: u16,
+    pub total_steps: u16,
+    pub finished_substeps: Option<u32>,
+    pub total_substeps: Option<u32>,
+}
+
+impl Default for TaskProgress {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl TaskProgress {
+    pub fn new() -> Self {
+        Self {
+            current_step: "start",
+            finished_steps: 0,
+            total_steps: 1,
+            finished_substeps: None,
+            total_substeps: None,
+        }
+    }
+
+    pub fn update(&mut self, progress: Progress) -> TaskProgress {
+        if self.finished_steps > progress.finished_steps {
+            return *self;
+        }
+
+        if self.current_step != progress.step_name {
+            self.current_step = progress.step_name
+        }
+
+        self.total_steps = progress.total_steps;
+
+        if self.finished_steps < progress.finished_steps {
+            self.finished_substeps = None;
+            self.total_substeps = None;
+        }
+        self.finished_steps = progress.finished_steps;
+        if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep {
+            if let Some(task_finished_substeps) = self.finished_substeps {
+                if task_finished_substeps > finished_substeps {
+                    return *self;
+                }
+            }
+            self.finished_substeps = Some(finished_substeps);
+            self.total_substeps = Some(total_substeps);
+        }
+        *self
+    }
+}
+
 impl Task {
     pub fn index_uid(&self) -> Option<&str> {
         use KindWithContent::*;
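Note on the `TaskProgress` type added in the hunk above: `update` is fed from milli's `Progress` notifications; it ignores out-of-order updates (a lower `finished_steps`, or a lower `finished_substeps` within the same step) and clears the substep counters whenever a new step begins, returning a copy of the resulting state. A minimal usage sketch, assuming `progress` comes from the indexer's progress callback:

    let mut task_progress = TaskProgress::new();
    // inside the indexer's progress callback
    let snapshot = task_progress.update(progress);
    println!("{} ({}/{})", snapshot.current_step, snapshot.finished_steps, snapshot.total_steps);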
@@ -129,11 +129,6 @@ async fn try_main() -> anyhow::Result<()> {
 
     print_launch_resume(&opt, analytics.clone(), config_read_from);
 
-    tokio::spawn(async move {
-        tokio::signal::ctrl_c().await.unwrap();
-        std::process::exit(130);
-    });
-
     run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?;
 
     Ok(())
@@ -1,18 +1,18 @@
-use actix_web::web::{self, Data};
-use actix_web::HttpResponse;
+use actix_web::{
+    web::{self, Data},
+    HttpResponse,
+};
 use deserr::actix_web::AwebQueryParameter;
 use index_scheduler::{IndexScheduler, Query};
-use meilisearch_types::batch_view::BatchView;
-use meilisearch_types::batches::BatchId;
-use meilisearch_types::deserr::DeserrQueryParamError;
-use meilisearch_types::error::ResponseError;
-use meilisearch_types::keys::actions;
+use meilisearch_types::{
+    batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError,
+    keys::actions,
+};
 use serde::Serialize;
 
-use super::tasks::TasksFilterQuery;
-use super::ActionPolicy;
-use crate::extractors::authentication::GuardedData;
-use crate::extractors::sequential_extractor::SeqHandler;
+use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler};
+use super::{tasks::TasksFilterQuery, ActionPolicy};
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches))))
@@ -1,5 +1,5 @@
 use std::collections::HashSet;
-use std::io::{ErrorKind, Seek as _};
+use std::io::ErrorKind;
 use std::marker::PhantomData;
 
 use actix_web::http::header::CONTENT_TYPE;
@@ -572,7 +572,7 @@ async fn document_addition(
     index_uid: IndexUid,
     primary_key: Option<String>,
     csv_delimiter: Option<u8>,
-    body: Payload,
+    mut body: Payload,
     method: IndexDocumentsMethod,
     task_id: Option<TaskId>,
     dry_run: bool,
@@ -609,60 +609,54 @@ async fn document_addition(
     };
 
     let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;
-    let documents_count = match format {
-        PayloadType::Ndjson => {
-            let (path, file) = update_file.into_parts();
-            let file = match file {
-                Some(file) => {
-                    let (file, path) = file.into_parts();
-                    let mut file = copy_body_to_file(file, body, format).await?;
-                    file.rewind().map_err(|e| {
-                        index_scheduler::Error::FileStore(file_store::Error::IoError(e))
-                    })?;
-                    Some(tempfile::NamedTempFile::from_parts(file, path))
-                }
-                None => None,
-            };
-
-            let documents_count = tokio::task::spawn_blocking(move || {
-                let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
-                    read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
-                })?;
-
-                let update_file = file_store::File::from_parts(path, file);
-                update_file.persist()?;
-
-                Ok(documents_count)
-            })
-            .await?;
-
-            Ok(documents_count)
-        }
-        PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
-            let temp_file = match tempfile() {
-                Ok(file) => file,
-                Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
-            };
-
-            let read_file = copy_body_to_file(temp_file, body, format).await?;
-            tokio::task::spawn_blocking(move || {
-                let documents_count = match format {
-                    PayloadType::Json => read_json(&read_file, &mut update_file)?,
-                    PayloadType::Csv { delimiter } => {
-                        read_csv(&read_file, &mut update_file, delimiter)?
-                    }
-                    PayloadType::Ndjson => {
-                        unreachable!("We already wrote the user content into the update file")
-                    }
-                };
-                // we NEED to persist the file here because we moved the `udpate_file` in another task.
-                update_file.persist()?;
-                Ok(documents_count)
-            })
-            .await
-        }
+    let temp_file = match tempfile() {
+        Ok(file) => file,
+        Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
     };
 
+    let async_file = File::from_std(temp_file);
+    let mut buffer = BufWriter::new(async_file);
+
+    let mut buffer_write_size: usize = 0;
+    while let Some(result) = body.next().await {
+        let byte = result?;
+
+        if byte.is_empty() && buffer_write_size == 0 {
+            return Err(MeilisearchHttpError::MissingPayload(format));
+        }
+
+        match buffer.write_all(&byte).await {
+            Ok(()) => buffer_write_size += 1,
+            Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
+        }
+    }
+
+    if let Err(e) = buffer.flush().await {
+        return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
+    }
+
+    if buffer_write_size == 0 {
+        return Err(MeilisearchHttpError::MissingPayload(format));
+    }
+
+    if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
+        return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
+    }
+
+    let read_file = buffer.into_inner().into_std().await;
+    let documents_count = tokio::task::spawn_blocking(move || {
+        let documents_count = match format {
+            PayloadType::Json => read_json(&read_file, &mut update_file)?,
+            PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
+            PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
+        };
+        // we NEED to persist the file here because we moved the `udpate_file` in another task.
+        update_file.persist()?;
+        Ok(documents_count)
+    })
+    .await;
 
     let documents_count = match documents_count {
         Ok(Ok(documents_count)) => documents_count,
         // in this case the file has not possibly be persisted.
@@ -709,39 +703,6 @@ async fn document_addition(
     Ok(task.into())
 }
 
-async fn copy_body_to_file(
-    output: std::fs::File,
-    mut body: Payload,
-    format: PayloadType,
-) -> Result<std::fs::File, MeilisearchHttpError> {
-    let async_file = File::from_std(output);
-    let mut buffer = BufWriter::new(async_file);
-    let mut buffer_write_size: usize = 0;
-    while let Some(result) = body.next().await {
-        let byte = result?;
-
-        if byte.is_empty() && buffer_write_size == 0 {
-            return Err(MeilisearchHttpError::MissingPayload(format));
-        }
-
-        match buffer.write_all(&byte).await {
-            Ok(()) => buffer_write_size += 1,
-            Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
-        }
-    }
-    if let Err(e) = buffer.flush().await {
-        return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
-    }
-    if buffer_write_size == 0 {
-        return Err(MeilisearchHttpError::MissingPayload(format));
-    }
-    if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
-        return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
-    }
-    let read_file = buffer.into_inner().into_std().await;
-    Ok(read_file)
-}
-
 pub async fn delete_documents_batch(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
@@ -284,7 +284,6 @@ async fn test_summarized_document_addition_or_update() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
      "details": {
         "receivedDocuments": 1,
         "indexedDocuments": 1
@@ -315,7 +314,6 @@ async fn test_summarized_document_addition_or_update() {
         @r#"
     {
       "uid": 1,
-      "progress": null,
       "details": {
         "receivedDocuments": 1,
         "indexedDocuments": 1
@@ -351,7 +349,6 @@ async fn test_summarized_delete_documents_by_batch() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {
         "providedIds": 3,
         "deletedDocuments": 0
@@ -383,7 +380,6 @@ async fn test_summarized_delete_documents_by_batch() {
         @r#"
     {
       "uid": 2,
-      "progress": null,
       "details": {
         "providedIds": 1,
         "deletedDocuments": 0
@@ -420,7 +416,6 @@ async fn test_summarized_delete_documents_by_filter() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {
         "providedIds": 0,
         "deletedDocuments": 0,
@@ -453,7 +448,6 @@ async fn test_summarized_delete_documents_by_filter() {
         @r#"
     {
       "uid": 2,
-      "progress": null,
       "details": {
         "providedIds": 0,
         "deletedDocuments": 0,
@@ -486,7 +480,6 @@ async fn test_summarized_delete_documents_by_filter() {
         @r#"
     {
       "uid": 4,
-      "progress": null,
       "details": {
         "providedIds": 0,
         "deletedDocuments": 0,
@@ -523,7 +516,6 @@ async fn test_summarized_delete_document_by_id() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {
         "providedIds": 1,
         "deletedDocuments": 0
@@ -555,7 +547,6 @@ async fn test_summarized_delete_document_by_id() {
         @r#"
     {
       "uid": 2,
-      "progress": null,
       "details": {
         "providedIds": 1,
         "deletedDocuments": 0
@@ -603,7 +594,6 @@ async fn test_summarized_settings_update() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {
         "displayedAttributes": [
           "doggos",
@@ -648,7 +638,6 @@ async fn test_summarized_index_creation() {
         @r#"
     {
      "uid": 0,
-      "progress": null,
       "details": {},
       "stats": {
         "totalNbTasks": 1,
@@ -676,7 +665,6 @@ async fn test_summarized_index_creation() {
         @r#"
     {
       "uid": 1,
-      "progress": null,
       "details": {
         "primaryKey": "doggos"
       },
@@ -821,7 +809,6 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {},
       "stats": {
         "totalNbTasks": 1,
@@ -849,7 +836,6 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 1,
-      "progress": null,
       "details": {
         "primaryKey": "bones"
       },
@@ -882,7 +868,6 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 3,
-      "progress": null,
       "details": {},
       "stats": {
         "totalNbTasks": 1,
@@ -910,7 +895,6 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 4,
-      "progress": null,
       "details": {
         "primaryKey": "bones"
       },
@@ -948,7 +932,6 @@ async fn test_summarized_index_swap() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {
         "swaps": [
           {
@@ -989,7 +972,6 @@ async fn test_summarized_index_swap() {
         @r#"
     {
       "uid": 3,
-      "progress": null,
       "details": {
         "swaps": [
          {
@@ -1032,7 +1014,6 @@ async fn test_summarized_batch_cancelation() {
         @r#"
     {
       "uid": 1,
-      "progress": null,
       "details": {
         "matchedTasks": 1,
         "canceledTasks": 0,
@@ -1070,7 +1051,6 @@ async fn test_summarized_batch_deletion() {
         @r#"
     {
       "uid": 1,
-      "progress": null,
       "details": {
         "matchedTasks": 1,
         "deletedTasks": 1,
@@ -1104,7 +1084,6 @@ async fn test_summarized_dump_creation() {
         @r#"
     {
       "uid": 0,
-      "progress": null,
       "details": {
         "dumpUid": "[dumpUid]"
       },
@@ -1220,89 +1220,9 @@ async fn replace_document() {
 #[actix_rt::test]
 async fn add_no_documents() {
     let server = Server::new().await;
-    let index = server.index("kefir");
-    let (task, code) = index.add_documents(json!([]), None).await;
+    let index = server.index("test");
+    let (_response, code) = index.add_documents(json!([]), None).await;
     snapshot!(code, @"202 Accepted");
-    let task = server.wait_task(task.uid()).await;
-    let task = task.succeeded();
-    snapshot!(task, @r#"
-    {
-      "uid": "[uid]",
-      "batchUid": "[batch_uid]",
-      "indexUid": "kefir",
-      "status": "succeeded",
-      "type": "documentAdditionOrUpdate",
-      "canceledBy": null,
-      "details": {
-        "receivedDocuments": 0,
-        "indexedDocuments": 0
-      },
-      "error": null,
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "#);
-
-    let (task, _code) = index.add_documents(json!([]), Some("kefkef")).await;
-    let task = server.wait_task(task.uid()).await;
-    let task = task.succeeded();
-    snapshot!(task, @r#"
-    {
-      "uid": "[uid]",
-      "batchUid": "[batch_uid]",
-      "indexUid": "kefir",
-      "status": "succeeded",
-      "type": "documentAdditionOrUpdate",
-      "canceledBy": null,
-      "details": {
-        "receivedDocuments": 0,
-        "indexedDocuments": 0
-      },
-      "error": null,
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "#);
-
-    let (task, _code) = index.add_documents(json!([{ "kefkef": 1 }]), None).await;
-    let task = server.wait_task(task.uid()).await;
-    let task = task.succeeded();
-    snapshot!(task, @r#"
-    {
-      "uid": "[uid]",
-      "batchUid": "[batch_uid]",
-      "indexUid": "kefir",
-      "status": "succeeded",
-      "type": "documentAdditionOrUpdate",
-      "canceledBy": null,
-      "details": {
-        "receivedDocuments": 1,
-        "indexedDocuments": 1
-      },
-      "error": null,
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "#);
-    let (documents, _status) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
-    snapshot!(documents, @r#"
-    {
-      "results": [
-        {
-          "kefkef": 1
-        }
-      ],
-      "offset": 0,
-      "limit": 20,
-      "total": 1
-    }
-    "#);
 }
 
 #[actix_rt::test]
@@ -1344,18 +1264,15 @@ async fn error_add_documents_bad_document_id() {
     let server = Server::new().await;
     let index = server.index("test");
     index.create(Some("docid")).await;
 
-    // unsupported characters
-
     let documents = json!([
         {
             "docid": "foo & bar",
             "content": "foobar"
         }
     ]);
-    let (value, _code) = index.add_documents(documents, None).await;
-    index.wait_task(value.uid()).await;
-    let (response, code) = index.get_task(value.uid()).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+    let (response, code) = index.get_task(1).await;
     snapshot!(code, @"200 OK");
     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
         @r###"
@@ -1371,81 +1288,7 @@ async fn error_add_documents_bad_document_id() {
         "indexedDocuments": 0
       },
       "error": {
-        "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+        "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
-        "code": "invalid_document_id",
-        "type": "invalid_request",
-        "link": "https://docs.meilisearch.com/errors#invalid_document_id"
-      },
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "###);
-
-    // More than 512 bytes
-    let documents = json!([
-        {
-            "docid": "a".repeat(600),
-            "content": "foobar"
-        }
-    ]);
-    let (value, _code) = index.add_documents(documents, None).await;
-    index.wait_task(value.uid()).await;
-    let (response, code) = index.get_task(value.uid()).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
-        @r###"
-    {
-      "uid": 2,
-      "batchUid": 2,
-      "indexUid": "test",
-      "status": "failed",
-      "type": "documentAdditionOrUpdate",
-      "canceledBy": null,
-      "details": {
-        "receivedDocuments": 1,
-        "indexedDocuments": 0
-      },
-      "error": {
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
-        "code": "invalid_document_id",
-        "type": "invalid_request",
-        "link": "https://docs.meilisearch.com/errors#invalid_document_id"
-      },
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "###);
-
-    // Exactly 512 bytes
-    let documents = json!([
-        {
-            "docid": "a".repeat(512),
-            "content": "foobar"
-        }
-    ]);
-    let (value, _code) = index.add_documents(documents, None).await;
-    index.wait_task(value.uid()).await;
-    let (response, code) = index.get_task(value.uid()).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
-        @r###"
-    {
-      "uid": 3,
-      "batchUid": 3,
-      "indexUid": "test",
-      "status": "failed",
-      "type": "documentAdditionOrUpdate",
-      "canceledBy": null,
-      "details": {
-        "receivedDocuments": 1,
-        "indexedDocuments": 0
-      },
-      "error": {
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
       "code": "invalid_document_id",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_document_id"
@@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() {
     assert_eq!(
         response["error"]["message"],
         json!(
-            r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."#
+            r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
         )
     );
     assert_eq!(response["error"]["code"], json!("invalid_document_id"));
@@ -57,116 +57,6 @@ async fn simple_facet_search() {
     assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
 }
 
-#[actix_rt::test]
-async fn simple_facet_search_on_movies() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = json!([
-        {
-            "id": 1,
-            "title": "Carol",
-            "genres": [
-                "Romance",
-                "Drama"
-            ],
-            "color": [
-                "red"
-            ],
-            "platforms": [
-                "MacOS",
-                "Linux",
-                "Windows"
-            ]
-        },
-        {
-            "id": 2,
-            "title": "Wonder Woman",
-            "genres": [
-                "Action",
-                "Adventure"
-            ],
-            "color": [
-                "green"
-            ],
-            "platforms": [
-                "MacOS"
-            ]
-        },
-        {
-            "id": 3,
-            "title": "Life of Pi",
-            "genres": [
-                "Adventure",
-                "Drama"
-            ],
-            "color": [
-                "blue"
-            ],
-            "platforms": [
-                "Windows"
-            ]
-        },
-        {
-            "id": 4,
-            "title": "Mad Max: Fury Road",
-            "genres": [
-                "Adventure",
-                "Science Fiction"
-            ],
-            "color": [
-                "red"
-            ],
-            "platforms": [
-                "MacOS",
-                "Linux"
-            ]
-        },
-        {
-            "id": 5,
-            "title": "Moana",
-            "genres": [
-                "Fantasy",
-                "Action"
-            ],
-            "color": [
-                "red"
-            ],
-            "platforms": [
-                "Windows"
-            ]
-        },
-        {
-            "id": 6,
-            "title": "Philadelphia",
-            "genres": [
-                "Drama"
-            ],
-            "color": [
-                "blue"
-            ],
-            "platforms": [
-                "MacOS",
-                "Linux",
-                "Windows"
-            ]
-        }
-    ]);
-    let (response, code) =
-        index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
-    assert_eq!(202, code, "{:?}", response);
-    index.wait_task(response.uid()).await;
-
-    let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await;
-
-    let (response, code) =
-        index.facet_search(json!({"facetQuery": "", "facetName": "genres", "q": "" })).await;
-
-    assert_eq!(code, 200, "{}", response);
-    snapshot!(response["facetHits"], @r###"[{"value":"Action","count":2},{"value":"Adventure","count":3},{"value":"Drama","count":3},{"value":"Fantasy","count":1},{"value":"Romance","count":1},{"value":"Science Fiction","count":1}]"###);
-}
-
 #[actix_rt::test]
 async fn advanced_facet_search() {
     let server = Server::new().await;
@@ -1746,57 +1746,3 @@ async fn change_attributes_settings() {
         )
         .await;
 }
-
-/// Modifying facets with different casing should work correctly
-#[actix_rt::test]
-async fn change_facet_casing() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let (response, code) = index
-        .update_settings(json!({
-            "filterableAttributes": ["dog"],
-        }))
-        .await;
-    assert_eq!("202", code.as_str(), "{:?}", response);
-    index.wait_task(response.uid()).await;
-
-    let (response, _code) = index
-        .add_documents(
-            json!([
-                {
-                    "id": 1,
-                    "dog": "Bouvier Bernois"
-                }
-            ]),
-            None,
-        )
-        .await;
-    index.wait_task(response.uid()).await;
-
-    let (response, _code) = index
-        .add_documents(
-            json!([
-                {
-                    "id": 1,
-                    "dog": "bouvier bernois"
-                }
-            ]),
-            None,
-        )
-        .await;
-    index.wait_task(response.uid()).await;
-
-    index
-        .search(json!({ "facets": ["dog"] }), |response, code| {
-            meili_snap::snapshot!(code, @"200 OK");
-            meili_snap::snapshot!(meili_snap::json_string!(response["facetDistribution"]), @r###"
-            {
-              "dog": {
-                "bouvier bernois": 1
-              }
-            }
-            "###);
-        })
-        .await;
-}
@@ -79,7 +79,7 @@ async fn similar_bad_id() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
       "code": "invalid_similar_id",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@@ -172,7 +172,7 @@ async fn similar_invalid_id() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
       "code": "invalid_similar_id",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@@ -10,15 +10,12 @@ license.workspace = true
 
 [dependencies]
 anyhow = "1.0.86"
-arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
 clap = { version = "4.5.9", features = ["derive"] }
 dump = { path = "../dump" }
 file-store = { path = "../file-store" }
-indexmap = { version = "2.7.0", features = ["serde"] }
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 serde = { version = "1.0.209", features = ["derive"] }
-serde_json = { version = "1.0.133", features = ["preserve_order"] }
-tempfile = "3.14.0"
 time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] }
 uuid = { version = "1.10.0", features = ["v4"], default-features = false }
+arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
@@ -73,7 +73,7 @@ enum Command {
     ///
     /// Supported upgrade paths:
     ///
-    /// - v1.9.x -> v1.10.x -> v1.11.x -> v1.12.x
+    /// - v1.9.x -> v1.10.x -> v1.11.x
     OfflineUpgrade {
         #[arg(long)]
         target_version: String,
@@ -88,7 +88,7 @@ fn main() -> anyhow::Result<()> {
     match command {
         Command::ClearTaskQueue => clear_task_queue(db_path),
         Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
-            export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version)
+            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
         }
         Command::OfflineUpgrade { target_version } => {
            let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
@@ -187,7 +187,6 @@ fn export_a_dump(
     db_path: PathBuf,
     dump_dir: PathBuf,
     skip_enqueued_tasks: bool,
-    detected_version: (String, String, String),
 ) -> Result<(), anyhow::Error> {
     let started_at = OffsetDateTime::now_utc();
 
@@ -239,6 +238,9 @@ fn export_a_dump(
     if skip_enqueued_tasks {
         eprintln!("Skip dumping the enqueued tasks...");
     } else {
+        eprintln!("Dumping the enqueued tasks...");
+
+        // 3. dump the tasks
         let mut dump_tasks = dump.create_tasks_queue()?;
         let mut count = 0;
         for ret in all_tasks.iter(&rtxn)? {
@@ -252,39 +254,18 @@ fn export_a_dump(
                 if status == Status::Enqueued {
                     let content_file = file_store.get_update(content_file_uuid)?;
 
-                    if (
-                        detected_version.0.as_str(),
-                        detected_version.1.as_str(),
-                        detected_version.2.as_str(),
-                    ) < ("1", "12", "0")
-                    {
-                        eprintln!("Dumping the enqueued tasks reading them in obkv format...");
-                        let reader =
-                            DocumentsBatchReader::from_reader(content_file).with_context(|| {
-                                format!("While reading content file {:?}", content_file_uuid)
-                            })?;
-                        let (mut cursor, documents_batch_index) =
-                            reader.into_cursor_and_fields_index();
-                        while let Some(doc) = cursor.next_document().with_context(|| {
-                            format!("While iterating on content file {:?}", content_file_uuid)
-                        })? {
-                            dump_content_file
-                                .push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
-                        }
-                    } else {
-                        eprintln!(
-                            "Dumping the enqueued tasks reading them in JSON stream format..."
-                        );
-                        for document in
-                            serde_json::de::Deserializer::from_reader(content_file).into_iter()
-                        {
-                            let document = document.with_context(|| {
-                                format!("While reading content file {:?}", content_file_uuid)
-                            })?;
-                            dump_content_file.push_document(&document)?;
-                        }
-                    }
+                    let reader =
+                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
+                            format!("While reading content file {:?}", content_file_uuid)
+                        })?;
+
+                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
+                    while let Some(doc) = cursor.next_document().with_context(|| {
+                        format!("While iterating on content file {:?}", content_file_uuid)
+                    })? {
+                        dump_content_file
+                            .push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
+                    }
                     dump_content_file.flush()?;
                     count += 1;
                 }
@@ -1,14 +1,13 @@
 mod v1_10;
 mod v1_11;
-mod v1_12;
 mod v1_9;
 
 use std::path::{Path, PathBuf};
 
 use anyhow::{bail, Context};
 use meilisearch_types::versioning::create_version_file;
 
 use v1_10::v1_9_to_v1_10;
-use v1_12::{v1_11_to_v1_12, v1_12_to_v1_12_3};
 
 use crate::upgrade::v1_11::v1_10_to_v1_11;
 
@@ -20,48 +19,11 @@ pub struct OfflineUpgrade {
 
 impl OfflineUpgrade {
     pub fn upgrade(self) -> anyhow::Result<()> {
-        // Adding a version?
-        //
-        // 1. Update the LAST_SUPPORTED_UPGRADE_FROM_VERSION and LAST_SUPPORTED_UPGRADE_TO_VERSION.
-        // 2. Add new version to the upgrade list if necessary
-        // 3. Use `no_upgrade` as index for versions that are compatible.
-
-        if self.current_version == self.target_version {
-            println!("Database is already at the target version. Exiting.");
-            return Ok(());
-        }
-
-        if self.current_version > self.target_version {
-            bail!(
-                "Cannot downgrade from {}.{}.{} to {}.{}.{}. Downgrade not supported",
-                self.current_version.0,
-                self.current_version.1,
-                self.current_version.2,
-                self.target_version.0,
-                self.target_version.1,
-                self.target_version.2
-            );
-        }
-
-        const FIRST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.9.0";
-        const LAST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.12.7";
-        const FIRST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.10.0";
-        const LAST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.12.7";
-
         let upgrade_list = [
-            (
-                v1_9_to_v1_10 as fn(&Path, &str, &str, &str) -> Result<(), anyhow::Error>,
-                "1",
-                "10",
-                "0",
-            ),
+            (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"),
             (v1_10_to_v1_11, "1", "11", "0"),
-            (v1_11_to_v1_12, "1", "12", "0"),
-            (v1_12_to_v1_12_3, "1", "12", "3"),
         ];
 
-        let no_upgrade: usize = upgrade_list.len();
-
         let (current_major, current_minor, current_patch) = &self.current_version;
 
         let start_at = match (
@@ -71,13 +33,8 @@ impl OfflineUpgrade {
         ) {
             ("1", "9", _) => 0,
             ("1", "10", _) => 1,
-            ("1", "11", _) => 2,
-            ("1", "12", "0" | "1" | "2") => 3,
-            ("1", "12", "3" | "4" | "5" | "6" | "7") => no_upgrade,
             _ => {
-                bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from versions in range [{}-{}]",
-                    FIRST_SUPPORTED_UPGRADE_FROM_VERSION,
-                    LAST_SUPPORTED_UPGRADE_FROM_VERSION);
+                bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10")
             }
         };
 
@@ -86,32 +43,20 @@ impl OfflineUpgrade {
         let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
             ("1", "10", _) => 0,
             ("1", "11", _) => 1,
-            ("1", "12", "0" | "1" | "2") => 2,
-            ("1", "12", "3" | "4" | "5" | "6" | "7") => 3,
             (major, _, _) if major.starts_with('v') => {
                 bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.")
             }
             _ => {
-                bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to versions in range [{}-{}]",
-                    FIRST_SUPPORTED_UPGRADE_TO_VERSION,
-                    LAST_SUPPORTED_UPGRADE_TO_VERSION);
+                bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11")
             }
         };
 
         println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}");
 
-        if start_at == no_upgrade {
-            println!("No upgrade operation to perform, writing VERSION file");
-            create_version_file(&self.db_path, target_major, target_minor, target_patch)
-                .context("while writing VERSION file after the upgrade")?;
-            println!("Success");
-            return Ok(());
-        }
-
         #[allow(clippy::needless_range_loop)]
         for index in start_at..=ends_at {
             let (func, major, minor, patch) = upgrade_list[index];
-            (func)(&self.db_path, current_major, current_minor, current_patch)?;
+            (func)(&self.db_path)?;
             println!("Done");
             // We're writing the version file just in case an issue arise _while_ upgrading.
             // We don't want the DB to fail in an unknown state.
@@ -1,13 +1,18 @@
+use anyhow::bail;
 use std::path::Path;
 
-use anyhow::{bail, Context};
-use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
-use meilisearch_types::milli::index::{db_name, main_key};
+use anyhow::Context;
+use meilisearch_types::{
+    heed::{
+        types::{SerdeJson, Str},
+        Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
+    },
+    milli::index::{db_name, main_key},
+};
+
+use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
 
 use super::v1_9;
-use crate::uuid_codec::UuidCodec;
-use crate::{try_opening_database, try_opening_poly_database};
 
 pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
 
@@ -151,12 +156,7 @@ fn date_round_trip(
     Ok(())
 }
 
-pub fn v1_9_to_v1_10(
-    db_path: &Path,
-    _origin_major: &str,
-    _origin_minor: &str,
-    _origin_patch: &str,
-) -> anyhow::Result<()> {
+pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> {
     println!("Upgrading from v1.9.0 to v1.10.0");
     // 2 changes here
 
@@ -7,19 +7,14 @@
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use meilisearch_types::heed::types::Str;
|
use meilisearch_types::{
|
||||||
use meilisearch_types::heed::{Database, EnvOpenOptions};
|
heed::{types::Str, Database, EnvOpenOptions},
|
||||||
use meilisearch_types::milli::index::db_name;
|
milli::index::db_name,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::uuid_codec::UuidCodec;
|
use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
|
||||||
use crate::{try_opening_database, try_opening_poly_database};
|
|
||||||
|
|
||||||
pub fn v1_10_to_v1_11(
|
pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
|
||||||
db_path: &Path,
|
|
||||||
_origin_major: &str,
|
|
||||||
_origin_minor: &str,
|
|
||||||
_origin_patch: &str,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
println!("Upgrading from v1.10.0 to v1.11.0");
|
println!("Upgrading from v1.10.0 to v1.11.0");
|
||||||
|
|
||||||
let index_scheduler_path = db_path.join("tasks");
|
let index_scheduler_path = db_path.join("tasks");
|
||||||
|
|||||||
@@ -1,298 +0,0 @@
-//! The breaking changes that happened between the v1.11 and the v1.12 are:
-//! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900
-
-use std::borrow::Cow;
-use std::io::BufWriter;
-use std::path::Path;
-use std::sync::atomic::AtomicBool;
-
-use anyhow::Context;
-use file_store::FileStore;
-use indexmap::IndexMap;
-use meilisearch_types::milli::documents::DocumentsBatchReader;
-use meilisearch_types::milli::heed::types::{SerdeJson, Str};
-use meilisearch_types::milli::heed::{Database, EnvOpenOptions, RoTxn, RwTxn};
-use meilisearch_types::milli::progress::Step;
-use meilisearch_types::milli::{FieldDistribution, Index};
-use serde::Serialize;
-use serde_json::value::RawValue;
-use tempfile::NamedTempFile;
-use time::OffsetDateTime;
-use uuid::Uuid;
-
-use crate::try_opening_database;
-use crate::uuid_codec::UuidCodec;
-
-pub fn v1_11_to_v1_12(
-    db_path: &Path,
-    _origin_major: &str,
-    _origin_minor: &str,
-    _origin_patch: &str,
-) -> anyhow::Result<()> {
-    println!("Upgrading from v1.11.0 to v1.12.0");
-
-    convert_update_files(db_path)?;
-
-    Ok(())
-}
-
-pub fn v1_12_to_v1_12_3(
-    db_path: &Path,
-    origin_major: &str,
-    origin_minor: &str,
-    origin_patch: &str,
-) -> anyhow::Result<()> {
-    println!("Upgrading from v1.12.{{0, 1, 2}} to v1.12.3");
-
-    if origin_minor == "12" {
-        rebuild_field_distribution(db_path)?;
-    } else {
-        println!("Not rebuilding field distribution as it wasn't corrupted coming from v{origin_major}.{origin_minor}.{origin_patch}");
-    }
-
-    Ok(())
-}
-
-/// Convert the update files from OBKV to ndjson format.
-///
-/// 1) List all the update files using the file store.
-/// 2) For each update file, read the update file into a DocumentsBatchReader.
-/// 3) For each document in the update file, convert the document to a JSON object.
-/// 4) Write the JSON object to a tmp file in the update files directory.
-/// 5) Persist the tmp file replacing the old update file.
-fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
-    let update_files_dir_path = db_path.join("update_files");
-    let file_store = FileStore::new(&update_files_dir_path).with_context(|| {
-        format!("while creating file store for update files dir {update_files_dir_path:?}")
-    })?;
-
-    for uuid in file_store.all_uuids().context("while retrieving uuids from file store")? {
-        let uuid = uuid.context("while retrieving uuid from file store")?;
-        let update_file_path = file_store.get_update_path(uuid);
-        let update_file = file_store
-            .get_update(uuid)
-            .with_context(|| format!("while getting update file for uuid {uuid:?}"))?;
-
-        let mut file =
-            NamedTempFile::new_in(&update_files_dir_path).map(BufWriter::new).with_context(
-                || format!("while creating bufwriter for update file {update_file_path:?}"),
-            )?;
-
-        let reader = DocumentsBatchReader::from_reader(update_file).with_context(|| {
-            format!("while creating documents batch reader for update file {update_file_path:?}")
-        })?;
-        let (mut cursor, index) = reader.into_cursor_and_fields_index();
-
-        while let Some(document) = cursor.next_document().with_context(|| {
-            format!(
-                "while reading documents from batch reader for update file {update_file_path:?}"
-            )
-        })? {
-            let mut json_document = IndexMap::new();
-            for (fid, value) in document {
-                let field_name = index
-                    .name(fid)
-                    .with_context(|| format!("while getting field name for fid {fid} for update file {update_file_path:?}"))?;
-                let value: &RawValue = serde_json::from_slice(value)?;
-                json_document.insert(field_name, value);
-            }
-
-            serde_json::to_writer(&mut file, &json_document)?;
-        }
-
-        let file = file.into_inner().map_err(|e| e.into_error()).context(format!(
-            "while flushing update file bufwriter for update file {update_file_path:?}"
-        ))?;
-        let _ = file
-            // atomically replace the obkv file with the rewritten NDJSON file
-            .persist(&update_file_path)
-            .with_context(|| format!("while persisting update file {update_file_path:?}"))?;
-    }
-
-    Ok(())
-}
-
-/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3
-fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
-    let index_scheduler_path = db_path.join("tasks");
-    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
-        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
-
-    let mut sched_wtxn = env.write_txn()?;
-
-    let index_mapping: Database<Str, UuidCodec> =
-        try_opening_database(&env, &sched_wtxn, "index-mapping")?;
-    let stats_db: Database<UuidCodec, SerdeJson<IndexStats>> =
-        try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| {
-            format!("While trying to open {:?}", index_scheduler_path.display())
-        })?;
-
-    let index_count =
-        index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?;
-
-    // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn
-    // 1. immutably for the iteration
-    // 2. mutably for updating index stats
-    let indexes: Vec<_> = index_mapping
-        .iter(&sched_wtxn)?
-        .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid)))
-        .collect();
-
-    let progress = meilisearch_types::milli::progress::Progress::default();
-    let finished = AtomicBool::new(false);
-
-    std::thread::scope(|scope| {
-        let display_progress = std::thread::Builder::new()
-            .name("display_progress".into())
-            .spawn_scoped(scope, || {
-                while !finished.load(std::sync::atomic::Ordering::Relaxed) {
-                    std::thread::sleep(std::time::Duration::from_secs(5));
-                    let view = progress.as_progress_view();
-                    let Ok(view) = serde_json::to_string(&view) else {
-                        continue;
-                    };
-                    println!("{view}");
-                }
-            })
-            .unwrap();
-
-        for (index_index, result) in indexes.into_iter().enumerate() {
-            let (uid, uuid) = result?;
-            progress.update_progress(VariableNameStep::new(
-                &uid,
-                index_index as u32,
-                index_count as u32,
-            ));
-            let index_path = db_path.join("indexes").join(uuid.to_string());
-
-            println!(
-                "[{}/{index_count}]Updating index `{uid}` at `{}`",
-                index_index + 1,
-                index_path.display()
-            );
-
-            println!("\t- Rebuilding field distribution");
-
-            let index = meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path)
-                .with_context(|| {
-                    format!("while opening index {uid} at '{}'", index_path.display())
-                })?;
-
-            let mut index_txn = index.write_txn()?;
-
-            meilisearch_types::milli::update::new::reindex::field_distribution(
-                &index,
-                &mut index_txn,
-                &progress,
-            )
-            .context("while rebuilding field distribution")?;
-
-            let stats = IndexStats::new(&index, &index_txn)
-                .with_context(|| format!("computing stats for index `{uid}`"))?;
-            store_stats_of(stats_db, uuid, &mut sched_wtxn, &uid, &stats)?;
-
-            index_txn.commit().context("while committing the write txn for the updated index")?;
-        }
-
-        sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?;
-
-        finished.store(true, std::sync::atomic::Ordering::Relaxed);
-
-        if let Err(panic) = display_progress.join() {
-            let msg = match panic.downcast_ref::<&'static str>() {
-                Some(s) => *s,
-                None => match panic.downcast_ref::<String>() {
-                    Some(s) => &s[..],
-                    None => "Box<dyn Any>",
-                },
-            };
-            eprintln!("WARN: the display thread panicked with {msg}");
-        }
-
-        println!("Upgrading database succeeded");
-        Ok(())
-    })
-}
-
-pub struct VariableNameStep {
-    name: String,
-    current: u32,
-    total: u32,
-}
-
-impl VariableNameStep {
-    pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
-        Self { name: name.into(), current, total }
-    }
-}
-
-impl Step for VariableNameStep {
-    fn name(&self) -> Cow<'static, str> {
-        self.name.clone().into()
-    }
-
-    fn current(&self) -> u32 {
-        self.current
-    }
-
-    fn total(&self) -> u32 {
-        self.total
-    }
-}
-
-pub fn store_stats_of(
-    stats_db: Database<UuidCodec, SerdeJson<IndexStats>>,
-    index_uuid: Uuid,
-    sched_wtxn: &mut RwTxn,
-    index_uid: &str,
-    stats: &IndexStats,
-) -> anyhow::Result<()> {
-    stats_db
-        .put(sched_wtxn, &index_uuid, stats)
-        .with_context(|| format!("storing stats for index `{index_uid}`"))?;
-    Ok(())
-}
-
-/// The statistics that can be computed from an `Index` object.
-#[derive(Serialize, Debug)]
-pub struct IndexStats {
-    /// Number of documents in the index.
-    pub number_of_documents: u64,
-    /// Size taken up by the index' DB, in bytes.
-    ///
-    /// This includes the size taken by both the used and free pages of the DB, and as the free pages
-    /// are not returned to the disk after a deletion, this number is typically larger than
-    /// `used_database_size` that only includes the size of the used pages.
-    pub database_size: u64,
-    /// Size taken by the used pages of the index' DB, in bytes.
-    ///
-    /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
-    /// this value is typically smaller than `database_size`.
-    pub used_database_size: u64,
-    /// Association of every field name with the number of times it occurs in the documents.
-    pub field_distribution: FieldDistribution,
-    /// Creation date of the index.
-    #[serde(with = "time::serde::rfc3339")]
-    pub created_at: OffsetDateTime,
-    /// Date of the last update of the index.
-    #[serde(with = "time::serde::rfc3339")]
-    pub updated_at: OffsetDateTime,
-}
-
-impl IndexStats {
-    /// Compute the stats of an index
-    ///
-    /// # Parameters
-    ///
-    /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
-    pub fn new(index: &Index, rtxn: &RoTxn) -> meilisearch_types::milli::Result<Self> {
-        Ok(IndexStats {
-            number_of_documents: index.number_of_documents(rtxn)?,
-            database_size: index.on_disk_size()?,
-            used_database_size: index.used_size()?,
-            field_distribution: index.field_distribution(rtxn)?,
-            created_at: index.created_at(rtxn)?,
-            updated_at: index.updated_at(rtxn)?,
-        })
-    }
-}
@@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] }
 url = "2.5.2"
 rayon-par-bridge = "0.1.0"
 hashbrown = "0.15.0"
+raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 bumpalo = "3.16.0"
-bumparaw-collections = "0.1.2"
 thread_local = "1.1.8"
 allocator-api2 = "0.2.18"
 rustc-hash = "2.0.0"

@@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool {

 pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
     if document_id.is_empty()
-        || document_id.len() >= 512
+        || document_id.len() > 512
         || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
     {
         None

@@ -114,7 +114,7 @@ pub enum UserError {
         "Document identifier `{}` is invalid. \
         A document identifier can be of type integer or string, \
         only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
-        and can not be more than 511 bytes.", .document_id.to_string()
+        and can not be more than 512 bytes.", .document_id.to_string()
     )]
     InvalidDocumentId { document_id: Value },
     #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]

@@ -1734,7 +1734,6 @@ pub(crate) mod tests {

 use crate::error::{Error, InternalError};
 use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
-use crate::progress::Progress;
 use crate::update::new::indexer;
 use crate::update::settings::InnerIndexSettings;
 use crate::update::{

@@ -1811,7 +1810,7 @@ pub(crate) mod tests {
     None,
     &mut new_fields_ids_map,
     &|| false,
-    Progress::default(),
+    &|_progress| (),
 )?;

 if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {

@@ -1830,7 +1829,7 @@ pub(crate) mod tests {
     &document_changes,
     embedders,
     &|| false,
-    &Progress::default(),
+    &|_| (),
 )
 })
 .unwrap()?;

@@ -1902,7 +1901,7 @@ pub(crate) mod tests {
     None,
     &mut new_fields_ids_map,
     &|| false,
-    Progress::default(),
+    &|_progress| (),
 )?;

 if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {

@@ -1921,7 +1920,7 @@ pub(crate) mod tests {
     &document_changes,
     embedders,
     &|| false,
-    &Progress::default(),
+    &|_| (),
 )
 })
 .unwrap()?;

@@ -1983,7 +1982,7 @@ pub(crate) mod tests {
     None,
     &mut new_fields_ids_map,
     &|| false,
-    Progress::default(),
+    &|_progress| (),
 )
 .unwrap();

@@ -2002,7 +2001,7 @@ pub(crate) mod tests {
     &document_changes,
     embedders,
     &|| should_abort.load(Relaxed),
-    &Progress::default(),
+    &|_| (),
 )
 })
 .unwrap()

@@ -31,7 +31,6 @@ pub mod vector;
 #[macro_use]
 pub mod snapshot_tests;
 mod fieldids_weights_map;
-pub mod progress;

 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};
@@ -1,152 +0,0 @@
-use std::any::TypeId;
-use std::borrow::Cow;
-use std::sync::atomic::{AtomicU32, Ordering};
-use std::sync::{Arc, RwLock};
-
-use serde::Serialize;
-
-pub trait Step: 'static + Send + Sync {
-    fn name(&self) -> Cow<'static, str>;
-    fn current(&self) -> u32;
-    fn total(&self) -> u32;
-}
-
-#[derive(Clone, Default)]
-pub struct Progress {
-    steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
-}
-
-impl Progress {
-    pub fn update_progress<P: Step>(&self, sub_progress: P) {
-        let mut steps = self.steps.write().unwrap();
-        let step_type = TypeId::of::<P>();
-        if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
-            steps.truncate(idx);
-        }
-        steps.push((step_type, Box::new(sub_progress)));
-    }
-
-    // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
-    pub fn as_progress_view(&self) -> ProgressView {
-        let steps = self.steps.read().unwrap();
-
-        let mut percentage = 0.0;
-        let mut prev_factors = 1.0;
-
-        let mut step_view = Vec::with_capacity(steps.len());
-        for (_, step) in steps.iter() {
-            prev_factors *= step.total() as f32;
-            percentage += step.current() as f32 / prev_factors;
-
-            step_view.push(ProgressStepView {
-                current_step: step.name(),
-                finished: step.current(),
-                total: step.total(),
-            });
-        }
-
-        ProgressView { steps: step_view, percentage: percentage * 100.0 }
-    }
-}
-
-/// This trait lets you use the AtomicSubStep defined right below.
-/// The name must be a const that never changed but that can't be enforced by the type system because it make the trait non object-safe.
-/// By forcing the Default trait + the &'static str we make it harder to miss-use the trait.
-pub trait NamedStep: 'static + Send + Sync + Default {
-    fn name(&self) -> &'static str;
-}
-
-/// Structure to quickly define steps that need very quick, lockless updating of their current step.
-/// You can use this struct if:
-/// - The name of the step doesn't change
-/// - The total number of steps doesn't change
-pub struct AtomicSubStep<Name: NamedStep> {
-    unit_name: Name,
-    current: Arc<AtomicU32>,
-    total: u32,
-}
-
-impl<Name: NamedStep> AtomicSubStep<Name> {
-    pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
-        let current = Arc::new(AtomicU32::new(0));
-        (current.clone(), Self { current, total, unit_name: Name::default() })
-    }
-}
-
-impl<Name: NamedStep> Step for AtomicSubStep<Name> {
-    fn name(&self) -> Cow<'static, str> {
-        self.unit_name.name().into()
-    }
-
-    fn current(&self) -> u32 {
-        self.current.load(Ordering::Relaxed)
-    }
-
-    fn total(&self) -> u32 {
-        self.total
-    }
-}
-
-#[macro_export]
-macro_rules! make_enum_progress {
-    ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => {
-        #[repr(u8)]
-        #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
-        #[allow(clippy::enum_variant_names)]
-        $visibility enum $name {
-            $($variant),+
-        }
-
-        impl Step for $name {
-            fn name(&self) -> Cow<'static, str> {
-                use convert_case::Casing;
-
-                match self {
-                    $(
-                        $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into()
-                    ),+
-                }
-            }
-
-            fn current(&self) -> u32 {
-                *self as u32
-            }
-
-            fn total(&self) -> u32 {
-                Self::CARDINALITY as u32
-            }
-        }
-    };
-}
-
-#[macro_export]
-macro_rules! make_atomic_progress {
-    ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => {
-        #[derive(Default, Debug, Clone, Copy)]
-        pub struct $struct_name {}
-        impl NamedStep for $struct_name {
-            fn name(&self) -> &'static str {
-                $step_name
-            }
-        }
-        pub type $atomic_struct_name = AtomicSubStep<$struct_name>;
-    };
-}
-
-make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
-make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
-
-#[derive(Debug, Serialize, Clone)]
-#[serde(rename_all = "camelCase")]
-pub struct ProgressView {
-    pub steps: Vec<ProgressStepView>,
-    pub percentage: f32,
-}
-
-#[derive(Debug, Serialize, Clone)]
-#[serde(rename_all = "camelCase")]
-pub struct ProgressStepView {
-    pub current_step: Cow<'static, str>,
-    pub finished: u32,
-    pub total: u32,
-}
@@ -3,13 +3,12 @@ use std::collections::BTreeMap;
 use std::fmt::{self, Debug};

 use bumpalo::Bump;
-use bumparaw_collections::{RawMap, RawVec, Value};
 use liquid::model::{
     ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
     Value as LiquidValue,
 };
 use liquid::{ObjectView, ValueView};
-use rustc_hash::FxBuildHasher;
+use raw_collections::{RawMap, RawVec};
 use serde_json::value::RawValue;

 use crate::update::del_add::{DelAdd, KvReaderDelAdd};

@@ -196,7 +195,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc
 }

 impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
-    fn as_debug(&self) -> &dyn Debug {
+    fn as_debug(&self) -> &dyn fmt::Debug {
         self
     }
     fn render(&self) -> liquid::model::DisplayCow<'_> {

@@ -244,13 +243,14 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
     }
 }

+#[derive(Debug)]
 struct ParseableValue<'doc> {
-    value: Value<'doc, FxBuildHasher>,
+    value: raw_collections::Value<'doc>,
 }

 impl<'doc> ParseableValue<'doc> {
     pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
-        let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap();
+        let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
         Self { value }
     }

@@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> {
 }

 // transparent newtype for implementing ValueView
-#[derive(Debug)]
 #[repr(transparent)]
-struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>);
+#[derive(Debug)]
+struct ParseableMap<'doc>(RawMap<'doc>);

 // transparent newtype for implementing ValueView
-#[derive(Debug)]
 #[repr(transparent)]
+#[derive(Debug)]
 struct ParseableArray<'doc>(RawVec<'doc>);

 impl<'doc> ParseableMap<'doc> {
-    pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> {
+    pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> {
         // SAFETY: repr(transparent)
-        unsafe { &*(map as *const RawMap<FxBuildHasher> as *const Self) }
+        unsafe { &*(map as *const RawMap as *const Self) }
     }
 }

@@ -447,9 +447,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn render(&self) -> DisplayCow<'_> {
-        use bumparaw_collections::value::Number;
-        use bumparaw_collections::Value;
+        use raw_collections::value::Number;
+        use raw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil.render(),
             Value::Bool(v) => v.render(),

@@ -465,9 +464,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn source(&self) -> DisplayCow<'_> {
-        use bumparaw_collections::value::Number;
-        use bumparaw_collections::Value;
+        use raw_collections::value::Number;
+        use raw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil.source(),
             Value::Bool(v) => ValueView::source(v),

@@ -483,9 +481,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn type_name(&self) -> &'static str {
-        use bumparaw_collections::value::Number;
-        use bumparaw_collections::Value;
+        use raw_collections::value::Number;
+        use raw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil.type_name(),
             Value::Bool(v) => v.type_name(),

@@ -501,8 +498,7 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn query_state(&self, state: State) -> bool {
-        use bumparaw_collections::Value;
+        use raw_collections::Value;

         match &self.value {
             Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
             Value::Bool(v) => ValueView::query_state(v, state),

@@ -519,8 +515,7 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn to_kstr(&self) -> KStringCow<'_> {
-        use bumparaw_collections::Value;
+        use raw_collections::Value;

         match &self.value {
             Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
             Value::Bool(v) => ValueView::to_kstr(v),

@@ -532,14 +527,12 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn to_value(&self) -> LiquidValue {
-        use bumparaw_collections::value::Number;
-        use bumparaw_collections::Value;
+        use raw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil,
             Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
             Value::Number(number) => match number {
-                Number::PosInt(number) => {
+                raw_collections::value::Number::PosInt(number) => {
                     let number: i64 = match (*number).try_into() {
                         Ok(number) => number,
                         Err(_) => {

@@ -548,8 +541,12 @@ impl<'doc> ValueView for ParseableValue<'doc> {
                     };
                     LiquidValue::Scalar(ScalarCow::new(number))
                 }
-                Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
-                Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
+                raw_collections::value::Number::NegInt(number) => {
+                    LiquidValue::Scalar(ScalarCow::new(*number))
+                }
+                raw_collections::value::Number::Finite(number) => {
+                    LiquidValue::Scalar(ScalarCow::new(*number))
+                }
             },
             Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
             Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),

@@ -558,9 +555,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
-        use bumparaw_collections::value::Number;
-        use bumparaw_collections::Value;
+        use raw_collections::value::Number;
+        use raw_collections::Value;

         match &self.value {
             Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
             Value::Number(number) => match number {

@@ -580,41 +576,34 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn is_scalar(&self) -> bool {
-        use bumparaw_collections::Value;
+        use raw_collections::Value;

         matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
     }

     fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
-        if let Value::Array(array) = &self.value {
+        if let raw_collections::Value::Array(array) = &self.value {
             return Some(ParseableArray::as_parseable(array) as _);
         }
         None
     }

     fn is_array(&self) -> bool {
-        matches!(&self.value, bumparaw_collections::Value::Array(_))
+        matches!(&self.value, raw_collections::Value::Array(_))
     }

     fn as_object(&self) -> Option<&dyn ObjectView> {
-        if let Value::Object(object) = &self.value {
+        if let raw_collections::Value::Object(object) = &self.value {
             return Some(ParseableMap::as_parseable(object) as _);
         }
         None
     }

     fn is_object(&self) -> bool {
-        matches!(&self.value, bumparaw_collections::Value::Object(_))
+        matches!(&self.value, raw_collections::Value::Object(_))
     }

     fn is_nil(&self) -> bool {
-        matches!(&self.value, bumparaw_collections::Value::Null)
-    }
-}
-
-impl Debug for ParseableValue<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("ParseableValue").field("value", &self.value).finish()
+        matches!(&self.value, raw_collections::Value::Null)
     }
 }
@@ -38,16 +38,6 @@ pub struct RenderPromptError {
     pub fault: FaultSource,
 }
 impl RenderPromptError {
-    pub(crate) fn missing_context_with_external_docid(
-        external_docid: String,
-        inner: liquid::Error,
-    ) -> RenderPromptError {
-        Self {
-            kind: RenderPromptErrorKind::MissingContextWithExternalDocid(external_docid, inner),
-            fault: FaultSource::User,
-        }
-    }
-
     pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError {
         Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User }
     }

@@ -57,8 +47,6 @@ impl RenderPromptError {
 pub enum RenderPromptErrorKind {
     #[error("missing field in document: {0}")]
     MissingContext(liquid::Error),
-    #[error("missing field in document `{0}`: {1}")]
-    MissingContextWithExternalDocid(String, liquid::Error),
 }

 impl From<RenderPromptError> for crate::Error {

@@ -119,7 +119,6 @@ impl Prompt {
         'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents
     >(
         &self,
-        external_docid: &str,
         document: impl crate::update::new::document::Document<'a> + Debug,
         field_id_map: &RefCell<GlobalFieldsIdsMap>,
         doc_alloc: &'doc Bump,

@@ -131,12 +130,9 @@ impl Prompt {
             self.max_bytes.unwrap_or_else(default_max_bytes).get(),
             doc_alloc,
         );
-        self.template.render_to(&mut rendered, &context).map_err(|liquid_error| {
-            RenderPromptError::missing_context_with_external_docid(
-                external_docid.to_owned(),
-                liquid_error,
-            )
-        })?;
+        self.template
+            .render_to(&mut rendered, &context)
+            .map_err(RenderPromptError::missing_context)?;
         Ok(std::str::from_utf8(rendered.into_bump_slice())
             .expect("render can only write UTF-8 because all inputs and processing preserve utf-8"))
     }
@@ -219,19 +219,12 @@ impl<'a> FacetDistribution<'a> {
             let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();

             let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
-            let optional_original_string =
-                self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?;
-
-            let original_string = match optional_original_string {
-                Some(original_string) => original_string.to_owned(),
-                None => {
-                    tracing::error!(
-                        "Missing original facet string. Using the normalized facet {} instead",
-                        facet_key
-                    );
-                    facet_key.to_string()
-                }
-            };
+            let original_string = self
+                .index
+                .field_id_docid_facet_strings
+                .get(self.rtxn, &key)?
+                .unwrap()
+                .to_owned();

             distribution.insert(original_string, nbr_docids);
             if distribution.len() == self.max_values_per_facet {

@@ -5,7 +5,6 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};

-use crate::progress::Progress;
 use crate::update::new::indexer;
 use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use crate::vector::EmbeddingConfigs;

@@ -73,7 +72,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     None,
     &mut new_fields_ids_map,
     &|| false,
-    Progress::default(),
+    &|_progress| (),
 )
 .unwrap();

@@ -92,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     &document_changes,
     embedders,
     &|| false,
-    &Progress::default(),
+    &|_| (),
 )
 .unwrap();
@@ -79,29 +79,22 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
 use std::collections::BTreeSet;
 use std::fs::File;
 use std::io::BufReader;
-use std::ops::Bound;

 use grenad::Merger;
 use heed::types::{Bytes, DecodeIgnore};
-use heed::BytesDecode as _;
-use roaring::RoaringBitmap;
 use time::OffsetDateTime;
 use tracing::debug;

 use self::incremental::FacetsUpdateIncremental;
 use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
 use crate::facet::FacetType;
-use crate::heed_codec::facet::{
-    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
-};
+use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
 use crate::heed_codec::BytesRefCodec;
-use crate::search::facet::get_highest_level;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd};
 use crate::{try_split_array_at, FieldId, Index, Result};

 pub mod bulk;
 pub mod incremental;
-pub mod new_incremental;

 /// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
 ///

@@ -653,194 +646,3 @@ mod comparison_bench {
         }
     }
 }
-
-/// Run sanity checks on the specified fid tree
-///
-/// 1. No "orphan" child value, any child value has a parent
-/// 2. Any docid in the child appears in the parent
-/// 3. No docid in the parent is missing from all its children
-/// 4. no group is bigger than max_group_size
-/// 5. Less than 50% of groups are bigger than group_size
-/// 6. group size matches the number of children
-/// 7. max_level is < 255
-pub(crate) fn sanity_checks(
-    index: &Index,
-    rtxn: &heed::RoTxn,
-    field_id: FieldId,
-    facet_type: FacetType,
-    group_size: usize,
-    _min_level_size: usize, // might add a check on level size later
-    max_group_size: usize,
-) -> Result<()> {
-    tracing::info!(%field_id, ?facet_type, "performing sanity checks");
-    let database = match facet_type {
-        FacetType::String => {
-            index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
-        }
-        FacetType::Number => {
-            index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
-        }
-    };
-
-    let leaf_prefix: FacetGroupKey<&[u8]> = FacetGroupKey { field_id, level: 0, left_bound: &[] };
-
-    let leaf_it = database.prefix_iter(rtxn, &leaf_prefix)?;
-
-    let max_level = get_highest_level(rtxn, database, field_id)?;
-    if max_level == u8::MAX {
-        panic!("max_level == 255");
-    }
-
-    for leaf in leaf_it {
-        let (leaf_facet_value, leaf_docids) = leaf?;
-        let mut current_level = 0;
-
-        let mut current_parent_facet_value: Option<FacetGroupKey<&[u8]>> = None;
-        let mut current_parent_docids: Option<crate::heed_codec::facet::FacetGroupValue> = None;
-        loop {
-            current_level += 1;
-            if current_level >= max_level {
-                break;
-            }
-            let parent_key_right_bound = FacetGroupKey {
-                field_id,
-                level: current_level,
-                left_bound: leaf_facet_value.left_bound,
-            };
-            let (parent_facet_value, parent_docids) = database
-                .get_lower_than_or_equal_to(rtxn, &parent_key_right_bound)?
-                .expect("no parent found");
-            if parent_facet_value.level != current_level {
-                panic!(
-                    "wrong parent level, found_level={}, expected_level={}",
-                    parent_facet_value.level, current_level
-                );
-            }
-            if parent_facet_value.field_id != field_id {
-                panic!("wrong parent fid");
-            }
-            if parent_facet_value.left_bound > leaf_facet_value.left_bound {
-                panic!("wrong parent left bound");
-            }
-
-            if !leaf_docids.bitmap.is_subset(&parent_docids.bitmap) {
-                panic!(
-                    "missing docids from leaf in parent, current_level={}, parent={}, child={}, missing={missing:?}, child_len={}, child={:?}",
-                    current_level,
-                    facet_to_string(parent_facet_value.left_bound, facet_type),
-                    facet_to_string(leaf_facet_value.left_bound, facet_type),
-                    leaf_docids.bitmap.len(),
-                    leaf_docids.bitmap.clone(),
-                    missing=leaf_docids.bitmap - parent_docids.bitmap,
-                )
-            }
-
-            if let Some(current_parent_facet_value) = current_parent_facet_value {
-                if current_parent_facet_value.field_id != parent_facet_value.field_id {
-                    panic!("wrong parent parent fid");
-                }
-                if current_parent_facet_value.level + 1 != parent_facet_value.level {
-                    panic!("wrong parent parent level");
-                }
-                if current_parent_facet_value.left_bound < parent_facet_value.left_bound {
-                    panic!("wrong parent parent left bound");
-                }
-            }
-
-            if let Some(current_parent_docids) = current_parent_docids {
-                if !current_parent_docids.bitmap.is_subset(&parent_docids.bitmap) {
-                    panic!("missing docids from intermediate node in parent, parent_level={}, parent={}, intermediate={}, missing={missing:?}, intermediate={:?}",
-                        parent_facet_value.level,
-                        facet_to_string(parent_facet_value.left_bound, facet_type),
-                        facet_to_string(current_parent_facet_value.unwrap().left_bound, facet_type),
-                        current_parent_docids.bitmap.clone(),
-                        missing=current_parent_docids.bitmap - parent_docids.bitmap,
-                    );
-                }
-            }
-
-            current_parent_facet_value = Some(parent_facet_value);
-            current_parent_docids = Some(parent_docids);
-        }
-    }
-    tracing::info!(%field_id, ?facet_type, "checked all leaves");
-
-    let mut current_level = max_level;
-    let mut greater_than_group = 0usize;
-    let mut total = 0usize;
-    loop {
-        if current_level == 0 {
-            break;
-        }
-        let child_level = current_level - 1;
-        tracing::info!(%field_id, ?facet_type, %current_level, "checked groups for level");
-        let level_groups_prefix: FacetGroupKey<&[u8]> =
-            FacetGroupKey { field_id, level: current_level, left_bound: &[] };
-        let mut level_groups_it = database.prefix_iter(rtxn, &level_groups_prefix)?.peekable();
-
-        'group_it: loop {
-            let Some(group) = level_groups_it.next() else { break 'group_it };
-
-            let (group_facet_value, group_docids) = group?;
-            let child_left_bound = group_facet_value.left_bound.to_owned();
-            let mut expected_docids = RoaringBitmap::new();
-            let mut expected_size = 0usize;
-            let right_bound = level_groups_it
-                .peek()
-                .and_then(|res| res.as_ref().ok())
-                .map(|(key, _)| key.left_bound);
-            let child_left_bound = FacetGroupKey {
-                field_id,
-                level: child_level,
-                left_bound: child_left_bound.as_slice(),
-            };
-            let child_left_bound = Bound::Included(&child_left_bound);
-            let child_right_bound;
-            let child_right_bound = if let Some(right_bound) = right_bound {
-                child_right_bound =
-                    FacetGroupKey { field_id, level: child_level, left_bound: right_bound };
-                Bound::Excluded(&child_right_bound)
-            } else {
-                Bound::Unbounded
-            };
-            let children = database.range(rtxn, &(child_left_bound, child_right_bound))?;
-            for child in children {
-                let (child_facet_value, child_docids) = child?;
-                if child_facet_value.field_id != field_id {
-                    break;
-                }
-                if child_facet_value.level != child_level {
-                    break;
-                }
-                expected_size += 1;
-                expected_docids |= &child_docids.bitmap;
-            }
-            assert_eq!(expected_size, group_docids.size as usize);
-            assert!(expected_size <= max_group_size);
-            assert_eq!(expected_docids, group_docids.bitmap);
-            total += 1;
-            if expected_size > group_size {
-                greater_than_group += 1;
-            }
-        }
-
-        current_level -= 1;
-    }
-    if greater_than_group * 2 > total {
-        panic!("too many groups have a size > group_size");
-    }
-
-    tracing::info!("sanity checks OK");
-
-    Ok(())
-}
-
-fn facet_to_string(facet_value: &[u8], facet_type: FacetType) -> String {
-    match facet_type {
-        FacetType::String => bstr::BStr::new(facet_value).to_string(),
-        FacetType::Number => match OrderedF64Codec::bytes_decode(facet_value) {
-            Ok(value) => value.to_string(),
-            Err(e) => format!("error: {e} (bytes: {facet_value:?}"),
-        },
-    }
-}
@@ -1,498 +0,0 @@
-use std::ops::Bound;
-
-use heed::types::{Bytes, DecodeIgnore};
-use heed::{BytesDecode as _, Database, RwTxn};
-use roaring::RoaringBitmap;
-
-use crate::facet::FacetType;
-use crate::heed_codec::facet::{
-    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
-};
-use crate::heed_codec::BytesRefCodec;
-use crate::search::facet::get_highest_level;
-use crate::update::valid_facet_value;
-use crate::{FieldId, Index, Result};
-
-pub struct FacetsUpdateIncremental {
-    inner: FacetsUpdateIncrementalInner,
-    delta_data: Vec<FacetFieldIdChange>,
-}
-
-struct FacetsUpdateIncrementalInner {
-    db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
-    field_id: FieldId,
-    group_size: u8,
-    min_level_size: u8,
-    max_group_size: u8,
-}
-
-impl FacetsUpdateIncremental {
-    pub fn new(
-        index: &Index,
-        facet_type: FacetType,
-        field_id: FieldId,
-        delta_data: Vec<FacetFieldIdChange>,
-        group_size: u8,
-        min_level_size: u8,
-        max_group_size: u8,
-    ) -> Self {
-        FacetsUpdateIncremental {
-            inner: FacetsUpdateIncrementalInner {
-                db: match facet_type {
-                    FacetType::String => index
-                        .facet_id_string_docids
-                        .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
-                    FacetType::Number => index
-                        .facet_id_f64_docids
-                        .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
-                },
-                field_id,
-                group_size,
-                min_level_size,
-                max_group_size,
-            },
-
-            delta_data,
-        }
-    }
-
-    #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
-    pub fn execute(mut self, wtxn: &mut RwTxn) -> Result<()> {
-        if self.delta_data.is_empty() {
-            return Ok(());
-        }
-        self.delta_data.sort_unstable_by(
-            |FacetFieldIdChange { facet_value: left, .. },
-             FacetFieldIdChange { facet_value: right, .. }| {
-                left.cmp(right)
-                    // sort in **reverse** lexicographic order
-                    .reverse()
-            },
-        );
-
-        self.inner.find_changed_parents(wtxn, self.delta_data)?;
-
-        self.inner.add_or_delete_level(wtxn)
-    }
-}
-
-impl FacetsUpdateIncrementalInner {
-    /// WARNING: `changed_children` must be sorted in **reverse** lexicographic order.
-    fn find_changed_parents(
-        &self,
-        wtxn: &mut RwTxn,
-        mut changed_children: Vec<FacetFieldIdChange>,
-    ) -> Result<()> {
-        let mut changed_parents = vec![];
-        for child_level in 0u8..u8::MAX {
-            // child_level < u8::MAX by construction
-            let parent_level = child_level + 1;
-            let parent_level_left_bound: FacetGroupKey<&[u8]> =
-                FacetGroupKey { field_id: self.field_id, level: parent_level, left_bound: &[] };
-
-            let mut last_parent: Option<Box<[u8]>> = None;
-            let mut child_it = changed_children
-                // drain all changed children
-                .drain(..)
-                // keep only children whose value is valid in the LMDB sense
-                .filter(|child| valid_facet_value(&child.facet_value));
-            // `while let` rather than `for` because we advance `child_it` inside of the loop
-            'current_level: while let Some(child) = child_it.next() {
-                if let Some(last_parent) = &last_parent {
-                    if &child.facet_value >= last_parent {
-                        self.compute_parent_group(wtxn, child_level, child.facet_value)?;
-                        continue 'current_level;
-                    }
-                }
-
-                // need to find a new parent
-                let parent_key_prefix = FacetGroupKey {
-                    field_id: self.field_id,
-                    level: parent_level,
-                    left_bound: &*child.facet_value,
-                };
-
-                let parent = self
-                    .db
-                    .remap_data_type::<DecodeIgnore>()
-                    .rev_range(
-                        wtxn,
-                        &(
-                            Bound::Excluded(&parent_level_left_bound),
-                            Bound::Included(&parent_key_prefix),
-                        ),
-                    )?
-                    .next();
-
-                match parent {
-                    Some(Ok((parent_key, _parent_value))) => {
-                        // found parent, cache it for next keys
-                        last_parent = Some(parent_key.left_bound.to_owned().into_boxed_slice());
-
-                        // add to modified list for parent level
-                        changed_parents.push(FacetFieldIdChange {
-                            facet_value: parent_key.left_bound.to_owned().into_boxed_slice(),
-                        });
-                        self.compute_parent_group(wtxn, child_level, child.facet_value)?;
-                    }
-                    Some(Err(err)) => return Err(err.into()),
-                    None => {
-                        // no parent for that key
-                        let mut parent_it = self
-                            .db
-                            .remap_data_type::<DecodeIgnore>()
-                            .prefix_iter_mut(wtxn, &parent_level_left_bound)?;
-                        match parent_it.next() {
-                            // 1. left of the current left bound, or
-                            Some(Ok((first_key, _first_value))) => {
-                                // make sure we don't spill on the neighboring fid (level also included defensively)
-                                if first_key.field_id != self.field_id
-                                    || first_key.level != parent_level
-                                {
-                                    // max level reached, exit
-                                    drop(parent_it);
-                                    self.compute_parent_group(
-                                        wtxn,
-                                        child_level,
-                                        child.facet_value,
-                                    )?;
-                                    for child in child_it.by_ref() {
-                                        self.compute_parent_group(
-                                            wtxn,
-                                            child_level,
-                                            child.facet_value,
-                                        )?;
-                                    }
-                                    return Ok(());
-                                }
-                                // remove old left bound
-                                unsafe { parent_it.del_current()? };
-                                drop(parent_it);
-                                changed_parents.push(FacetFieldIdChange {
-                                    facet_value: child.facet_value.clone(),
-                                });
-                                self.compute_parent_group(wtxn, child_level, child.facet_value)?;
-                                // pop all elements in order to visit the new left bound
-                                let new_left_bound =
-                                    &mut changed_parents.last_mut().unwrap().facet_value;
-                                for child in child_it.by_ref() {
-                                    new_left_bound.clone_from(&child.facet_value);
-
-                                    self.compute_parent_group(
-                                        wtxn,
-                                        child_level,
-                                        child.facet_value,
-                                    )?;
-                                }
-                            }
-                            Some(Err(err)) => return Err(err.into()),
-                            // 2. max level reached, exit
-                            None => {
-                                drop(parent_it);
-                                self.compute_parent_group(wtxn, child_level, child.facet_value)?;
-                                for child in child_it.by_ref() {
-                                    self.compute_parent_group(
-                                        wtxn,
-                                        child_level,
-                                        child.facet_value,
-                                    )?;
-                                }
-                                return Ok(());
-                            }
-                        }
-                    }
-                }
-            }
-            if changed_parents.is_empty() {
-                return Ok(());
-            }
-            drop(child_it);
-            std::mem::swap(&mut changed_children, &mut changed_parents);
-            // changed_parents is now empty because changed_children was emptied by the drain
-        }
-        Ok(())
-    }
-
-    fn compute_parent_group(
-        &self,
-        wtxn: &mut RwTxn<'_>,
-        parent_level: u8,
-        parent_left_bound: Box<[u8]>,
-    ) -> Result<()> {
-        let mut range_left_bound: Vec<u8> = parent_left_bound.into();
-        if parent_level == 0 {
-            return Ok(());
-        }
-        let child_level = parent_level - 1;
-
-        let parent_key = FacetGroupKey {
-            field_id: self.field_id,
-            level: parent_level,
-            left_bound: &*range_left_bound,
-        };
-        let child_right_bound = self
-            .db
-            .remap_data_type::<DecodeIgnore>()
-            .get_greater_than(wtxn, &parent_key)?
-            .and_then(
-                |(
-                    FacetGroupKey {
-                        level: right_level,
-                        field_id: right_fid,
-                        left_bound: right_bound,
|
|
||||||
},
|
|
||||||
_,
|
|
||||||
)| {
|
|
||||||
if parent_level != right_level || self.field_id != right_fid {
|
|
||||||
// there was a greater key, but with a greater level or fid, so not a sibling to the parent: ignore
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some(right_bound.to_owned())
|
|
||||||
},
|
|
||||||
);
|
|
||||||
let child_right_bound = match &child_right_bound {
|
|
||||||
Some(right_bound) => Bound::Excluded(FacetGroupKey {
|
|
||||||
left_bound: right_bound.as_slice(),
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: child_level,
|
|
||||||
}),
|
|
||||||
None => Bound::Unbounded,
|
|
||||||
};
|
|
||||||
|
|
||||||
let child_left_key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: child_level,
|
|
||||||
left_bound: &*range_left_bound,
|
|
||||||
};
|
|
||||||
let mut child_left_bound = Bound::Included(child_left_key);
|
|
||||||
|
|
||||||
loop {
|
|
||||||
// do a first pass on the range to find the number of children
|
|
||||||
let child_count = self
|
|
||||||
.db
|
|
||||||
.remap_data_type::<DecodeIgnore>()
|
|
||||||
.range(wtxn, &(child_left_bound, child_right_bound))?
|
|
||||||
.take(self.max_group_size as usize * 2)
|
|
||||||
.count();
|
|
||||||
let mut child_it = self.db.range(wtxn, &(child_left_bound, child_right_bound))?;
|
|
||||||
|
|
||||||
// pick the right group_size depending on the number of children
|
|
||||||
let group_size = if child_count >= self.max_group_size as usize * 2 {
|
|
||||||
// more than twice the max_group_size => there will be space for at least 2 groups of max_group_size
|
|
||||||
self.max_group_size as usize
|
|
||||||
} else if child_count >= self.group_size as usize {
|
|
||||||
// size in [group_size, max_group_size * 2[
|
|
||||||
// divided by 2 it is between [group_size / 2, max_group_size[
|
|
||||||
// this ensures that the tree is balanced
|
|
||||||
child_count / 2
|
|
||||||
} else {
|
|
||||||
// take everything
|
|
||||||
child_count
|
|
||||||
};
|
|
||||||
|
|
||||||
let res: Result<_> = child_it
|
|
||||||
.by_ref()
|
|
||||||
.take(group_size)
|
|
||||||
// stop if we go to the next level or field id
|
|
||||||
.take_while(|res| match res {
|
|
||||||
Ok((child_key, _)) => {
|
|
||||||
child_key.field_id == self.field_id && child_key.level == child_level
|
|
||||||
}
|
|
||||||
Err(_) => true,
|
|
||||||
})
|
|
||||||
.try_fold(
|
|
||||||
(None, FacetGroupValue { size: 0, bitmap: Default::default() }),
|
|
||||||
|(bounds, mut group_value), child_res| {
|
|
||||||
let (child_key, child_value) = child_res?;
|
|
||||||
let bounds = match bounds {
|
|
||||||
Some((left_bound, _)) => Some((left_bound, child_key.left_bound)),
|
|
||||||
None => Some((child_key.left_bound, child_key.left_bound)),
|
|
||||||
};
|
|
||||||
// max_group_size <= u8::MAX
|
|
||||||
group_value.size += 1;
|
|
||||||
group_value.bitmap |= &child_value.bitmap;
|
|
||||||
Ok((bounds, group_value))
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
let (bounds, group_value) = res?;
|
|
||||||
|
|
||||||
let Some((group_left_bound, right_bound)) = bounds else {
|
|
||||||
let update_key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: parent_level,
|
|
||||||
left_bound: &*range_left_bound,
|
|
||||||
};
|
|
||||||
drop(child_it);
|
|
||||||
if let Bound::Included(_) = child_left_bound {
|
|
||||||
self.db.delete(wtxn, &update_key)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
|
|
||||||
drop(child_it);
|
|
||||||
let current_left_bound = group_left_bound.to_owned();
|
|
||||||
|
|
||||||
let delete_old_bound = match child_left_bound {
|
|
||||||
Bound::Included(bound) => {
|
|
||||||
if bound.left_bound != current_left_bound {
|
|
||||||
Some(range_left_bound.clone())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
range_left_bound.clear();
|
|
||||||
range_left_bound.extend_from_slice(right_bound);
|
|
||||||
let child_left_key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: child_level,
|
|
||||||
left_bound: range_left_bound.as_slice(),
|
|
||||||
};
|
|
||||||
child_left_bound = Bound::Excluded(child_left_key);
|
|
||||||
|
|
||||||
if let Some(old_bound) = delete_old_bound {
|
|
||||||
let update_key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: parent_level,
|
|
||||||
left_bound: old_bound.as_slice(),
|
|
||||||
};
|
|
||||||
self.db.delete(wtxn, &update_key)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let update_key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: parent_level,
|
|
||||||
left_bound: current_left_bound.as_slice(),
|
|
||||||
};
|
|
||||||
if group_value.bitmap.is_empty() {
|
|
||||||
self.db.delete(wtxn, &update_key)?;
|
|
||||||
} else {
|
|
||||||
self.db.put(wtxn, &update_key, &group_value)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the highest level has exceeded `min_level_size` * `self.group_size`.
|
|
||||||
/// If it has, we must build an addition level above it.
|
|
||||||
/// Then check whether the highest level is under `min_level_size`.
|
|
||||||
/// If it has, we must remove the complete level.
|
|
||||||
pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn<'_>) -> Result<()> {
|
|
||||||
let highest_level = get_highest_level(txn, self.db, self.field_id)?;
|
|
||||||
let mut highest_level_prefix = vec![];
|
|
||||||
highest_level_prefix.extend_from_slice(&self.field_id.to_be_bytes());
|
|
||||||
highest_level_prefix.push(highest_level);
|
|
||||||
|
|
||||||
let size_highest_level =
|
|
||||||
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
|
|
||||||
|
|
||||||
if size_highest_level >= self.group_size as usize * self.min_level_size as usize {
|
|
||||||
self.add_level(txn, highest_level, &highest_level_prefix, size_highest_level)
|
|
||||||
} else if size_highest_level < self.min_level_size as usize && highest_level != 0 {
|
|
||||||
self.delete_level(txn, &highest_level_prefix)
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Delete a level.
|
|
||||||
fn delete_level(&self, txn: &mut RwTxn<'_>, highest_level_prefix: &[u8]) -> Result<()> {
|
|
||||||
let mut to_delete = vec![];
|
|
||||||
let mut iter =
|
|
||||||
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?;
|
|
||||||
for el in iter.by_ref() {
|
|
||||||
let (k, _) = el?;
|
|
||||||
to_delete.push(
|
|
||||||
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
|
|
||||||
.map_err(heed::Error::Encoding)?
|
|
||||||
.into_owned(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
drop(iter);
|
|
||||||
for k in to_delete {
|
|
||||||
self.db.delete(txn, &k.as_ref())?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build an additional level for the field id.
|
|
||||||
fn add_level(
|
|
||||||
&self,
|
|
||||||
txn: &mut RwTxn<'_>,
|
|
||||||
highest_level: u8,
|
|
||||||
highest_level_prefix: &[u8],
|
|
||||||
size_highest_level: usize,
|
|
||||||
) -> Result<()> {
|
|
||||||
let mut groups_iter = self
|
|
||||||
.db
|
|
||||||
.remap_types::<Bytes, FacetGroupValueCodec>()
|
|
||||||
.prefix_iter(txn, highest_level_prefix)?;
|
|
||||||
|
|
||||||
let nbr_new_groups = size_highest_level / self.group_size as usize;
|
|
||||||
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
|
|
||||||
|
|
||||||
let mut to_add = vec![];
|
|
||||||
for _ in 0..nbr_new_groups {
|
|
||||||
let mut first_key = None;
|
|
||||||
let mut values = RoaringBitmap::new();
|
|
||||||
for _ in 0..self.group_size {
|
|
||||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
|
||||||
let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
|
|
||||||
.map_err(heed::Error::Encoding)?;
|
|
||||||
|
|
||||||
if first_key.is_none() {
|
|
||||||
first_key = Some(key_i);
|
|
||||||
}
|
|
||||||
values |= value_i.bitmap;
|
|
||||||
}
|
|
||||||
let key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: highest_level + 1,
|
|
||||||
left_bound: first_key.unwrap().left_bound,
|
|
||||||
};
|
|
||||||
let value = FacetGroupValue { size: self.group_size, bitmap: values };
|
|
||||||
to_add.push((key.into_owned(), value));
|
|
||||||
}
|
|
||||||
// now we add the rest of the level, in case its size is > group_size * min_level_size
|
|
||||||
// this can indeed happen if the min_level_size parameter changes between two calls to `insert`
|
|
||||||
if nbr_leftover_elements > 0 {
|
|
||||||
let mut first_key = None;
|
|
||||||
let mut values = RoaringBitmap::new();
|
|
||||||
for _ in 0..nbr_leftover_elements {
|
|
||||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
|
||||||
let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
|
|
||||||
.map_err(heed::Error::Encoding)?;
|
|
||||||
|
|
||||||
if first_key.is_none() {
|
|
||||||
first_key = Some(key_i);
|
|
||||||
}
|
|
||||||
values |= value_i.bitmap;
|
|
||||||
}
|
|
||||||
let key = FacetGroupKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: highest_level + 1,
|
|
||||||
left_bound: first_key.unwrap().left_bound,
|
|
||||||
};
|
|
||||||
// Note: nbr_leftover_elements can be casted to a u8 since it is bounded by `max_group_size`
|
|
||||||
// when it is created above.
|
|
||||||
let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values };
|
|
||||||
to_add.push((key.into_owned(), value));
|
|
||||||
}
|
|
||||||
|
|
||||||
drop(groups_iter);
|
|
||||||
for (key, value) in to_add {
|
|
||||||
self.db.put(txn, &key.as_ref(), &value)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct FacetFieldIdChange {
|
|
||||||
pub facet_value: Box<[u8]>,
|
|
||||||
}
|
|
||||||
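The sketch below is an illustration added for clarity, not code from either side of this compare: it restates the grouping and level-resizing arithmetic described by the comments in `compute_parent_group` and `add_or_delete_level` above, with `group_size`, `max_group_size` and `min_level_size` taken as plain `usize` parameters.

// Illustrative sketch only (assumed free functions, not part of the diff).
fn pick_group_size(child_count: usize, group_size: usize, max_group_size: usize) -> usize {
    if child_count >= max_group_size * 2 {
        // room for at least two groups of `max_group_size`
        max_group_size
    } else if child_count >= group_size {
        // count is in [group_size, max_group_size * 2): halving keeps the tree balanced
        child_count / 2
    } else {
        // take everything
        child_count
    }
}

fn needs_new_level(size_highest_level: usize, group_size: usize, min_level_size: usize) -> bool {
    size_highest_level >= group_size * min_level_size
}

fn needs_level_removal(size_highest_level: usize, min_level_size: usize, highest_level: u8) -> bool {
    size_highest_level < min_level_size && highest_level != 0
}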
@@ -10,14 +10,10 @@ use fst::{IntoStreamer, Streamer};
 pub use grenad_helpers::*;
 pub use merge_functions::*;
 
-use crate::MAX_LMDB_KEY_LENGTH;
+use crate::MAX_WORD_LENGTH;
 
 pub fn valid_lmdb_key(key: impl AsRef<[u8]>) -> bool {
-    key.as_ref().len() <= MAX_LMDB_KEY_LENGTH - 3 && !key.as_ref().is_empty()
-}
-
-pub fn valid_facet_value(facet_value: impl AsRef<[u8]>) -> bool {
-    facet_value.as_ref().len() <= MAX_LMDB_KEY_LENGTH - 3 && !facet_value.as_ref().is_empty()
+    key.as_ref().len() <= MAX_WORD_LENGTH * 2 && !key.as_ref().is_empty()
 }
 
 /// Divides one slice into two at an index, returns `None` if mid is out of bounds.
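A minimal sketch of the guard both branches implement above; the length limit is left as a parameter because the real constant differs between the two sides (`MAX_LMDB_KEY_LENGTH - 3` versus `MAX_WORD_LENGTH * 2`), so the values here are assumptions, not the crate's constants.

// Illustrative sketch only.
fn is_valid_key(key: &[u8], max_len: usize) -> bool {
    // LMDB rejects empty keys and keys longer than its configured maximum.
    !key.is_empty() && key.len() <= max_len
}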
@@ -766,7 +766,6 @@ mod tests {
     use crate::documents::mmap_from_objects;
     use crate::index::tests::TempIndex;
     use crate::index::IndexEmbeddingConfig;
-    use crate::progress::Progress;
    use crate::search::TermsMatchingStrategy;
     use crate::update::new::indexer;
     use crate::update::Setting;
@@ -1965,7 +1964,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2149,7 +2148,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2164,7 +2163,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2211,7 +2210,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2226,7 +2225,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2264,7 +2263,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2279,7 +2278,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2316,7 +2315,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2331,7 +2330,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2370,7 +2369,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2385,7 +2384,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2429,7 +2428,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2444,7 +2443,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2481,7 +2480,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2496,7 +2495,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2533,7 +2532,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2548,7 +2547,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2727,7 +2726,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2742,7 +2741,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2786,7 +2785,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2801,7 +2800,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -2842,7 +2841,7 @@ mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                Progress::default(),
+                &|_progress| (),
             )
             .unwrap();
 
@@ -2857,7 +2856,7 @@ mod tests {
            &document_changes,
            embedders,
            &|| false,
-            &Progress::default(),
+            &|_| (),
        )
        .unwrap();
        wtxn.commit().unwrap();
@@ -3334,44 +3333,6 @@ mod tests {
         rtxn.commit().unwrap();
     }
 
-    #[test]
-    fn incremental_update_without_changing_facet_distribution() {
-        let index = TempIndex::new();
-        index
-            .add_documents(documents!([
-                {"id": 0, "some_field": "aaa", "other_field": "aaa" },
-                {"id": 1, "some_field": "bbb", "other_field": "bbb" },
-            ]))
-            .unwrap();
-        {
-            let rtxn = index.read_txn().unwrap();
-            // count field distribution
-            let results = index.field_distribution(&rtxn).unwrap();
-            assert_eq!(Some(&2), results.get("id"));
-            assert_eq!(Some(&2), results.get("some_field"));
-            assert_eq!(Some(&2), results.get("other_field"));
-        }
-
-        let mut index = index;
-        index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
-
-        index
-            .add_documents(documents!([
-                {"id": 0, "other_field": "bbb" },
-                {"id": 1, "some_field": "ccc" },
-            ]))
-            .unwrap();
-
-        {
-            let rtxn = index.read_txn().unwrap();
-            // count field distribution
-            let results = index.field_distribution(&rtxn).unwrap();
-            assert_eq!(Some(&2), results.get("id"));
-            assert_eq!(Some(&2), results.get("some_field"));
-            assert_eq!(Some(&2), results.get("other_field"));
-        }
-    }
-
     #[test]
     fn delete_words_exact_attributes() {
         let index = TempIndex::new();
@@ -1,5 +1,5 @@
 ---
-source: crates/milli/src/update/index_documents/mod.rs
+source: milli/src/update/index_documents/mod.rs
 ---
 3 0 48.9021 1 [19, ]
 3 0 49.9314 1 [17, ]
@@ -15,11 +15,6 @@ source: crates/milli/src/update/index_documents/mod.rs
 3 0 50.7453 1 [7, ]
 3 0 50.8466 1 [10, ]
 3 0 51.0537 1 [9, ]
-3 1 48.9021 2 [17, 19, ]
-3 1 50.1793 3 [13, 14, 15, ]
-3 1 50.4502 4 [0, 3, 8, 12, ]
-3 1 50.6312 2 [1, 2, ]
-3 1 50.7453 3 [7, 9, 10, ]
 4 0 2.271 1 [17, ]
 4 0 2.3708 1 [19, ]
 4 0 2.7637 1 [14, ]
@@ -33,3 +28,4 @@ source: crates/milli/src/update/index_documents/mod.rs
 4 0 3.6957 1 [9, ]
 4 0 3.9623 1 [12, ]
 4 0 4.337 1 [10, ]
@@ -21,17 +21,15 @@ use super::ref_cell_ext::RefCellExt;
 use super::thread_local::{FullySend, ThreadLocal};
 use super::StdResult;
 use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
+use crate::heed_codec::StrBEU16Codec;
 use crate::index::db_name;
 use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
 use crate::update::new::KvReaderFieldId;
 use crate::vector::Embedding;
-use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
-
-/// Note that the FrameProducer requires up to 9 bytes to
-/// encode the length, the max grant has been computed accordingly.
-///
-/// <https://docs.rs/bbqueue/latest/bbqueue/framed/index.html#frame-header>
-const MAX_FRAME_HEADER_SIZE: usize = 9;
+use crate::{
+    CboRoaringBitmapCodec, DocumentId, Error, FieldIdWordCountCodec, Index, InternalError,
+    U8StrStrCodec,
+};
 
 /// Creates a tuple of senders/receiver to be used by
 /// the extractors and the writer loop.
@@ -59,9 +57,8 @@ pub fn extractor_writer_bbqueue(
     bbbuffers.resize_with(current_num_threads, || BBBuffer::new(bbbuffer_capacity));
 
     let capacity = bbbuffers.first().unwrap().capacity();
-    // 1. Due to fragmentation in the bbbuffer, we can only accept up to half the capacity in a single message.
-    // 2. Read the documentation for `MAX_FRAME_HEADER_SIZE` for more information about why it is here.
-    let max_grant = capacity.saturating_div(2).checked_sub(MAX_FRAME_HEADER_SIZE).unwrap();
+    // Read the field description to understand this
+    let capacity = capacity.checked_sub(9).unwrap();
 
     let producers = ThreadLocal::with_capacity(bbbuffers.len());
     let consumers = rayon::broadcast(|bi| {
@@ -72,7 +69,7 @@ pub fn extractor_writer_bbqueue(
     });
 
     let (sender, receiver) = flume::bounded(channel_capacity);
-    let sender = ExtractorBbqueueSender { sender, producers, max_grant };
+    let sender = ExtractorBbqueueSender { sender, producers, capacity };
     let receiver = WriterBbqueueReceiver {
         receiver,
         look_at_consumer: (0..consumers.len()).cycle(),
@@ -88,10 +85,13 @@ pub struct ExtractorBbqueueSender<'a> {
     /// A memory buffer, one by thread, is used to serialize
     /// the entries directly in this shared, lock-free space.
     producers: ThreadLocal<FullySend<RefCell<FrameProducer<'a>>>>,
-    /// The maximum frame grant that a producer can reserve.
-    /// It will never be able to store more than that as the
-    /// buffer cannot split data into two parts.
-    max_grant: usize,
+    /// The capacity of this frame producer, will never be able to store more than that.
+    ///
+    /// Note that the FrameProducer requires up to 9 bytes to encode the length,
+    /// the capacity has been shrunk accordingly.
+    ///
+    /// <https://docs.rs/bbqueue/latest/bbqueue/framed/index.html#frame-header>
+    capacity: usize,
 }
 
 pub struct WriterBbqueueReceiver<'a> {
@@ -407,6 +407,32 @@ impl Database {
             Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S,
         }
     }
+
+    pub fn stringify_key(&self, key: &[u8]) -> String {
+        use heed::types::*;
+
+        match self {
+            Database::WordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()),
+            Database::WordFidDocids => format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap()),
+            Database::WordPositionDocids => {
+                format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap())
+            }
+            Database::WordPairProximityDocids => {
+                format!("{:?}", U8StrStrCodec::bytes_decode(key).unwrap())
+            }
+            Database::ExactWordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()),
+            Database::FidWordCountDocids => {
+                format!("{:?}", FieldIdWordCountCodec::bytes_decode(key).unwrap())
+            }
+            Database::FieldIdDocidFacetStrings => {
+                format!("{:?}", FieldDocIdFacetStringCodec::bytes_decode(key).unwrap())
+            }
+            Database::FieldIdDocidFacetF64s => {
+                format!("{:?}", FieldDocIdFacetF64Codec::bytes_decode(key).unwrap())
+            }
+            d => unimplemented!("stringify_key for {:?}", d),
+        }
+    }
 }
 
 impl From<FacetKind> for Database {
@@ -447,14 +473,14 @@ impl<'b> ExtractorBbqueueSender<'b> {
     }
 
     fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> {
-        let max_grant = self.max_grant;
+        let capacity = self.capacity;
         let refcell = self.producers.get().unwrap();
         let mut producer = refcell.0.borrow_mut_or_yield();
 
         let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid });
         let total_length = EntryHeader::total_delete_vector_size();
-        if total_length > max_grant {
-            panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
+        if total_length > capacity {
+            panic!("The entry is larger ({total_length} bytes) than the BBQueue capacity ({capacity} bytes)");
         }
 
         // Spin loop to have a frame the size we requested.
@@ -472,7 +498,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
         embedder_id: u8,
         embeddings: &[Vec<f32>],
     ) -> crate::Result<()> {
-        let max_grant = self.max_grant;
+        let capacity = self.capacity;
         let refcell = self.producers.get().unwrap();
         let mut producer = refcell.0.borrow_mut_or_yield();
 
@@ -483,7 +509,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
         let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] };
         let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector);
         let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions);
-        if total_length > max_grant {
+        if total_length > capacity {
             let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
             for embedding in embeddings {
                 let mut embedding_bytes = bytemuck::cast_slice(embedding);
@@ -544,14 +570,14 @@ impl<'b> ExtractorBbqueueSender<'b> {
     where
         F: FnOnce(&mut [u8], &mut [u8]) -> crate::Result<()>,
     {
-        let max_grant = self.max_grant;
+        let capacity = self.capacity;
         let refcell = self.producers.get().unwrap();
         let mut producer = refcell.0.borrow_mut_or_yield();
 
         let operation = DbOperation { database, key_length: Some(key_length) };
         let payload_header = EntryHeader::DbOperation(operation);
         let total_length = EntryHeader::total_key_value_size(key_length, value_length);
-        if total_length > max_grant {
+        if total_length > capacity {
             let mut key_buffer = vec![0; key_length.get() as usize].into_boxed_slice();
             let value_file = tempfile::tempfile()?;
             value_file.set_len(value_length.try_into().unwrap())?;
@@ -605,7 +631,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
     where
         F: FnOnce(&mut [u8]) -> crate::Result<()>,
     {
-        let max_grant = self.max_grant;
+        let capacity = self.capacity;
         let refcell = self.producers.get().unwrap();
         let mut producer = refcell.0.borrow_mut_or_yield();
 
@@ -614,8 +640,8 @@ impl<'b> ExtractorBbqueueSender<'b> {
         let operation = DbOperation { database, key_length: None };
         let payload_header = EntryHeader::DbOperation(operation);
         let total_length = EntryHeader::total_key_size(key_length);
-        if total_length > max_grant {
-            panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
+        if total_length > capacity {
+            panic!("The entry is larger ({total_length} bytes) than the BBQueue capacity ({capacity} bytes)");
        }
 
         // Spin loop to have a frame the size we requested.
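A small sketch of the arithmetic the v1.12.7 side uses above to size a single BBQueue frame grant; everything here is illustrative and the names are assumptions, only the "half the capacity" and "9 header bytes" facts come from the comments and constant in the hunk.

// Illustrative sketch only.
const MAX_FRAME_HEADER_SIZE: usize = 9; // a bbqueue frame header can take up to 9 bytes

fn max_grant(capacity: usize) -> Option<usize> {
    // Only half the buffer is usable for a single message because of fragmentation,
    // and the frame header must still fit inside the grant.
    (capacity / 2).checked_sub(MAX_FRAME_HEADER_SIZE)
}

fn main() {
    assert_eq!(max_grant(1024), Some(503));
}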
@@ -1,8 +1,7 @@
 use std::collections::{BTreeMap, BTreeSet};
 
-use bumparaw_collections::RawMap;
 use heed::RoTxn;
-use rustc_hash::FxBuildHasher;
+use raw_collections::RawMap;
 use serde_json::value::RawValue;
 
 use super::vector_document::VectorDocument;
@@ -386,12 +385,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue);
 
 #[derive(Debug)]
 pub struct Versions<'doc> {
-    data: RawMap<'doc, FxBuildHasher>,
+    data: RawMap<'doc>,
 }
 
 impl<'doc> Versions<'doc> {
     pub fn multiple(
-        mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
+        mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
     ) -> Result<Option<Self>> {
         let Some(data) = versions.next() else { return Ok(None) };
         let mut data = data?;
@@ -404,7 +403,7 @@ impl<'doc> Versions<'doc> {
         Ok(Some(Self::single(data)))
     }
 
-    pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
+    pub fn single(version: RawMap<'doc>) -> Self {
         Self { data: version }
     }
 
@@ -69,12 +69,12 @@ use std::io::BufReader;
 use std::{io, iter, mem};
 
 use bumpalo::Bump;
-use bumparaw_collections::bbbul::{BitPacker, BitPacker4x};
-use bumparaw_collections::map::FrozenMap;
-use bumparaw_collections::{Bbbul, FrozenBbbul};
 use grenad::ReaderCursor;
 use hashbrown::hash_map::RawEntryMut;
 use hashbrown::HashMap;
+use raw_collections::bbbul::{BitPacker, BitPacker4x};
+use raw_collections::map::FrozenMap;
+use raw_collections::{Bbbul, FrozenBbbul};
 use roaring::RoaringBitmap;
 use rustc_hash::FxBuildHasher;
 
@@ -177,12 +177,12 @@ impl<'extractor> BalancedCaches<'extractor> {
         Ok(())
     }
 
-    pub fn freeze(&mut self, source_id: usize) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
+    pub fn freeze(&mut self) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
         match &mut self.caches {
             InnerCaches::Normal(NormalCaches { caches }) => caches
                 .iter_mut()
                 .enumerate()
-                .map(|(bucket_id, map)| {
+                .map(|(bucket, map)| {
                     // safety: we are transmuting the Bbbul into a FrozenBbbul
                     // that are the same size.
                     let map = unsafe {
@@ -201,19 +201,14 @@ impl<'extractor> BalancedCaches<'extractor> {
                             >,
                         >(map)
                     };
-                    Ok(FrozenCache {
-                        source_id,
-                        bucket_id,
-                        cache: FrozenMap::new(map),
-                        spilled: Vec::new(),
-                    })
+                    Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() })
                 })
                 .collect(),
             InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches
                 .iter_mut()
                 .zip(mem::take(spilled_entries))
                 .enumerate()
-                .map(|(bucket_id, (map, sorter))| {
+                .map(|(bucket, (map, sorter))| {
                     let spilled = sorter
                         .into_reader_cursors()?
                         .into_iter()
@@ -239,7 +234,7 @@ impl<'extractor> BalancedCaches<'extractor> {
                             >,
                         >(map)
                     };
-                    Ok(FrozenCache { source_id, bucket_id, cache: FrozenMap::new(map), spilled })
+                    Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled })
                 })
                 .collect(),
         }
@@ -445,8 +440,7 @@ fn spill_entry_to_sorter(
 }
 
 pub struct FrozenCache<'a, 'extractor> {
-    bucket_id: usize,
-    source_id: usize,
+    bucket: usize,
     cache: FrozenMap<
         'a,
         'extractor,
@@ -463,9 +457,9 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
     let width = caches.first().map(BalancedCaches::buckets).unwrap_or(0);
     let mut bucket_caches: Vec<_> = iter::repeat_with(Vec::new).take(width).collect();
 
-    for (thread_index, thread_cache) in caches.iter_mut().enumerate() {
-        for frozen in thread_cache.freeze(thread_index)? {
-            bucket_caches[frozen.bucket_id].push(frozen);
+    for thread_cache in caches {
+        for frozen in thread_cache.freeze()? {
+            bucket_caches[frozen.bucket].push(frozen);
         }
     }
 
@@ -483,16 +477,21 @@ where
     F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
 {
     let mut maps = Vec::new();
-    let mut heap = BinaryHeap::new();
+    let mut readers = Vec::new();
     let mut current_bucket = None;
-    for FrozenCache { source_id, bucket_id, cache, spilled } in frozen {
-        assert_eq!(*current_bucket.get_or_insert(bucket_id), bucket_id);
-        maps.push((source_id, cache));
-        for reader in spilled {
-            let mut cursor = reader.into_cursor()?;
-            if cursor.move_on_next()?.is_some() {
-                heap.push(Entry { cursor, source_id });
-            }
+    for FrozenCache { bucket, cache, ref mut spilled } in frozen {
+        assert_eq!(*current_bucket.get_or_insert(bucket), bucket);
+        maps.push(cache);
+        readers.append(spilled);
+    }
+
+    // First manage the spilled entries by looking into the HashMaps,
+    // merge them and mark them as dummy.
+    let mut heap = BinaryHeap::new();
+    for (source_index, source) in readers.into_iter().enumerate() {
+        let mut cursor = source.into_cursor()?;
+        if cursor.move_on_next()?.is_some() {
+            heap.push(Entry { cursor, source_index });
        }
     }
 
@@ -509,29 +508,25 @@ where
 
         let mut output = DelAddRoaringBitmap::from_bytes(first_value)?;
         while let Some(mut entry) = heap.peek_mut() {
-            if let Some((key, value)) = entry.cursor.current() {
-                if first_key != key {
+            if let Some((key, _value)) = entry.cursor.current() {
+                if first_key == key {
+                    let new = DelAddRoaringBitmap::from_bytes(first_value)?;
+                    output = output.merge(new);
+                    // When we are done we the current value of this entry move make
+                    // it move forward and let the heap reorganize itself (on drop)
+                    if entry.cursor.move_on_next()?.is_none() {
+                        PeekMut::pop(entry);
+                    }
+                } else {
                     break;
                 }
-
-                let new = DelAddRoaringBitmap::from_bytes(value)?;
-                output = output.merge(new);
-                // When we are done we the current value of this entry move make
-                // it move forward and let the heap reorganize itself (on drop)
-                if entry.cursor.move_on_next()?.is_none() {
-                    PeekMut::pop(entry);
-                }
             }
         }
 
         // Once we merged all of the spilled bitmaps we must also
        // fetch the entries from the non-spilled entries (the HashMaps).
-        for (source_id, map) in maps.iter_mut() {
-            debug_assert!(
-                !(map.get(first_key).is_some() && first_entry.source_id == *source_id),
-                "A thread should not have spiled a key that has been inserted in the cache"
-            );
-            if first_entry.source_id != *source_id {
+        for (map_index, map) in maps.iter_mut().enumerate() {
+            if first_entry.source_index != map_index {
                 if let Some(new) = map.get_mut(first_key) {
                     output.union_and_clear_bbbul(new);
                 }
@@ -543,12 +538,12 @@ where
 
         // Don't forget to put the first entry back into the heap.
         if first_entry.cursor.move_on_next()?.is_some() {
-            heap.push(first_entry);
+            heap.push(first_entry)
        }
     }
 
     // Then manage the content on the HashMap entries that weren't taken (mem::take).
-    while let Some((_, mut map)) = maps.pop() {
+    while let Some(mut map) = maps.pop() {
         // Make sure we don't try to work with entries already managed by the spilled
         let mut ordered_entries: Vec<_> =
             map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
@@ -558,7 +553,7 @@ where
             let mut output = DelAddRoaringBitmap::empty();
             output.union_and_clear_bbbul(bbbul);
 
-            for (_, rhs) in maps.iter_mut() {
+            for rhs in maps.iter_mut() {
                 if let Some(new) = rhs.get_mut(key) {
                     output.union_and_clear_bbbul(new);
                 }
@@ -574,14 +569,14 @@ where
 
 struct Entry<R> {
     cursor: ReaderCursor<R>,
-    source_id: usize,
+    source_index: usize,
 }
 
 impl<R> Ord for Entry<R> {
     fn cmp(&self, other: &Entry<R>) -> Ordering {
         let skey = self.cursor.current().map(|(k, _)| k);
         let okey = other.cursor.current().map(|(k, _)| k);
-        skey.cmp(&okey).then(self.source_id.cmp(&other.source_id)).reverse()
+        skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse()
     }
 }
 
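Both sides of the hunk above rely on the same k-way merge idea: keep one cursor per sorted spilled reader in a heap ordered by the current key, pop the smallest key, union the values, and re-push the advanced cursor. The sketch below restates that pattern with std types only; the function and data shapes are assumptions for illustration, not the crate's API.

// Illustrative sketch only: merge several ascending-sorted (key, bitmap-like) streams.
use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn merge_sorted(sources: Vec<Vec<(u8, u32)>>) -> Vec<(u8, u32)> {
    // (key, source_index, position), wrapped in Reverse so the smallest key pops first.
    let mut heap: BinaryHeap<Reverse<(u8, usize, usize)>> = sources
        .iter()
        .enumerate()
        .filter(|(_, s)| !s.is_empty())
        .map(|(i, s)| Reverse((s[0].0, i, 0)))
        .collect();

    let mut out: Vec<(u8, u32)> = Vec::new();
    while let Some(Reverse((key, src, pos))) = heap.pop() {
        let value = sources[src][pos].1;
        match out.last_mut() {
            // Same key as the previous entry: union the values (stands in for the bitmap merge).
            Some((last_key, acc)) if *last_key == key => *acc |= value,
            _ => out.push((key, value)),
        }
        // Advance the cursor of the source we just consumed.
        if let Some(&(next_key, _)) = sources[src].get(pos + 1) {
            heap.push(Reverse((next_key, src, pos + 1)));
        }
    }
    out
}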
@@ -89,8 +89,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
                         .or_default();
                     *entry -= 1;
                 }
-                let content =
-                    update.merged(&context.rtxn, context.index, &context.db_fields_ids_map)?;
+                let content = update.updated();
                 let geo_iter =
                     content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
                 for res in content.iter_top_level_fields().chain(geo_iter) {
@@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd;
 use crate::update::new::channel::FieldIdDocidFacetSender;
 use crate::update::new::extract::perm_json_p;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
-use crate::update::new::steps::IndexingStep;
+use crate::update::new::steps::Step;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
@@ -283,60 +283,42 @@ impl FacetedDocidsExtractor {
 }
 
 struct DelAddFacetValue<'doc> {
-    strings: HashMap<
-        (FieldId, &'doc str),
-        Option<BVec<'doc, u8>>,
-        hashbrown::DefaultHashBuilder,
-        &'doc Bump,
-    >,
+    strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
     f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
-    doc_alloc: &'doc Bump,
 }
 
 impl<'doc> DelAddFacetValue<'doc> {
     fn new(doc_alloc: &'doc Bump) -> Self {
-        Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc), doc_alloc }
+        Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc) }
    }
 
     fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
-        match kind {
-            FacetKind::Number => {
-                let key = (fid, value);
-                if let Some(DelAdd::Deletion) = self.f64s.get(&key) {
-                    self.f64s.remove(&key);
-                } else {
-                    self.f64s.insert(key, DelAdd::Addition);
-                }
-            }
-            FacetKind::String => {
-                if let Ok(s) = std::str::from_utf8(&value) {
-                    let normalized = crate::normalize_facet(s);
-                    let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
-                    self.strings.insert((fid, truncated), Some(value));
-                }
-            }
-            _ => (),
+        let cache = match kind {
+            FacetKind::String => &mut self.strings,
+            FacetKind::Number => &mut self.f64s,
+            _ => return,
+        };
+
+        let key = (fid, value);
+        if let Some(DelAdd::Deletion) = cache.get(&key) {
+            cache.remove(&key);
+        } else {
+            cache.insert(key, DelAdd::Addition);
        }
     }
 
     fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
-        match kind {
-            FacetKind::Number => {
-                let key = (fid, value);
-                if let Some(DelAdd::Addition) = self.f64s.get(&key) {
-                    self.f64s.remove(&key);
-                } else {
-                    self.f64s.insert(key, DelAdd::Deletion);
-                }
-            }
-            FacetKind::String => {
-                if let Ok(s) = std::str::from_utf8(&value) {
-                    let normalized = crate::normalize_facet(s);
-                    let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
-                    self.strings.insert((fid, truncated), None);
-                }
-            }
-            _ => (),
+        let cache = match kind {
+            FacetKind::String => &mut self.strings,
+            FacetKind::Number => &mut self.f64s,
+            _ => return,
+        };
+
+        let key = (fid, value);
+        if let Some(DelAdd::Addition) = cache.get(&key) {
+            cache.remove(&key);
+        } else {
+            cache.insert(key, DelAdd::Deletion);
        }
     }
 
@@ -347,14 +329,18 @@ impl<'doc> DelAddFacetValue<'doc> {
         doc_alloc: &Bump,
     ) -> crate::Result<()> {
         let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
-        for ((fid, truncated), value) in self.strings {
-            buffer.clear();
-            buffer.extend_from_slice(&fid.to_be_bytes());
-            buffer.extend_from_slice(&docid.to_be_bytes());
-            buffer.extend_from_slice(truncated.as_bytes());
-            match &value {
-                Some(value) => sender.write_facet_string(&buffer, value)?,
-                None => sender.delete_facet_string(&buffer)?,
+        for ((fid, value), deladd) in self.strings {
+            if let Ok(s) = std::str::from_utf8(&value) {
+                buffer.clear();
+                buffer.extend_from_slice(&fid.to_be_bytes());
+                buffer.extend_from_slice(&docid.to_be_bytes());
+                let normalized = crate::normalize_facet(s);
+                let truncated = truncate_str(&normalized);
+                buffer.extend_from_slice(truncated.as_bytes());
+                match deladd {
+                    DelAdd::Deletion => sender.delete_facet_string(&buffer)?,
+                    DelAdd::Addition => sender.write_facet_string(&buffer, &value)?,
+                }
             }
         }
 
@@ -387,16 +373,26 @@ fn truncate_str(s: &str) -> &str {
 
 impl FacetedDocidsExtractor {
     #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
-    pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
+    pub fn run_extraction<
+        'pl,
+        'fid,
+        'indexer,
+        'index,
+        'extractor,
+        DC: DocumentChanges<'pl>,
+        MSP,
+        SP,
+    >(
         grenad_parameters: GrenadParameters,
         document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
         sender: &FieldIdDocidFacetSender,
-        step: IndexingStep,
+        step: Step,
     ) -> Result<Vec<BalancedCaches<'extractor>>>
     where
         MSP: Fn() -> bool + Sync,
+        SP: Fn(Progress) + Sync,
     {
         let index = indexing_context.index;
         let rtxn = index.read_txn()?;
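Both versions of `DelAddFacetValue` above apply the same cancellation rule: recording an addition for a key that already has a pending deletion simply drops the pending entry, and vice versa. The sketch below restates that rule with a std `HashMap`; the names and types are assumptions for illustration only.

// Illustrative sketch only.
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum DelAdd {
    Deletion,
    Addition,
}

// Record an operation for `key`; an opposite pending operation cancels out
// instead of both being queued.
fn record(cache: &mut HashMap<Vec<u8>, DelAdd>, key: Vec<u8>, op: DelAdd) {
    let opposite = match op {
        DelAdd::Addition => DelAdd::Deletion,
        DelAdd::Deletion => DelAdd::Addition,
    };
    if cache.get(&key) == Some(&opposite) {
        cache.remove(&key);
    } else {
        cache.insert(key, op);
    }
}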
@@ -15,22 +15,23 @@ pub use geo::*;
|
|||||||
pub use searchable::*;
|
pub use searchable::*;
|
||||||
pub use vectors::EmbeddingExtractor;
|
pub use vectors::EmbeddingExtractor;
|
||||||
|
|
||||||
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
|
use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
|
||||||
use super::steps::IndexingStep;
|
use super::steps::Step;
|
||||||
use super::thread_local::{FullySend, ThreadLocal};
|
use super::thread_local::{FullySend, ThreadLocal};
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub trait DocidsExtractor {
|
pub trait DocidsExtractor {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
step: IndexingStep,
|
step: Step,
|
||||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync;
|
MSP: Fn() -> bool + Sync,
|
||||||
|
SP: Fn(Progress) + Sync;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO move in permissive json pointer
|
/// TODO move in permissive json pointer
|
||||||
|
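Note on the signature changes above: on the try-merge side every extractor entry point takes two caller-supplied callbacks instead of the shared progress object used on v1.12.7. `MSP: Fn() -> bool + Sync` is polled to decide whether indexing must be aborted, and `SP: Fn(Progress) + Sync` receives progress updates. A minimal sketch of values a caller might pass, mirroring the no-op closures used in the test module later in this diff:

    // Hypothetical caller-side values; real callers wire these to task cancellation and task progress.
    let must_stop_processing = || false;            // never request an abort
    let send_progress = |_progress: Progress| {};   // drop progress updates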
@@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::indexer::document_changes::{
-extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
-use crate::update::new::steps::IndexingStep;
+use crate::update::new::steps::Step;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;

@@ -239,15 +239,25 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
 pub struct WordDocidsExtractors;

 impl WordDocidsExtractors {
-pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
+pub fn run_extraction<
+'pl,
+'fid,
+'indexer,
+'index,
+'extractor,
+DC: DocumentChanges<'pl>,
+MSP,
+SP,
+>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: IndexingStep,
+step: Step,
 ) -> Result<WordDocidsCaches<'extractor>>
 where
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 let index = indexing_context.index;
 let rtxn = index.read_txn()?;

@@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use super::cache::BalancedCaches;
 use super::DocidsExtractor;
 use crate::update::new::indexer::document_changes::{
-extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
 };
-use crate::update::new::steps::IndexingStep;
+use crate::update::new::steps::Step;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;

@@ -56,15 +56,16 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
 }

 pub trait SearchableExtractor: Sized + Sync {
-fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
+fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: IndexingStep,
+step: Step,
 ) -> Result<Vec<BalancedCaches<'extractor>>>
 where
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 let rtxn = indexing_context.index.read_txn()?;
 let stop_words = indexing_context.index.stop_words(&rtxn)?;

@@ -133,15 +134,16 @@ pub trait SearchableExtractor: Sized + Sync {
 }

 impl<T: SearchableExtractor> DocidsExtractor for T {
-fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
+fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: IndexingStep,
+step: Step,
 ) -> Result<Vec<BalancedCaches<'extractor>>>
 where
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 Self::run_extraction(
 grenad_parameters,

@@ -176,10 +176,9 @@ pub fn tokenizer_builder<'a>(
 #[cfg(test)]
 mod test {
 use bumpalo::Bump;
-use bumparaw_collections::RawMap;
 use charabia::TokenizerBuilder;
 use meili_snap::snapshot;
-use rustc_hash::FxBuildHasher;
+use raw_collections::RawMap;
 use serde_json::json;
 use serde_json::value::RawValue;

@@ -235,7 +234,7 @@ mod test {

 let bump = Bump::new();
 let document: &RawValue = serde_json::from_str(&document).unwrap();
-let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap();
+let document = RawMap::from_raw_value(document, &bump).unwrap();

 let document = Versions::single(document);
 let document = DocumentFromVersions::new(&document);

@@ -130,7 +130,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
 );
 } else if new_vectors.regenerate {
 let new_rendered = prompt.render_document(
-update.external_document_id(),
 update.current(
 &context.rtxn,
 context.index,

@@ -140,7 +139,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
 &context.doc_alloc,
 )?;
 let old_rendered = prompt.render_document(
-update.external_document_id(),
 update.merged(
 &context.rtxn,
 context.index,

@@ -160,7 +158,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
 }
 } else if old_vectors.regenerate {
 let old_rendered = prompt.render_document(
-update.external_document_id(),
 update.current(
 &context.rtxn,
 context.index,

@@ -170,7 +167,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
 &context.doc_alloc,
 )?;
 let new_rendered = prompt.render_document(
-update.external_document_id(),
 update.merged(
 &context.rtxn,
 context.index,

@@ -220,7 +216,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
 );
 } else if new_vectors.regenerate {
 let rendered = prompt.render_document(
-insertion.external_document_id(),
 insertion.inserted(),
 context.new_fields_ids_map,
 &context.doc_alloc,

@@ -234,7 +229,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
 }
 } else {
 let rendered = prompt.render_document(
-insertion.external_document_id(),
 insertion.inserted(),
 context.new_fields_ids_map,
 &context.doc_alloc,

@@ -103,8 +103,6 @@ impl<'indexer> FacetSearchBuilder<'indexer> {

 #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_fst")]
 pub fn merge_and_write(self, index: &Index, wtxn: &mut RwTxn, rtxn: &RoTxn) -> Result<()> {
-tracing::trace!("merge facet strings for facet search: {:?}", self.registered_facets);
-
 let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?;
 let mut builder = grenad::MergerBuilder::new(MergeDeladdBtreesetString);
 builder.extend(reader);

@@ -120,15 +118,12 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
 BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?;

 if current_field_id != Some(field_id) {
-if let (Some(current_field_id), Some(fst_merger_builder)) =
-(current_field_id, fst_merger_builder)
-{
+if let Some(fst_merger_builder) = fst_merger_builder {
 let mmap = fst_merger_builder.build(&mut callback)?;
-index.facet_id_string_fst.remap_data_type::<Bytes>().put(
-wtxn,
-&current_field_id,
-&mmap,
-)?;
+index
+.facet_id_string_fst
+.remap_data_type::<Bytes>()
+.put(wtxn, &field_id, &mmap)?;
 }

 fst = index.facet_id_string_fst.get(rtxn, &field_id)?;

@@ -1,8 +1,6 @@
 use std::ops::ControlFlow;

 use bumpalo::Bump;
-use bumparaw_collections::RawVec;
-use rustc_hash::FxBuildHasher;
 use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
 use serde_json::value::RawValue;

@@ -362,7 +360,7 @@ impl<'a> DeserrRawValue<'a> {
 }

 pub struct DeserrRawVec<'a> {
-vec: RawVec<'a>,
+vec: raw_collections::RawVec<'a>,
 alloc: &'a Bump,
 }

@@ -381,7 +379,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> {
 }

 pub struct DeserrRawVecIter<'a> {
-it: bumparaw_collections::vec::iter::IntoIter<'a>,
+it: raw_collections::vec::iter::IntoIter<'a>,
 alloc: &'a Bump,
 }

@@ -395,7 +393,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
 }

 pub struct DeserrRawMap<'a> {
-map: bumparaw_collections::RawMap<'a, FxBuildHasher>,
+map: raw_collections::RawMap<'a>,
 alloc: &'a Bump,
 }

@@ -418,7 +416,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> {
 }

 pub struct DeserrRawMapIter<'a> {
-it: bumparaw_collections::map::iter::IntoIter<'a>,
+it: raw_collections::map::iter::IntoIter<'a>,
 alloc: &'a Bump,
 }

@@ -617,7 +615,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> {
 where
 A: serde::de::SeqAccess<'de>,
 {
-let mut raw_vec = RawVec::new_in(self.alloc);
+let mut raw_vec = raw_collections::RawVec::new_in(self.alloc);
 while let Some(next) = seq.next_element()? {
 raw_vec.push(next);
 }

@@ -1,5 +1,4 @@
 use std::cell::{Cell, RefCell};
-use std::sync::atomic::Ordering;
 use std::sync::{Arc, RwLock};

 use bumpalo::Bump;

@@ -8,9 +7,8 @@ use rayon::iter::IndexedParallelIterator;

 use super::super::document_change::DocumentChange;
 use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
-use crate::progress::{AtomicDocumentStep, Progress};
 use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
-use crate::update::new::steps::IndexingStep;
+use crate::update::new::steps::Step;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};

@@ -135,8 +133,10 @@ pub struct IndexingContext<
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
+SP,
 > where
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 pub index: &'index Index,
 pub db_fields_ids_map: &'indexer FieldsIdsMap,

@@ -144,7 +144,7 @@ pub struct IndexingContext<
 pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
 pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
 pub must_stop_processing: &'indexer MSP,
-pub progress: &'indexer Progress,
+pub send_progress: &'indexer SP,
 }

 impl<

@@ -152,15 +152,18 @@ impl<
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
+SP,
 > Copy
 for IndexingContext<
 'fid, // invariant lifetime of fields ids map
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
+SP,
 >
 where
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 }

@@ -169,15 +172,18 @@ impl<
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
+SP,
 > Clone
 for IndexingContext<
 'fid, // invariant lifetime of fields ids map
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
+SP,
 >
 where
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 fn clone(&self) -> Self {
 *self

@@ -196,6 +202,7 @@ pub fn extract<
 EX,
 DC: DocumentChanges<'pl>,
 MSP,
+SP,
 >(
 document_changes: &DC,
 extractor: &EX,

@@ -206,18 +213,18 @@ pub fn extract<
 doc_allocs,
 fields_ids_map_store,
 must_stop_processing,
-progress,
-}: IndexingContext<'fid, 'indexer, 'index, MSP>,
+send_progress,
+}: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
 datastore: &'data ThreadLocal<EX::Data>,
-step: IndexingStep,
+step: Step,
 ) -> Result<()>
 where
 EX: Extractor<'extractor>,
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 tracing::trace!("We are resetting the extractor allocators");
-progress.update_progress(step);
 // Clean up and reuse the extractor allocs
 for extractor_alloc in extractor_allocs.iter_mut() {
 tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());

@@ -225,11 +232,9 @@ where
 }

 let total_documents = document_changes.len() as u32;
-let (step, progress_step) = AtomicDocumentStep::new(total_documents);
-progress.update_progress(progress_step);

 let pi = document_changes.iter(CHUNK_SIZE);
-pi.try_arc_for_each_try_init(
+pi.enumerate().try_arc_for_each_try_init(
 || {
 DocumentChangeContext::new(
 index,

@@ -242,10 +247,13 @@ where
 move |index_alloc| extractor.init_data(index_alloc),
 )
 },
-|context, items| {
+|context, (finished_documents, items)| {
 if (must_stop_processing)() {
 return Err(Arc::new(InternalError::AbortedIndexation.into()));
 }
+let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
+
+(send_progress)(Progress::from_step_substep(step, finished_documents, total_documents));

 // Clean up and reuse the document-specific allocator
 context.doc_alloc.reset();

@@ -256,7 +264,6 @@ where
 });

 let res = extractor.process(changes, context).map_err(Arc::new);
-step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);

 // send back the doc_alloc in the pool
 context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));

@@ -264,7 +271,32 @@ where
 res
 },
 )?;
-step.store(total_documents, Ordering::Relaxed);
+(send_progress)(Progress::from_step_substep(step, total_documents, total_documents));

 Ok(())
 }
+
+pub struct Progress {
+pub finished_steps: u16,
+pub total_steps: u16,
+pub step_name: &'static str,
+pub finished_total_substep: Option<(u32, u32)>,
+}
+
+impl Progress {
+pub fn from_step(step: Step) -> Self {
+Self {
+finished_steps: step.finished_steps(),
+total_steps: Step::total_steps(),
+step_name: step.name(),
+finished_total_substep: None,
+}
+}
+pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self {
+Self {
+finished_total_substep: Some((finished_substep, total_substep)),
+..Progress::from_step(step)
+}
+}
+}
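The `Progress` struct introduced in the hunk above carries the step counters that v1.12.7 tracks through `crate::progress` and atomic sub-steps. A small illustrative sketch of how a `send_progress` closure could consume it; the closure and the logging format are assumptions, not code from either branch:

    // Hypothetical consumer of the Progress values produced by from_step / from_step_substep.
    let send_progress = |p: Progress| match p.finished_total_substep {
        Some((done, total)) => println!("step {}/{} {}: {done}/{total}", p.finished_steps, p.total_steps, p.step_name),
        None => println!("step {}/{} {}", p.finished_steps, p.total_steps, p.step_name),
    };
    send_progress(Progress::from_step_substep(Step::ExtractingDocuments, 128, 1024));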
@@ -92,12 +92,11 @@ mod test {

 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 use crate::index::tests::TempIndex;
-use crate::progress::Progress;
 use crate::update::new::indexer::document_changes::{
 extract, DocumentChangeContext, Extractor, IndexingContext,
 };
 use crate::update::new::indexer::DocumentDeletion;
-use crate::update::new::steps::IndexingStep;
+use crate::update::new::steps::Step;
 use crate::update::new::thread_local::{MostlySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::DocumentId;

@@ -165,7 +164,7 @@ mod test {
 doc_allocs: &doc_allocs,
 fields_ids_map_store: &fields_ids_map_store,
 must_stop_processing: &(|| false),
-progress: &Progress::default(),
+send_progress: &(|_progress| {}),
 };

 for _ in 0..3 {

@@ -177,7 +176,7 @@ mod test {
 context,
 &mut extractor_allocs,
 &datastore,
-IndexingStep::ExtractingDocuments,
+Step::ExtractingDocuments,
 )
 .unwrap();

@@ -1,23 +1,19 @@
-use std::sync::atomic::Ordering;
-
 use bumpalo::collections::CollectIn;
 use bumpalo::Bump;
-use bumparaw_collections::RawMap;
 use hashbrown::hash_map::Entry;
 use heed::RoTxn;
 use memmap2::Mmap;
+use raw_collections::RawMap;
 use rayon::slice::ParallelSlice;
-use rustc_hash::FxBuildHasher;
 use serde_json::value::RawValue;
 use serde_json::Deserializer;

 use super::super::document_change::DocumentChange;
-use super::document_changes::{DocumentChangeContext, DocumentChanges};
+use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress};
 use super::retrieve_or_guess_primary_key;
 use crate::documents::PrimaryKey;
-use crate::progress::{AtomicPayloadStep, Progress};
 use crate::update::new::document::Versions;
-use crate::update::new::steps::IndexingStep;
+use crate::update::new::steps::Step;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::{Deletion, Insertion, Update};
 use crate::update::{AvailableIds, IndexDocumentsMethod};

@@ -48,7 +44,7 @@ impl<'pl> DocumentOperation<'pl> {

 #[allow(clippy::too_many_arguments)]
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
-pub fn into_changes<MSP>(
+pub fn into_changes<MSP, SP>(
 self,
 indexer: &'pl Bump,
 index: &Index,

@@ -56,12 +52,12 @@ impl<'pl> DocumentOperation<'pl> {
 primary_key_from_op: Option<&'pl str>,
 new_fields_ids_map: &mut FieldsIdsMap,
 must_stop_processing: &MSP,
-progress: Progress,
+send_progress: &SP,
 ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
 where
 MSP: Fn() -> bool,
+SP: Fn(Progress),
 {
-progress.update_progress(IndexingStep::PreparingPayloads);
 let Self { operations, method } = self;

 let documents_ids = index.documents_ids(rtxn)?;

@@ -71,14 +67,16 @@ impl<'pl> DocumentOperation<'pl> {
 let mut primary_key = None;

 let payload_count = operations.len();
-let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
-progress.update_progress(progress_step);

 for (payload_index, operation) in operations.into_iter().enumerate() {
 if must_stop_processing() {
 return Err(InternalError::AbortedIndexation.into());
 }
-step.store(payload_index as u32, Ordering::Relaxed);
+send_progress(Progress::from_step_substep(
+Step::PreparingPayloads,
+payload_index as u32,
+payload_count as u32,
+));

 let mut bytes = 0;
 let result = match operation {

@@ -119,7 +117,12 @@ impl<'pl> DocumentOperation<'pl> {
 };
 operations_stats.push(PayloadStats { document_count, bytes, error });
 }
-step.store(payload_count as u32, Ordering::Relaxed);
+send_progress(Progress::from_step_substep(
+Step::PreparingPayloads,
+payload_count as u32,
+payload_count as u32,
+));

 // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
 let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =

@@ -163,9 +166,8 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(

 // Only guess the primary key if it is the first document
 let retrieved_primary_key = if previous_offset == 0 {
-let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer)
-.map(Some)
-.map_err(UserError::SerdeJson)?;
+let doc =
+RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;

 let result = retrieve_or_guess_primary_key(
 rtxn,

@@ -252,24 +254,6 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
 previous_offset = iter.byte_offset();
 }

-if payload.is_empty() {
-let result = retrieve_or_guess_primary_key(
-rtxn,
-index,
-new_fields_ids_map,
-primary_key_from_op,
-None,
-);
-match result {
-Ok(Ok((pk, _))) => {
-primary_key.get_or_insert(pk);
-}
-Ok(Err(UserError::NoPrimaryKeyCandidateFound)) => (),
-Ok(Err(user_error)) => return Err(Error::UserError(user_error)),
-Err(error) => return Err(error),
-};
-}

 Ok(new_docids_version_offsets)
 }

@@ -561,9 +545,8 @@ impl MergeChanges for MergeDocumentForReplacement {
 match operations.last() {
 Some(InnerDocOp::Addition(DocumentOffset { content })) => {
 let document = serde_json::from_slice(content).unwrap();
-let document =
-RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
-.map_err(UserError::SerdeJson)?;
+let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
+.map_err(UserError::SerdeJson)?;

 if is_new {
 Ok(Some(DocumentChange::Insertion(Insertion::create(

@@ -649,9 +632,8 @@ impl MergeChanges for MergeDocumentForUpdates {
 }
 };
 let document = serde_json::from_slice(content).unwrap();
-let document =
-RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
-.map_err(UserError::SerdeJson)?;
+let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
+.map_err(UserError::SerdeJson)?;

 Some(Versions::single(document))
 }

@@ -665,9 +647,8 @@ impl MergeChanges for MergeDocumentForUpdates {
 };

 let document = serde_json::from_slice(content).unwrap();
-let document =
-RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
-.map_err(UserError::SerdeJson)?;
+let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
+.map_err(UserError::SerdeJson)?;
 Ok(document)
 });
 Versions::multiple(versions)?

@@ -4,9 +4,7 @@ use std::sync::{OnceLock, RwLock};
 use std::thread::{self, Builder};

 use big_s::S;
-use bstr::ByteSlice as _;
-use bumparaw_collections::RawMap;
-use document_changes::{extract, DocumentChanges, IndexingContext};
+use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
 pub use document_deletion::DocumentDeletion;
 pub use document_operation::{DocumentOperation, PayloadStats};
 use hashbrown::HashMap;

@@ -15,7 +13,7 @@ use heed::{RoTxn, RwTxn};
 use itertools::{merge_join_by, EitherOrBoth};
 pub use partial_dump::PartialDump;
 use rand::SeedableRng as _;
-use rustc_hash::FxBuildHasher;
+use raw_collections::RawMap;
 use time::OffsetDateTime;
 pub use update_by_function::UpdateByFunction;

@@ -23,7 +21,7 @@ use super::channel::*;
 use super::extract::*;
 use super::facet_search_builder::FacetSearchBuilder;
 use super::merger::FacetFieldIdsDelta;
-use super::steps::IndexingStep;
+use super::steps::Step;
 use super::thread_local::ThreadLocal;
 use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
 use super::words_prefix_docids::{

@@ -34,11 +32,8 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
 use crate::facet::FacetType;
 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
-use crate::progress::Progress;
 use crate::proximity::ProximityPrecision;
 use crate::update::del_add::DelAdd;
-use crate::update::facet::new_incremental::FacetsUpdateIncremental;
-use crate::update::facet::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
 use crate::update::new::extract::EmbeddingExtractor;
 use crate::update::new::merger::merge_and_send_rtree;
 use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;

@@ -64,7 +59,7 @@ mod update_by_function;
 ///
 /// TODO return stats
 #[allow(clippy::too_many_arguments)] // clippy: 😝
-pub fn index<'pl, 'indexer, 'index, DC, MSP>(
+pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
 wtxn: &mut RwTxn,
 index: &'index Index,
 pool: &ThreadPoolNoAbort,

@@ -75,11 +70,12 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>(
 document_changes: &DC,
 embedders: EmbeddingConfigs,
 must_stop_processing: &'indexer MSP,
-progress: &'indexer Progress,
+send_progress: &'indexer SP,
 ) -> Result<()>
 where
 DC: DocumentChanges<'pl>,
 MSP: Fn() -> bool + Sync,
+SP: Fn(Progress) + Sync,
 {
 let mut bbbuffers = Vec::new();
 let finished_extraction = AtomicBool::new(false);

@@ -93,32 +89,24 @@ where
 ..grenad_parameters
 };

-// 5% percent of the allocated memory for the extractors, or min 100MiB
-// 5% percent of the allocated memory for the bbqueues, or min 50MiB
-//
-// Minimum capacity for bbqueues
-let minimum_total_bbbuffer_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
-let minimum_total_extractors_capacity = minimum_total_bbbuffer_capacity * 2;
+// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
+let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB

 let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
-(
-GrenadParameters {
-max_memory: Some(minimum_total_extractors_capacity),
-..grenad_parameters
-},
-minimum_total_bbbuffer_capacity,
-), // 100 MiB by thread by default
+(grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default
 |max_memory| {
-let total_bbbuffer_capacity = max_memory.max(minimum_total_bbbuffer_capacity);
+// 2% of the indexing memory
+let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
 let new_grenad_parameters = GrenadParameters {
-max_memory: Some(max_memory.max(minimum_total_extractors_capacity)),
+max_memory: Some(
+max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
+),
 ..grenad_parameters
 };
 (new_grenad_parameters, total_bbbuffer_capacity)
 },
 );

-let (extractor_sender, writer_receiver) = pool
+let (extractor_sender, mut writer_receiver) = pool
 .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
 .unwrap();

|
|||||||
doc_allocs: &doc_allocs,
|
doc_allocs: &doc_allocs,
|
||||||
fields_ids_map_store: &fields_ids_map_store,
|
fields_ids_map_store: &fields_ids_map_store,
|
||||||
must_stop_processing,
|
must_stop_processing,
|
||||||
progress,
|
send_progress,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut index_embeddings = index.embedding_configs(wtxn)?;
|
let mut index_embeddings = index.embedding_configs(wtxn)?;
|
||||||
@@ -170,7 +158,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
IndexingStep::ExtractingDocuments,
|
Step::ExtractingDocuments,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@@ -202,7 +190,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&extractor_sender.field_id_docid_facet_sender(),
|
&extractor_sender.field_id_docid_facet_sender(),
|
||||||
IndexingStep::ExtractingFacets
|
Step::ExtractingFacets
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -214,7 +202,6 @@ where
|
|||||||
caches,
|
caches,
|
||||||
FacetDatabases::new(index),
|
FacetDatabases::new(index),
|
||||||
index,
|
index,
|
||||||
&rtxn,
|
|
||||||
extractor_sender.facet_docids(),
|
extractor_sender.facet_docids(),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@@ -236,7 +223,7 @@ where
|
|||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
IndexingStep::ExtractingWords
|
Step::ExtractingWords
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -314,7 +301,7 @@ where
|
|||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
IndexingStep::ExtractingWordProximity,
|
Step::ExtractingWordProximity,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -350,7 +337,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
IndexingStep::ExtractingEmbeddings,
|
Step::ExtractingEmbeddings,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@@ -383,7 +370,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
IndexingStep::WritingGeoPoints
|
Step::WritingGeoPoints
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -395,7 +382,9 @@ where
|
|||||||
&indexing_context.must_stop_processing,
|
&indexing_context.must_stop_processing,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
|
|
||||||
|
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
|
||||||
|
|
||||||
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
|
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
|
||||||
|
|
||||||
Result::Ok((facet_field_ids_delta, index_embeddings))
|
Result::Ok((facet_field_ids_delta, index_embeddings))
|
||||||
@@ -433,7 +422,6 @@ where
|
|||||||
let mut arroy_writers = arroy_writers?;
|
let mut arroy_writers = arroy_writers?;
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut writer_receiver = writer_receiver;
|
|
||||||
let span = tracing::trace_span!(target: "indexing::write_db", "all");
|
let span = tracing::trace_span!(target: "indexing::write_db", "all");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
|
||||||
@@ -496,7 +484,7 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
|
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
|
||||||
|
|
||||||
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
|
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
|
||||||
|
|
||||||
@@ -509,7 +497,10 @@ where
|
|||||||
break 'vectors;
|
break 'vectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
|
(indexing_context.send_progress)(Progress::from_step(
|
||||||
|
Step::WritingEmbeddingsToDatabase,
|
||||||
|
));
|
||||||
|
|
||||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||||
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
|
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
|
||||||
let dimensions = *dimensions;
|
let dimensions = *dimensions;
|
||||||
@@ -525,19 +516,21 @@ where
|
|||||||
index.put_embedding_configs(wtxn, index_embeddings)?;
|
index.put_embedding_configs(wtxn, index_embeddings)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
|
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
|
||||||
|
|
||||||
if index.facet_search(wtxn)? {
|
if index.facet_search(wtxn)? {
|
||||||
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
|
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
|
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
|
||||||
|
|
||||||
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
|
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));
|
||||||
|
|
||||||
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
|
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
|
||||||
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
|
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
indexing_context.progress.update_progress(IndexingStep::Finalizing);
|
(indexing_context.send_progress)(Progress::from_step(Step::Finalizing));
|
||||||
|
|
||||||
Ok(()) as Result<_>
|
Ok(()) as Result<_>
|
||||||
})?;
|
})?;
|
||||||
@@ -593,12 +586,7 @@ fn write_from_bbqueue(
|
|||||||
}
|
}
|
||||||
(key, None) => match database.delete(wtxn, key) {
|
(key, None) => match database.delete(wtxn, key) {
|
||||||
Ok(false) => {
|
Ok(false) => {
|
||||||
tracing::error!(
|
unreachable!("We tried to delete an unknown key: {key:?}")
|
||||||
database_name,
|
|
||||||
key_bytes = ?key,
|
|
||||||
formatted_key = ?key.as_bstr(),
|
|
||||||
"Attempt to delete an unknown key"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
@@ -753,66 +741,27 @@ fn compute_facet_search_database(
|
|||||||
fn compute_facet_level_database(
|
fn compute_facet_level_database(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
mut facet_field_ids_delta: FacetFieldIdsDelta,
|
facet_field_ids_delta: FacetFieldIdsDelta,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
|
if let Some(modified_facet_string_ids) = facet_field_ids_delta.modified_facet_string_ids() {
|
||||||
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string");
|
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
match delta {
|
FacetsUpdateBulk::new_not_updating_level_0(
|
||||||
super::merger::FacetFieldIdDelta::Bulk => {
|
index,
|
||||||
tracing::debug!(%fid, "bulk string facet processing");
|
modified_facet_string_ids,
|
||||||
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
|
FacetType::String,
|
||||||
.execute(wtxn)?
|
)
|
||||||
}
|
.execute(wtxn)?;
|
||||||
super::merger::FacetFieldIdDelta::Incremental(delta_data) => {
|
|
||||||
tracing::debug!(%fid, len=%delta_data.len(), "incremental string facet processing");
|
|
||||||
FacetsUpdateIncremental::new(
|
|
||||||
index,
|
|
||||||
FacetType::String,
|
|
||||||
fid,
|
|
||||||
delta_data,
|
|
||||||
FACET_GROUP_SIZE,
|
|
||||||
FACET_MIN_LEVEL_SIZE,
|
|
||||||
FACET_MAX_GROUP_SIZE,
|
|
||||||
)
|
|
||||||
.execute(wtxn)?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if let Some(modified_facet_number_ids) = facet_field_ids_delta.modified_facet_number_ids() {
|
||||||
for (fid, delta) in facet_field_ids_delta.consume_facet_number_delta() {
|
|
||||||
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number");
|
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
match delta {
|
FacetsUpdateBulk::new_not_updating_level_0(
|
||||||
super::merger::FacetFieldIdDelta::Bulk => {
|
|
||||||
tracing::debug!(%fid, "bulk number facet processing");
|
|
||||||
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::Number)
|
|
||||||
.execute(wtxn)?
|
|
||||||
}
|
|
||||||
super::merger::FacetFieldIdDelta::Incremental(delta_data) => {
|
|
||||||
tracing::debug!(%fid, len=%delta_data.len(), "incremental number facet processing");
|
|
||||||
FacetsUpdateIncremental::new(
|
|
||||||
index,
|
|
||||||
FacetType::Number,
|
|
||||||
fid,
|
|
||||||
delta_data,
|
|
||||||
FACET_GROUP_SIZE,
|
|
||||||
FACET_MIN_LEVEL_SIZE,
|
|
||||||
FACET_MAX_GROUP_SIZE,
|
|
||||||
)
|
|
||||||
.execute(wtxn)?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
debug_assert!(crate::update::facet::sanity_checks(
|
|
||||||
index,
|
index,
|
||||||
wtxn,
|
modified_facet_number_ids,
|
||||||
fid,
|
|
||||||
FacetType::Number,
|
FacetType::Number,
|
||||||
FACET_GROUP_SIZE as usize,
|
|
||||||
FACET_MIN_LEVEL_SIZE as usize,
|
|
||||||
FACET_MAX_GROUP_SIZE as usize,
|
|
||||||
)
|
)
|
||||||
.is_ok());
|
.execute(wtxn)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -827,7 +776,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
new_fields_ids_map: &mut FieldsIdsMap,
|
new_fields_ids_map: &mut FieldsIdsMap,
|
||||||
primary_key_from_op: Option<&'a str>,
|
primary_key_from_op: Option<&'a str>,
|
||||||
first_document: Option<RawMap<'a, FxBuildHasher>>,
|
first_document: Option<RawMap<'a>>,
|
||||||
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
||||||
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
use std::ops::DerefMut;
|
use std::ops::DerefMut;
|
||||||
|
|
||||||
use bumparaw_collections::RawMap;
|
|
||||||
use rayon::iter::IndexedParallelIterator;
|
use rayon::iter::IndexedParallelIterator;
|
||||||
use rustc_hash::FxBuildHasher;
|
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
||||||
@@ -77,7 +75,7 @@ where
|
|||||||
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
||||||
let external_document_id = external_document_id.to_de();
|
let external_document_id = external_document_id.to_de();
|
||||||
|
|
||||||
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||||
.map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
use bumparaw_collections::RawMap;
|
use raw_collections::RawMap;
|
||||||
use rayon::iter::IndexedParallelIterator;
|
use rayon::iter::IndexedParallelIterator;
|
||||||
use rayon::slice::ParallelSlice as _;
|
use rayon::slice::ParallelSlice as _;
|
||||||
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use rustc_hash::FxBuildHasher;
|
|
||||||
|
|
||||||
use super::document_changes::DocumentChangeContext;
|
use super::document_changes::DocumentChangeContext;
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
@@ -161,12 +160,8 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
|
|||||||
if document_id != new_document_id {
|
if document_id != new_document_id {
|
||||||
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
||||||
} else {
|
} else {
|
||||||
let raw_new_doc = RawMap::from_raw_value_and_hasher(
|
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
|
||||||
raw_new_doc,
|
.map_err(InternalError::SerdeJson)?;
|
||||||
FxBuildHasher,
|
|
||||||
doc_alloc,
|
|
||||||
)
|
|
||||||
.map_err(InternalError::SerdeJson)?;
|
|
||||||
|
|
||||||
Ok(Some(DocumentChange::Update(Update::create(
|
Ok(Some(DocumentChange::Update(Update::create(
|
||||||
docid,
|
docid,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
 use std::cell::RefCell;

-use hashbrown::HashMap;
+use hashbrown::HashSet;
 use heed::types::Bytes;
 use heed::{Database, RoTxn};
 use memmap2::Mmap;
@@ -12,7 +12,6 @@ use super::extract::{
 merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
 FacetKind, GeoExtractorData,
 };
-use crate::update::facet::new_incremental::FacetFieldIdChange;
 use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};

 #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
@@ -81,16 +80,35 @@ where
 }
 merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
 let current = database.get(&rtxn, key)?;
-match merge_cbo_bitmaps(current, del, add)? {
-Operation::Write(bitmap) => {
+if let (Some(del), Some(current)) = (&del, &current) {
+let current = CboRoaringBitmapCodec::deserialize_from(current).unwrap();
+let diff = del - &current;
+let external_ids = index.external_id_of(&rtxn, &diff).unwrap().into_iter().map(|id| id.unwrap()).collect::<Vec<_>>();
+if !del.is_subset(&current) {
+eprintln!(
+"======================== {:?}: {} -> c: {:?} d: {:?} a: {:?} extra: {:?} extra_external_ids: {:?}",
+D::DATABASE,
+D::DATABASE.stringify_key(key),
+&current,
+del,
+add,
+diff,
+external_ids
+);
+}
+}
+match merge_cbo_bitmaps(current, del, add) {
+Ok(Operation::Write(bitmap)) => {
 docids_sender.write(key, &bitmap)?;
 Ok(())
 }
-Operation::Delete => {
+Ok(Operation::Delete) => {
 docids_sender.delete(key)?;
 Ok(())
 }
-Operation::Ignore => Ok(()),
+Ok(Operation::Ignore) => Ok(()),
+Err(e) => Err(e),
 }
 })
 })
@@ -101,18 +119,12 @@ pub fn merge_and_send_facet_docids<'extractor>(
 mut caches: Vec<BalancedCaches<'extractor>>,
 database: FacetDatabases,
 index: &Index,
-rtxn: &RoTxn,
 docids_sender: FacetDocidsSender,
 ) -> Result<FacetFieldIdsDelta> {
-let max_string_count = (index.facet_id_string_docids.len(rtxn)? / 500) as usize;
-let max_number_count = (index.facet_id_f64_docids.len(rtxn)? / 500) as usize;
-let max_string_count = max_string_count.clamp(1000, 100_000);
-let max_number_count = max_number_count.clamp(1000, 100_000);
 transpose_and_freeze_caches(&mut caches)?
 .into_par_iter()
 .map(|frozen| {
-let mut facet_field_ids_delta =
-FacetFieldIdsDelta::new(max_string_count, max_number_count);
+let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
 let rtxn = index.read_txn()?;
 merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
 let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
@@ -133,10 +145,7 @@ pub fn merge_and_send_facet_docids<'extractor>(

 Ok(facet_field_ids_delta)
 })
-.reduce(
-|| Ok(FacetFieldIdsDelta::new(max_string_count, max_number_count)),
-|lhs, rhs| Ok(lhs?.merge(rhs?)),
-)
+.reduce(|| Ok(FacetFieldIdsDelta::default()), |lhs, rhs| Ok(lhs?.merge(rhs?)))
 }

 pub struct FacetDatabases<'a> {
@@ -165,131 +174,60 @@ impl<'a> FacetDatabases<'a> {
 }
 }

-#[derive(Debug)]
-pub enum FacetFieldIdDelta {
-Bulk,
-Incremental(Vec<FacetFieldIdChange>),
-}
-
-impl FacetFieldIdDelta {
-fn push(&mut self, facet_value: &[u8], max_count: usize) {
-*self = match std::mem::replace(self, FacetFieldIdDelta::Bulk) {
-FacetFieldIdDelta::Bulk => FacetFieldIdDelta::Bulk,
-FacetFieldIdDelta::Incremental(mut v) => {
-if v.len() >= max_count {
-FacetFieldIdDelta::Bulk
-} else {
-v.push(FacetFieldIdChange { facet_value: facet_value.into() });
-FacetFieldIdDelta::Incremental(v)
-}
-}
-}
-}
-
-fn merge(&mut self, rhs: Option<Self>, max_count: usize) {
-let Some(rhs) = rhs else {
-return;
-};
-*self = match (std::mem::replace(self, FacetFieldIdDelta::Bulk), rhs) {
-(FacetFieldIdDelta::Bulk, _) | (_, FacetFieldIdDelta::Bulk) => FacetFieldIdDelta::Bulk,
-(
-FacetFieldIdDelta::Incremental(mut left),
-FacetFieldIdDelta::Incremental(mut right),
-) => {
-if left.len() + right.len() >= max_count {
-FacetFieldIdDelta::Bulk
-} else {
-left.append(&mut right);
-FacetFieldIdDelta::Incremental(left)
-}
-}
-};
-}
-}
-
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct FacetFieldIdsDelta {
 /// The field ids that have been modified
-modified_facet_string_ids: HashMap<FieldId, FacetFieldIdDelta, rustc_hash::FxBuildHasher>,
-modified_facet_number_ids: HashMap<FieldId, FacetFieldIdDelta, rustc_hash::FxBuildHasher>,
-max_string_count: usize,
-max_number_count: usize,
+modified_facet_string_ids: HashSet<FieldId>,
+modified_facet_number_ids: HashSet<FieldId>,
 }

 impl FacetFieldIdsDelta {
-pub fn new(max_string_count: usize, max_number_count: usize) -> Self {
-Self {
-max_string_count,
-max_number_count,
-modified_facet_string_ids: Default::default(),
-modified_facet_number_ids: Default::default(),
-}
-}
-
-fn register_facet_string_id(&mut self, field_id: FieldId, facet_value: &[u8]) {
-self.modified_facet_string_ids
-.entry(field_id)
-.or_insert(FacetFieldIdDelta::Incremental(Default::default()))
-.push(facet_value, self.max_string_count);
-}
-
-fn register_facet_number_id(&mut self, field_id: FieldId, facet_value: &[u8]) {
-self.modified_facet_number_ids
-.entry(field_id)
-.or_insert(FacetFieldIdDelta::Incremental(Default::default()))
-.push(facet_value, self.max_number_count);
+fn register_facet_string_id(&mut self, field_id: FieldId) {
+self.modified_facet_string_ids.insert(field_id);
+}
+
+fn register_facet_number_id(&mut self, field_id: FieldId) {
+self.modified_facet_number_ids.insert(field_id);
 }

 fn register_from_key(&mut self, key: &[u8]) {
-let (facet_kind, field_id, facet_value) = self.extract_key_data(key);
-match (facet_kind, facet_value) {
-(FacetKind::Number, Some(facet_value)) => {
-self.register_facet_number_id(field_id, facet_value)
-}
-(FacetKind::String, Some(facet_value)) => {
-self.register_facet_string_id(field_id, facet_value)
-}
+let (facet_kind, field_id) = self.extract_key_data(key);
+match facet_kind {
+FacetKind::Number => self.register_facet_number_id(field_id),
+FacetKind::String => self.register_facet_string_id(field_id),
 _ => (),
 }
 }

-fn extract_key_data<'key>(&self, key: &'key [u8]) -> (FacetKind, FieldId, Option<&'key [u8]>) {
+fn extract_key_data(&self, key: &[u8]) -> (FacetKind, FieldId) {
 let facet_kind = FacetKind::from(key[0]);
 let field_id = FieldId::from_be_bytes([key[1], key[2]]);
-let facet_value = if key.len() >= 4 {
-// level is also stored in the key at [3] (always 0)
-Some(&key[4..])
-} else {
-None
-};
-(facet_kind, field_id, facet_value)
+(facet_kind, field_id)
 }

-pub fn consume_facet_string_delta(
-&mut self,
-) -> impl Iterator<Item = (FieldId, FacetFieldIdDelta)> + '_ {
-self.modified_facet_string_ids.drain()
-}
-
-pub fn consume_facet_number_delta(
-&mut self,
-) -> impl Iterator<Item = (FieldId, FacetFieldIdDelta)> + '_ {
-self.modified_facet_number_ids.drain()
+pub fn modified_facet_string_ids(&self) -> Option<Vec<FieldId>> {
+if self.modified_facet_string_ids.is_empty() {
+None
+} else {
+Some(self.modified_facet_string_ids.iter().copied().collect())
+}
+}
+
+pub fn modified_facet_number_ids(&self) -> Option<Vec<FieldId>> {
+if self.modified_facet_number_ids.is_empty() {
+None
+} else {
+Some(self.modified_facet_number_ids.iter().copied().collect())
+}
 }

 pub fn merge(mut self, rhs: Self) -> Self {
-// rhs.max_xx_count is assumed to be equal to self.max_xx_count, and so gets unused
-let Self { modified_facet_number_ids, modified_facet_string_ids, .. } = rhs;
-modified_facet_number_ids.into_iter().for_each(|(fid, mut delta)| {
-let old_delta = self.modified_facet_number_ids.remove(&fid);
-delta.merge(old_delta, self.max_number_count);
-self.modified_facet_number_ids.insert(fid, delta);
+let Self { modified_facet_number_ids, modified_facet_string_ids } = rhs;
+modified_facet_number_ids.into_iter().for_each(|fid| {
+self.modified_facet_number_ids.insert(fid);
 });
-modified_facet_string_ids.into_iter().for_each(|(fid, mut delta)| {
-let old_delta = self.modified_facet_string_ids.remove(&fid);
-delta.merge(old_delta, self.max_string_count);
-self.modified_facet_string_ids.insert(fid, delta);
+modified_facet_string_ids.into_iter().for_each(|fid| {
+self.modified_facet_string_ids.insert(fid);
 });
 self
 }
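The minus side above tracked per-field facet changes with a Bulk/Incremental enum: individual facet values are recorded until a threshold is reached, after which that field falls back to a bulk rebuild. Below is a minimal standalone sketch of that threshold behaviour; the type `Delta` and the sample values are invented for illustration and simplify the real `FacetFieldIdDelta`/`FacetFieldIdChange` types.

```rust
// Simplified sketch of the removed FacetFieldIdDelta threshold logic:
// record individual facet values until `max_count`, then fall back to Bulk.
#[derive(Debug)]
enum Delta {
    Bulk,
    Incremental(Vec<Vec<u8>>),
}

impl Delta {
    fn push(&mut self, facet_value: &[u8], max_count: usize) {
        *self = match std::mem::replace(self, Delta::Bulk) {
            Delta::Bulk => Delta::Bulk,
            Delta::Incremental(mut v) => {
                if v.len() >= max_count {
                    Delta::Bulk
                } else {
                    v.push(facet_value.to_vec());
                    Delta::Incremental(v)
                }
            }
        };
    }
}

fn main() {
    let mut delta = Delta::Incremental(Vec::new());
    for value in [b"red".as_slice(), b"green".as_slice(), b"blue".as_slice()] {
        delta.push(value, 2);
    }
    // Two values were recorded incrementally; the third tipped it into Bulk.
    println!("{delta:?}");
}
```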
@@ -315,21 +253,46 @@ fn merge_cbo_bitmaps(
 (None, Some(_del), Some(add)) => Ok(Operation::Write(add)),
 (Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
 (Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
-(Some(current), Some(del), add) => {
+(Some(current), Some(mut del), add) => {
 debug_assert!(
 del.is_subset(&current),
 "del is not a subset of current, which must be impossible."
 );
 let output = match add {
-Some(add) => (&current - (&del - &add)) | (add - del),
-None => &current - del,
+Some(add) => {
+del -= &add;
+
+if del.is_empty() {
+if add.is_subset(&current) {
+// no changes, no allocation
+None
+} else {
+// addition
+Some(current | add)
+}
+} else {
+if add.is_subset(&current) {
+// deletion only, no union
+Some(current - del)
+} else {
+// deletion and addition
+Some((current - del) | add)
+}
+}
+}
+// deletion only, no union
+None => Some(current - del),
 };
-if output.is_empty() {
-Ok(Operation::Delete)
-} else if current == output {
-Ok(Operation::Ignore)
-} else {
-Ok(Operation::Write(output))
+match output {
+Some(output) => {
+if output.is_empty() {
+Ok(Operation::Delete)
+} else {
+Ok(Operation::Write(output))
+}
+}
+None => Ok(Operation::Ignore),
 }
 }
 }
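The hunk above swaps the single bitmap expression for explicit emptiness and subset checks, so a no-op merge can be reported as `Operation::Ignore` without building a new bitmap. The following standalone sketch, which is not part of the diff and uses made-up sample values, checks with the `roaring` crate that both formulations agree when `del` is a subset of `current`:

```rust
// Standalone sketch (not Meilisearch code): compare the removed one-liner
// with the branchy merge added above, assuming `del` is a subset of `current`.
use roaring::RoaringBitmap;

fn main() {
    let current: RoaringBitmap = (0u32..10).collect();
    let del: RoaringBitmap = (0u32..3).collect();
    let add: RoaringBitmap = (8u32..12).collect();

    // Removed version: one expression, always allocates a result bitmap.
    let old = &(&current - &(&del - &add)) | &(&add - &del);

    // Added version: strip `add` from `del` first, then branch on emptiness
    // and subset checks (the `None` case maps to Operation::Ignore).
    let mut del = del.clone();
    del -= &add;
    let new = if del.is_empty() {
        if add.is_subset(&current) { current.clone() } else { &current | &add }
    } else if add.is_subset(&current) {
        &current - &del
    } else {
        &(&current - &del) | &add
    };

    assert_eq!(old, new); // both yield {3..=11} here
}
```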
@@ -16,7 +16,6 @@ pub mod indexer;
 mod merger;
 mod parallel_iterator_ext;
 mod ref_cell_ext;
-pub mod reindex;
 pub(crate) mod steps;
 pub(crate) mod thread_local;
 pub mod vector_document;
@@ -1,38 +0,0 @@
-use heed::RwTxn;
-
-use super::document::{Document, DocumentFromDb};
-use crate::progress::{self, AtomicSubStep, Progress};
-use crate::{FieldDistribution, Index, Result};
-
-pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progress) -> Result<()> {
-let mut distribution = FieldDistribution::new();
-
-let document_count = index.number_of_documents(wtxn)?;
-let field_id_map = index.fields_ids_map(wtxn)?;
-
-let (update_document_count, sub_step) =
-AtomicSubStep::<progress::Document>::new(document_count as u32);
-progress.update_progress(sub_step);
-
-let docids = index.documents_ids(wtxn)?;
-
-for docid in docids {
-update_document_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
-
-let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? else {
-continue;
-};
-let geo_iter = document.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
-for res in document.iter_top_level_fields().chain(geo_iter) {
-let (field_name, _) = res?;
-if let Some(count) = distribution.get_mut(field_name) {
-*count += 1;
-} else {
-distribution.insert(field_name.to_owned(), 1);
-}
-}
-}
-
-index.put_field_distribution(wtxn, &distribution)?;
-Ok(())
-}
@@ -1,12 +1,8 @@
-use std::borrow::Cow;
-
 use enum_iterator::Sequence;

-use crate::progress::Step;
-
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
-#[repr(u8)]
-pub enum IndexingStep {
+#[repr(u16)]
+pub enum Step {
 PreparingPayloads,
 ExtractingDocuments,
 ExtractingFacets,
@@ -22,31 +18,30 @@ pub enum IndexingStep {
 Finalizing,
 }

-impl Step for IndexingStep {
-fn name(&self) -> Cow<'static, str> {
+impl Step {
+pub fn name(&self) -> &'static str {
 match self {
-IndexingStep::PreparingPayloads => "preparing update file",
-IndexingStep::ExtractingDocuments => "extracting documents",
-IndexingStep::ExtractingFacets => "extracting facets",
-IndexingStep::ExtractingWords => "extracting words",
-IndexingStep::ExtractingWordProximity => "extracting word proximity",
-IndexingStep::ExtractingEmbeddings => "extracting embeddings",
-IndexingStep::WritingGeoPoints => "writing geo points",
-IndexingStep::WritingToDatabase => "writing to database",
-IndexingStep::WaitingForExtractors => "waiting for extractors",
-IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
-IndexingStep::PostProcessingFacets => "post-processing facets",
-IndexingStep::PostProcessingWords => "post-processing words",
-IndexingStep::Finalizing => "finalizing",
+Step::PreparingPayloads => "preparing update file",
+Step::ExtractingDocuments => "extracting documents",
+Step::ExtractingFacets => "extracting facets",
+Step::ExtractingWords => "extracting words",
+Step::ExtractingWordProximity => "extracting word proximity",
+Step::ExtractingEmbeddings => "extracting embeddings",
+Step::WritingGeoPoints => "writing geo points",
+Step::WritingToDatabase => "writing to database",
+Step::WaitingForExtractors => "waiting for extractors",
+Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
+Step::PostProcessingFacets => "post-processing facets",
+Step::PostProcessingWords => "post-processing words",
+Step::Finalizing => "finalizing",
 }
-.into()
 }

-fn current(&self) -> u32 {
-*self as u32
+pub fn finished_steps(self) -> u16 {
+self as u16
 }

-fn total(&self) -> u32 {
-Self::CARDINALITY as u32
+pub const fn total_steps() -> u16 {
+Self::CARDINALITY as u16
 }
 }
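The plus side above turns the trait-based `IndexingStep` back into an inherent `Step` impl: the enum discriminant counts finished steps and the `Sequence` cardinality gives the total. A minimal standalone sketch of that pattern follows; the enum name `DemoStep` is invented, the variant names are borrowed from the hunk, and `enum_iterator` is assumed to be the crate providing `Sequence`.

```rust
// Standalone sketch of the progress-step pattern used above:
// the discriminant is "steps finished so far", CARDINALITY is the total.
use enum_iterator::Sequence;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u16)]
enum DemoStep {
    PreparingPayloads,
    ExtractingDocuments,
    Finalizing,
}

impl DemoStep {
    fn name(&self) -> &'static str {
        match self {
            DemoStep::PreparingPayloads => "preparing update file",
            DemoStep::ExtractingDocuments => "extracting documents",
            DemoStep::Finalizing => "finalizing",
        }
    }

    fn finished_steps(self) -> u16 {
        // Relies on #[repr(u16)]: each variant's discriminant is its position.
        self as u16
    }

    const fn total_steps() -> u16 {
        // CARDINALITY comes from the enum_iterator::Sequence derive.
        DemoStep::CARDINALITY as u16
    }
}

fn main() {
    let step = DemoStep::ExtractingDocuments;
    println!("{} ({}/{})", step.name(), step.finished_steps(), DemoStep::total_steps());
}
```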
@@ -1,10 +1,9 @@
 use std::collections::BTreeSet;

 use bumpalo::Bump;
-use bumparaw_collections::RawMap;
 use deserr::{Deserr, IntoValue};
 use heed::RoTxn;
-use rustc_hash::FxBuildHasher;
+use raw_collections::RawMap;
 use serde::Serialize;
 use serde_json::value::RawValue;

@@ -85,7 +84,7 @@ pub struct VectorDocumentFromDb<'t> {
 docid: DocumentId,
 embedding_config: Vec<IndexEmbeddingConfig>,
 index: &'t Index,
-vectors_field: Option<RawMap<'t, FxBuildHasher>>,
+vectors_field: Option<RawMap<'t>>,
 rtxn: &'t RoTxn<'t>,
 doc_alloc: &'t Bump,
 }
@@ -103,10 +102,9 @@ impl<'t> VectorDocumentFromDb<'t> {
 };
 let vectors = document.vectors_field()?;
 let vectors_field = match vectors {
-Some(vectors) => Some(
-RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
-.map_err(InternalError::SerdeJson)?,
-),
+Some(vectors) => {
+Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?)
+}
 None => None,
 };

@@ -222,7 +220,7 @@ fn entry_from_raw_value(

 pub struct VectorDocumentFromVersions<'doc> {
 external_document_id: &'doc str,
-vectors: RawMap<'doc, FxBuildHasher>,
+vectors: RawMap<'doc>,
 embedders: &'doc EmbeddingConfigs,
 }

@@ -235,8 +233,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
 ) -> Result<Option<Self>> {
 let document = DocumentFromVersions::new(versions);
 if let Some(vectors_field) = document.vectors_field()? {
-let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump)
-.map_err(UserError::SerdeJson)?;
+let vectors =
+RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
 Ok(Some(Self { external_document_id, vectors, embedders }))
 } else {
 Ok(None)
@@ -3,7 +3,6 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use maplit::hashset;
 use milli::documents::mmap_from_objects;
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -58,7 +57,7 @@ fn test_facet_distribution_with_no_facet_values() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();

@@ -73,7 +72,7 @@ fn test_facet_distribution_with_no_facet_values() {
 &document_changes,
 embedders,
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();

@@ -7,7 +7,6 @@ use bumpalo::Bump;
 use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -91,7 +90,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();

@@ -110,7 +109,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
 &document_changes,
 embedders,
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();

@@ -5,7 +5,6 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use itertools::Itertools;
 use maplit::hashset;
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -327,7 +326,7 @@ fn criteria_ascdesc() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();

@@ -342,7 +341,7 @@ fn criteria_ascdesc() {
 &document_changes,
 embedders,
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();

@@ -3,7 +3,6 @@ use std::collections::BTreeSet;
 use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use milli::documents::mmap_from_objects;
-use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -136,7 +135,7 @@ fn test_typo_disabled_on_word() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-Progress::default(),
+&|_progress| (),
 )
 .unwrap();

@@ -151,7 +150,7 @@ fn test_typo_disabled_on_word() {
 &document_changes,
 embedders,
 &|| false,
-&Progress::default(),
+&|_| (),
 )
 .unwrap();

@@ -1,56 +1,23 @@
 use std::collections::BTreeMap;
-use std::time::Duration;

 use anyhow::{bail, Context as _};
-use tokio::process::Command;
-use tokio::time;

 use super::assets::Asset;
 use super::client::Client;
 use super::workload::Workload;

 pub async fn kill(mut meilisearch: tokio::process::Child) {
-let Some(id) = meilisearch.id() else { return };
-
-match Command::new("kill").args(["--signal=TERM", &id.to_string()]).spawn() {
-Ok(mut cmd) => {
-let Err(error) = cmd.wait().await else { return };
-tracing::warn!(
-error = &error as &dyn std::error::Error,
-"while awaiting the Meilisearch server kill"
-);
-}
-Err(error) => {
-tracing::warn!(
-error = &error as &dyn std::error::Error,
-"while terminating Meilisearch server with a kill -s TERM"
-);
-if let Err(error) = meilisearch.kill().await {
-tracing::warn!(
-error = &error as &dyn std::error::Error,
-"while terminating Meilisearch server"
-)
-}
-return;
-}
-};
-
-match time::timeout(Duration::from_secs(5), meilisearch.wait()).await {
-Ok(_) => (),
-Err(_) => {
-if let Err(error) = meilisearch.kill().await {
-tracing::warn!(
-error = &error as &dyn std::error::Error,
-"while terminating Meilisearch server"
-)
-}
-}
+if let Err(error) = meilisearch.kill().await {
+tracing::warn!(
+error = &error as &dyn std::error::Error,
+"while terminating Meilisearch server"
+)
+}
 }
 }

 #[tracing::instrument]
 pub async fn build() -> anyhow::Result<()> {
-let mut command = Command::new("cargo");
+let mut command = tokio::process::Command::new("cargo");
 command.arg("build").arg("--release").arg("-p").arg("meilisearch");

 command.kill_on_drop(true);
@@ -70,8 +37,17 @@ pub async fn start(
 master_key: Option<&str>,
 workload: &Workload,
 asset_folder: &str,
-mut command: Command,
 ) -> anyhow::Result<tokio::process::Child> {
+let mut command = tokio::process::Command::new("cargo");
+command
+.arg("run")
+.arg("--release")
+.arg("-p")
+.arg("meilisearch")
+.arg("--bin")
+.arg("meilisearch")
+.arg("--");
+
 command.arg("--db-path").arg("./_xtask_benchmark.ms");
 if let Some(master_key) = master_key {
 command.arg("--master-key").arg(master_key);
@@ -110,7 +86,7 @@ async fn wait_for_health(

 return Ok(());
 }
-time::sleep(Duration::from_millis(500)).await;
+tokio::time::sleep(std::time::Duration::from_millis(500)).await;
 // check whether the Meilisearch instance exited early (cut the wait)
 if let Some(exit_code) =
 meilisearch.try_wait().context("cannot check Meilisearch server process status")?
@@ -86,12 +86,6 @@ pub struct BenchDeriveArgs {
 /// The maximum time in seconds we allow for fetching the task queue before timing out.
 #[arg(long, default_value_t = 60)]
 tasks_queue_timeout_secs: u64,
-
-/// The path to the binary to run.
-///
-/// If unspecified, runs `cargo run` after building Meilisearch with `cargo build`.
-#[arg(long)]
-binary_path: Option<PathBuf>,
 }

 pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
@@ -176,7 +170,6 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
 args.master_key.as_deref(),
 workload,
 &args,
-args.binary_path.as_deref(),
 )
 .await?;

@@ -1,7 +1,6 @@
 use std::collections::BTreeMap;
 use std::fs::File;
 use std::io::{Seek as _, Write as _};
-use std::path::Path;

 use anyhow::{bail, Context as _};
 use futures_util::TryStreamExt as _;
@@ -86,13 +85,13 @@ pub async fn execute(
 master_key: Option<&str>,
 workload: Workload,
 args: &BenchDeriveArgs,
-binary_path: Option<&Path>,
 ) -> anyhow::Result<()> {
 assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;

 let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?;

 let mut tasks = Vec::new();

 for i in 0..workload.run_count {
 tasks.push(
 execute_run(
@@ -103,7 +102,6 @@ pub async fn execute(
 master_key,
 &workload,
 args,
-binary_path,
 i,
 )
 .await?,
@@ -111,6 +109,7 @@ pub async fn execute(
 }

 let mut reports = Vec::with_capacity(workload.run_count as usize);

 for task in tasks {
 reports.push(
 task.await
@@ -134,31 +133,13 @@ async fn execute_run(
 master_key: Option<&str>,
 workload: &Workload,
 args: &BenchDeriveArgs,
-binary_path: Option<&Path>,
 run_number: u16,
 ) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
 meili_process::delete_db();

-let run_command = match binary_path {
-Some(binary_path) => tokio::process::Command::new(binary_path),
-None => {
-meili_process::build().await?;
-let mut command = tokio::process::Command::new("cargo");
-command
-.arg("run")
-.arg("--release")
-.arg("-p")
-.arg("meilisearch")
-.arg("--bin")
-.arg("meilisearch")
-.arg("--");
-command
-}
-};
-
+meili_process::build().await?;

 let meilisearch =
-meili_process::start(meili_client, master_key, workload, &args.asset_folder, run_command)
-.await?;
+meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?;

 let processor = run_commands(
 dashboard_client,