Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-26 07:40:31 +00:00)

Compare commits: v1.12.0-rc...replace-ha

111 Commits
| SHA1 |
|---|
| 9762d02900 |
| 9a9383643f |
| cac355bfa7 |
| 9020a50df8 |
| 52843123d4 |
| 6298db5bea |
| a003a0934a |
| 3a11e39c01 |
| 5f896b1050 |
| d0c4e6da6b |
| 2da5584bb5 |
| b7eb802ae6 |
| 2e32d0474c |
| cb99ac6f7e |
| be411435f5 |
| 29ef164530 |
| 739c52a3cd |
| 8388698993 |
| 7458f0386c |
| 3ded069042 |
| 261d2ceb06 |
| 1a17e2e572 |
| 5b8cd68abe |
| 5ce9acb0b9 |
| 54341c2e80 |
| 96831ed9bb |
| 0459b1a242 |
| 8ecb726683 |
| 297e72e262 |
| 0ad2f57a92 |
| 71d53f413f |
| 054622bd16 |
| e905a72d73 |
| 2e879c1df8 |
| d040aff101 |
| 5e30731cad |
| beeb31ce41 |
| 057143214d |
| 6a1d26a60c |
| d78f4666a0 |
| a439fa3e1a |
| 767259be7e |
| e9f34fb4b1 |
| d5c07ef7b3 |
| 5e218f3f4d |
| bcab61ab1d |
| 263c5a348e |
| be7d2fbe63 |
| f7f9a131e4 |
| 5df5eb2db2 |
| 30eb0e5b5b |
| 5b860cb989 |
| 76d0623b11 |
| db4eaf4d2d |
| 13f21206a6 |
| 14ee7aa84c |
| 8a35cd1743 |
| 8d33af1dff |
| 3c7ac093d3 |
| d49d127863 |
| b57dd5c58e |
| 90b428a8c3 |
| 096a28656e |
| 3dc87f5baa |
| cc4bd54669 |
| 5383f41bba |
| 58eab9a018 |
| 9f36ffcbdb |
| 68c4717e21 |
| 5c488e20cc |
| da650f834e |
| e83534a430 |
| 98d4a2909e |
| a514ce472a |
| cc63802115 |
| acec45ad7c |
| 08d6413365 |
| 70802eb7c7 |
| 6ac5b3b136 |
| e1e76f39d0 |
| 2094ce8a9a |
| 8442db8101 |
| 79671c9faa |
| a2f64f6552 |
| fde2e0691c |
| 18a9af353c |
| aae0dc715d |
| d0b2c0a523 |
| 2e896f30a5 |
| 8f57b4fdf4 |
| f014e78684 |
| 9008ecda3d |
| d7bcfb2d19 |
| fb66fec398 |
| fa15be5bc4 |
| aa460819a7 |
| e241f91285 |
| d66dc363ed |
| 5560452ef9 |
| d9df7e00e1 |
| b4fb2dabd4 |
| 5606679c53 |
| a3103f347e |
| 25aac45fc7 |
| 510ca99996 |
| 8924d486db |
| e0c3f3d560 |
| cd796b0f4b |
| 3a8051866a |
| 72ba353498 |
| 0dd321afc7 |
Cargo.lock (generated, 59 changed lines)
@@ -489,6 +489,11 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bbqueue"
+version = "0.5.1"
+source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4"
+
 [[package]]
 name = "benchmarks"
 version = "1.12.0"
@@ -969,8 +974,9 @@ dependencies = [
 
 [[package]]
 name = "charabia"
-version = "0.9.1"
-source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf8921fe4d53ab8f9e8f9b72ce6f91726cfc40fffab1243d27db406b5e2e9cc2"
 dependencies = [
  "aho-corasick",
  "csv",
@@ -1245,19 +1251,6 @@ dependencies = [
  "itertools 0.10.5",
 ]
 
-[[package]]
-name = "crossbeam"
-version = "0.8.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
-dependencies = [
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-epoch",
- "crossbeam-queue",
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.13"
@@ -1917,6 +1910,15 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "flume"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
+dependencies = [
+ "spin",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -2263,8 +2265,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
 [[package]]
 name = "grenad"
-version = "0.4.7"
-source = "git+https://github.com/meilisearch/grenad?branch=various-improvements#58ac87d852413571102f44c5e55ca13509a3f1a0"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e2ac9baf835ee2a7f0622a5617792ced6f65af25994078c343d429431ef2bbc"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -2614,7 +2617,7 @@ dependencies = [
  "big_s",
  "bincode",
  "bumpalo",
- "crossbeam",
+ "crossbeam-channel",
 "csv",
 "derive_builder 0.20.0",
 "dump",
@@ -2708,7 +2711,8 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
 [[package]]
 name = "irg-kvariants"
 version = "0.1.1"
-source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26"
 dependencies = [
  "csv",
  "once_cell",
@@ -3608,6 +3612,7 @@ version = "1.12.0"
 dependencies = [
  "allocator-api2",
  "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bbqueue",
  "big_s",
  "bimap",
  "bincode",
@@ -3627,6 +3632,7 @@ dependencies = [
  "enum-iterator",
  "filter-parser",
  "flatten-serde-json",
+ "flume",
  "fst",
  "fxhash",
  "geoutils",
@@ -3912,7 +3918,8 @@ dependencies = [
 [[package]]
 name = "obkv"
 version = "0.3.0"
-source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#ce535874008ecac554f02e0c670e6caf62134d6b"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9"
 
 [[package]]
 name = "once_cell"
@@ -4739,8 +4746,9 @@ dependencies = [
 
 [[package]]
 name = "roaring"
-version = "0.10.6"
-source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#8ff028e484fb6192a0acf5a669eaf18c30cada6e"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -5182,6 +5190,9 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
 
 [[package]]
 name = "spm_precompiled"
@@ -6015,9 +6026,9 @@ dependencies = [
 
 [[package]]
 name = "wana_kana"
-version = "3.0.0"
+version = "4.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "477976a5c56fb7b014795df5a2ce08d2de8bcd4d5980844c5bd3978a7fd1c30b"
+checksum = "a74666202acfcb4f9b995be2e3e9f7f530deb65e05a1407b8d0b30c9c451238a"
 dependencies = [
  "fnv",
  "itertools 0.10.5",
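Taken together, the Cargo.lock hunks above tell one story: temporary git-branch dependencies (charabia, grenad, obkv, roaring) are replaced by pinned crates.io releases, the broad `crossbeam` facade is narrowed to `crossbeam-channel`, and two crates new to the tree, `bbqueue` and `flume`, appear (their exact role in the new indexer is an assumption here; the lockfile only shows them arriving, with `flume` pulling in `spin` as its sole dependency). A minimal, self-contained sketch of the `flume` channel API:

```rust
use flume::unbounded;

fn main() {
    // Minimal sketch of the newly added flume MPMC channel.
    let (tx, rx) = unbounded::<u32>();
    std::thread::spawn(move || {
        for i in 0..3 {
            tx.send(i).unwrap();
        }
        // tx is dropped here, which terminates the receiving iterator below
    });
    let received: Vec<u32> = rx.iter().collect();
    assert_eq!(received, vec![0, 1, 2]);
}
```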
@@ -43,6 +43,3 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.roaring]
 opt-level = 3
-
-[patch.crates-io]
-roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }
@@ -24,7 +24,7 @@ tempfile = "3.14.0"
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.6"
+roaring = "0.10.7"
 
 [build-dependencies]
 anyhow = "1.0.86"
@@ -16,6 +16,7 @@ use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
 use roaring::RoaringBitmap;
 
+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
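The hunk above gates the mimalloc global allocator to non-Windows targets. A self-contained sketch of the same pattern (only the `mimalloc` crate is assumed):

```rust
// Opt the whole process into mimalloc, except on Windows where the
// default system allocator is kept.
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn main() {
    // Every heap allocation below now goes through mimalloc on non-Windows targets.
    let v: Vec<u32> = (0..1024).collect();
    println!("allocated {} u32s", v.len());
}
```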
@@ -157,6 +158,7 @@ fn indexing_songs_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -223,6 +225,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -267,6 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -335,6 +339,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -411,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -455,6 +461,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -495,6 +502,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -562,6 +570,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -628,6 +637,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -694,6 +704,7 @@ fn indexing_wiki(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -759,6 +770,7 @@ fn reindexing_wiki(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -803,6 +815,7 @@ fn reindexing_wiki(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -870,6 +883,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -946,6 +960,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -991,6 +1006,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1032,6 +1048,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1098,6 +1115,7 @@ fn indexing_movies_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1163,6 +1181,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1207,6 +1226,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1274,6 +1294,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1321,6 +1342,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1385,6 +1407,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1429,6 +1452,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1469,6 +1493,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1558,6 +1583,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1648,6 +1674,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1730,6 +1757,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1796,6 +1824,7 @@ fn indexing_geo(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1861,6 +1890,7 @@ fn reindexing_geo(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1905,6 +1935,7 @@ fn reindexing_geo(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
@@ -1972,6 +2003,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
         indexer::index(
             &mut wtxn,
             &index,
+            &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
             config.grenad_parameters(),
             &db_fields_ids_map,
             new_fields_ids_map,
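Every benchmark hunk above makes the same one-line change: `indexer::index` now receives the thread pool as an explicit argument rather than inheriting an ambient pool, which the index-scheduler hunks further down confirm by deleting their `pool.install` wrappers. A toy sketch of that calling-convention change, using plain `rayon` as a stand-in for milli's `ThreadPoolNoAbort` types (all names here are illustrative):

```rust
use rayon::{ThreadPool, ThreadPoolBuilder};

// Before: the callee picked up whatever pool `install` made current.
fn index_implicit(docs: &[u64]) -> u64 {
    use rayon::prelude::*;
    docs.par_iter().sum()
}

// After: the pool is an explicit parameter, so callers can't forget to
// provide one and the callee controls where the parallel work runs.
fn index_explicit(pool: &ThreadPool, docs: &[u64]) -> u64 {
    use rayon::prelude::*;
    pool.install(|| docs.par_iter().sum())
}

fn main() {
    let pool = ThreadPoolBuilder::new().num_threads(4).build().unwrap();
    let docs: Vec<u64> = (0..1_000).collect();
    assert_eq!(pool.install(|| index_implicit(&docs)), index_explicit(&pool, &docs));
}
```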
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;
 
+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;
 
+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;
 
+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
@@ -117,6 +117,7 @@ pub fn base_setup(conf: &Conf) -> Index {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -17,7 +17,7 @@ http = "1.1.0"
 meilisearch-types = { path = "../meilisearch-types" }
 once_cell = "1.19.0"
 regex = "1.10.5"
-roaring = { version = "0.10.6", features = ["serde"] }
+roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 tar = "0.4.41"
@@ -292,6 +292,8 @@ pub(crate) mod test {
             embedders: Setting::NotSet,
             search_cutoff_ms: Setting::NotSet,
             localized_attributes: Setting::NotSet,
+            facet_search: Setting::NotSet,
+            prefix_search: Setting::NotSet,
             _kind: std::marker::PhantomData,
         };
         settings.check()
@@ -382,6 +382,8 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
             embedders: v6::Setting::NotSet,
             localized_attributes: v6::Setting::NotSet,
             search_cutoff_ms: v6::Setting::NotSet,
+            facet_search: v6::Setting::NotSet,
+            prefix_search: v6::Setting::NotSet,
             _kind: std::marker::PhantomData,
         }
     }
@@ -135,6 +135,7 @@ fn main() {
             indexer::index(
                 &mut wtxn,
                 &index,
+                &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                 indexer_config.grenad_parameters(),
                 &db_fields_ids_map,
                 new_fields_ids_map,
@@ -24,7 +24,7 @@ meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.6.0"
 raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 rayon = "1.10.0"
-roaring = { version = "0.10.6", features = ["serde"] }
+roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 synchronoise = "1.0.1"
@@ -45,7 +45,7 @@ bumpalo = "3.16.0"
 [dev-dependencies]
 arroy = "0.5.0"
 big_s = "1.0.2"
-crossbeam = "0.8.4"
+crossbeam-channel = "0.5.13"
 insta = { version = "1.39.0", features = ["json", "redactions"] }
 maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
@@ -115,13 +115,6 @@ pub enum BatchKind {
         allow_index_creation: bool,
         settings_ids: Vec<TaskId>,
     },
-    SettingsAndDocumentOperation {
-        settings_ids: Vec<TaskId>,
-        method: IndexDocumentsMethod,
-        allow_index_creation: bool,
-        primary_key: Option<String>,
-        operation_ids: Vec<TaskId>,
-    },
     Settings {
         allow_index_creation: bool,
         settings_ids: Vec<TaskId>,
@@ -146,7 +139,6 @@ impl BatchKind {
         match self {
             BatchKind::DocumentOperation { allow_index_creation, .. }
             | BatchKind::ClearAndSettings { allow_index_creation, .. }
-            | BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
             | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
             _ => None,
         }
@@ -154,10 +146,7 @@
 
     fn primary_key(&self) -> Option<Option<&str>> {
         match self {
-            BatchKind::DocumentOperation { primary_key, .. }
-            | BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
-                Some(primary_key.as_deref())
-            }
+            BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
             _ => None,
         }
     }
@@ -275,8 +264,7 @@
                 Break(BatchKind::IndexDeletion { ids })
             }
             (
-                BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
-                | BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
+                BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
                 K::IndexDeletion,
             ) => {
                 ids.push(id);
@@ -356,15 +344,9 @@
             ) => Break(this),
 
             (
-                BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
+                this @ BatchKind::DocumentOperation { .. },
                 K::Settings { .. },
-            ) => Continue(BatchKind::SettingsAndDocumentOperation {
-                settings_ids: vec![id],
-                method,
-                allow_index_creation,
-                primary_key,
-                operation_ids,
-            }),
+            ) => Break(this),
 
             (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
                 deletion_ids.push(id);
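This hunk is the behavioral core of the change: a settings task arriving after a document operation now closes the batch (`Break`) instead of folding both into a combined `SettingsAndDocumentOperation`. A deliberately simplified toy model of that accumulator, with invented names, just to show the `Continue`/`Break` shape:

```rust
enum Step<B> {
    Continue(B),
    Break(B),
}

#[derive(Debug)]
enum BatchKind {
    DocumentOperation { ids: Vec<u32> },
    Settings { ids: Vec<u32> },
}

fn accumulate(acc: BatchKind, next_is_settings: bool, id: u32) -> Step<BatchKind> {
    match (acc, next_is_settings) {
        // document ops keep stacking with document ops
        (BatchKind::DocumentOperation { mut ids }, false) => {
            ids.push(id);
            Step::Continue(BatchKind::DocumentOperation { ids })
        }
        // ...but a settings task now closes the batch (the old code continued
        // with a combined SettingsAndDocumentOperation here)
        (this @ BatchKind::DocumentOperation { .. }, true) => Step::Break(this),
        (BatchKind::Settings { mut ids }, true) => {
            ids.push(id);
            Step::Continue(BatchKind::Settings { ids })
        }
        (this, _) => Step::Break(this),
    }
}

fn main() {
    match accumulate(BatchKind::DocumentOperation { ids: vec![0] }, true, 1) {
        Step::Break(b) => println!("batch closed: {b:?}"),
        Step::Continue(b) => println!("batch keeps growing: {b:?}"),
    }
}
```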
@@ -477,63 +459,7 @@
                 allow_index_creation,
                 })
             }
-            (
-                BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
-                K::DocumentClear,
-            ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::ClearAndSettings {
-                    settings_ids,
-                    other: operation_ids,
-                    allow_index_creation,
-                })
-            }
-
-            (
-                BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
-                K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
-            ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::SettingsAndDocumentOperation {
-                    settings_ids,
-                    method: ReplaceDocuments,
-                    allow_index_creation,
-                    primary_key: pk2,
-                    operation_ids,
-                })
-            }
-            (
-                BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
-                K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
-            ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::SettingsAndDocumentOperation {
-                    settings_ids,
-                    method: UpdateDocuments,
-                    allow_index_creation,
-                    primary_key: pk2,
-                    operation_ids,
-                })
-            }
-            // But we can't batch a settings and a doc op with another doc op
-            // this MUST be AFTER the two previous branch
-            (
-                this @ BatchKind::SettingsAndDocumentOperation { .. },
-                K::DocumentDeletion { .. } | K::DocumentImport { .. },
-            ) => Break(this),
-            (
-                BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
-                K::Settings { .. },
-            ) => {
-                settings_ids.push(id);
-                Continue(BatchKind::SettingsAndDocumentOperation {
-                    settings_ids,
-                    method,
-                    allow_index_creation,
-                    primary_key,
-                    operation_ids,
-                })
-            }
             (
                 BatchKind::IndexCreation { .. }
                 | BatchKind::IndexDeletion { .. }
@@ -808,30 +734,30 @@ mod tests {
     }
 
     #[test]
-    fn document_addition_batch_with_settings() {
+    fn document_addition_doesnt_batch_with_settings() {
         // simple case
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 
         // multiple settings and doc addition
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
 
         // addition and setting unordered
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 
-        // We ensure this kind of batch doesn't batch with forbidden operations
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        // Doesn't batch with other forbidden operations
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
     }
 
     #[test]
@@ -859,8 +785,8 @@ mod tests {
         debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
 
         debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
     }
 
     #[test]
@@ -907,50 +833,6 @@ mod tests {
         debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
         debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
         debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
-
-        // Then the mixed cases.
-        // The index already exists, whatever is the right of the tasks it shouldn't change the result.
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-
-        // When the index doesn't exists yet it's more complicated.
-        // Either the first task we encounter create it, in which case we can create a big batch with everything.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        // The right of the tasks following isn't really important.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-        // Or, the second case; the first task doesn't create the index and thus we wants to batch it with only tasks that can't create an index.
-        // that can be a second task that don't have the right to create an index. Or anything that can't create an index like an index deletion, document deletion, document clear, etc.
-        // All theses tasks are going to throw an error `Index doesn't exist` once the batch is processed.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-        // The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whit what
-        // follows because we first need to process the erronous batch.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
     }
 
     #[test]
@@ -959,13 +841,13 @@ mod tests {
        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-       debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-       debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+       debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+       debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");

        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-       debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+       debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");

        // batch deletion and addition
@@ -104,7 +104,6 @@ pub(crate) enum IndexOperation {
         index_uid: String,
         primary_key: Option<String>,
         method: IndexDocumentsMethod,
-        documents_counts: Vec<u64>,
         operations: Vec<DocumentOperation>,
         tasks: Vec<Task>,
     },
@@ -130,19 +129,6 @@ pub(crate) enum IndexOperation {
         index_uid: String,
         cleared_tasks: Vec<Task>,
 
         // The boolean indicates if it's a settings deletion or creation.
         settings: Vec<(bool, Settings<Unchecked>)>,
         settings_tasks: Vec<Task>,
     },
-    SettingsAndDocumentOperation {
-        index_uid: String,
-
-        primary_key: Option<String>,
-        method: IndexDocumentsMethod,
-        documents_counts: Vec<u64>,
-        operations: Vec<DocumentOperation>,
-        document_import_tasks: Vec<Task>,
-
-        // The boolean indicates if it's a settings deletion or creation.
-        settings: Vec<(bool, Settings<Unchecked>)>,
-        settings_tasks: Vec<Task>,
-    },
@@ -174,12 +160,7 @@ impl Batch {
             IndexOperation::DocumentEdition { task, .. } => {
                 RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
             }
-            IndexOperation::SettingsAndDocumentOperation {
-                document_import_tasks: tasks,
-                settings_tasks: other,
-                ..
-            }
-            | IndexOperation::DocumentClearAndSetting {
+            IndexOperation::DocumentClearAndSetting {
                 cleared_tasks: tasks,
                 settings_tasks: other,
                 ..
@@ -239,8 +220,7 @@ impl IndexOperation {
             | IndexOperation::DocumentDeletion { index_uid, .. }
             | IndexOperation::DocumentClear { index_uid, .. }
             | IndexOperation::Settings { index_uid, .. }
-            | IndexOperation::DocumentClearAndSetting { index_uid, .. }
-            | IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
+            | IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
         }
     }
 }
@@ -262,9 +242,6 @@ impl fmt::Display for IndexOperation {
             IndexOperation::DocumentClearAndSetting { .. } => {
                 f.write_str("IndexOperation::DocumentClearAndSetting")
             }
-            IndexOperation::SettingsAndDocumentOperation { .. } => {
-                f.write_str("IndexOperation::SettingsAndDocumentOperation")
-            }
         }
     }
 }
@@ -330,21 +307,14 @@ impl IndexScheduler {
                     })
                     .flatten();
 
-                let mut documents_counts = Vec::new();
                 let mut operations = Vec::new();
 
                 for task in tasks.iter() {
                     match task.kind {
-                        KindWithContent::DocumentAdditionOrUpdate {
-                            content_file,
-                            documents_count,
-                            ..
-                        } => {
-                            documents_counts.push(documents_count);
+                        KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => {
                             operations.push(DocumentOperation::Add(content_file));
                         }
                         KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
-                            documents_counts.push(documents_ids.len() as u64);
                             operations.push(DocumentOperation::Delete(documents_ids.clone()));
                         }
                         _ => unreachable!(),
@@ -356,7 +326,6 @@ impl IndexScheduler {
                     index_uid,
                     primary_key,
                     method,
-                    documents_counts,
                     operations,
                     tasks,
                 },
@@ -441,67 +410,6 @@ impl IndexScheduler {
                     must_create_index,
                 }))
             }
-            BatchKind::SettingsAndDocumentOperation {
-                settings_ids,
-                method,
-                allow_index_creation,
-                primary_key,
-                operation_ids,
-            } => {
-                let settings = self.create_next_batch_index(
-                    rtxn,
-                    index_uid.clone(),
-                    BatchKind::Settings { settings_ids, allow_index_creation },
-                    current_batch,
-                    must_create_index,
-                )?;
-
-                let document_import = self.create_next_batch_index(
-                    rtxn,
-                    index_uid.clone(),
-                    BatchKind::DocumentOperation {
-                        method,
-                        allow_index_creation,
-                        primary_key,
-                        operation_ids,
-                    },
-                    current_batch,
-                    must_create_index,
-                )?;
-
-                match (document_import, settings) {
-                    (
-                        Some(Batch::IndexOperation {
-                            op:
-                                IndexOperation::DocumentOperation {
-                                    primary_key,
-                                    documents_counts,
-                                    operations,
-                                    tasks: document_import_tasks,
-                                    ..
-                                },
-                            ..
-                        }),
-                        Some(Batch::IndexOperation {
-                            op: IndexOperation::Settings { settings, tasks: settings_tasks, .. },
-                            ..
-                        }),
-                    ) => Ok(Some(Batch::IndexOperation {
-                        op: IndexOperation::SettingsAndDocumentOperation {
-                            index_uid,
-                            primary_key,
-                            method,
-                            documents_counts,
-                            operations,
-                            document_import_tasks,
-                            settings,
-                            settings_tasks,
-                        },
-                        must_create_index,
-                    })),
-                    _ => unreachable!(),
-                }
-            }
             BatchKind::IndexCreation { id } => {
                 let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
                 current_batch.processing(Some(&mut task));
@@ -1024,7 +932,13 @@ impl IndexScheduler {
 
         let mut index_wtxn = index.write_txn()?;
         let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
-        index_wtxn.commit()?;
+
+        {
+            let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
+            let _entered = span.enter();
+
+            index_wtxn.commit()?;
+        }
 
         // if the update processed successfully, we're going to store the new
         // stats of the index. Since the tasks have already been processed and
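The hunk above gives the (potentially slow) LMDB commit its own tracing span so it shows up as a distinct block under the `indexing::scheduler` target. A minimal sketch of the same pattern (the subscriber setup assumes the `tracing-subscriber` crate):

```rust
use tracing::trace_span;

fn commit_with_span() -> Result<(), std::io::Error> {
    // The span is entered for exactly the duration of the commit.
    let span = trace_span!(target: "indexing::scheduler", "commit");
    let _entered = span.enter();
    // index_wtxn.commit()?;  // the real commit would happen here
    Ok(())
}

fn main() {
    tracing_subscriber::fmt().init();
    commit_with_span().unwrap();
}
```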
@@ -1298,7 +1212,6 @@ impl IndexScheduler {
                 index_uid: _,
                 primary_key,
                 method,
-                documents_counts: _,
                 operations,
                 mut tasks,
             } => {
@@ -1345,7 +1258,10 @@ impl IndexScheduler {
                 let pool = match &indexer_config.thread_pool {
                     Some(pool) => pool,
                     None => {
-                        local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
+                        local_pool = ThreadPoolNoAbortBuilder::new()
+                            .thread_name(|i| format!("indexing-thread-{i}"))
+                            .build()
+                            .unwrap();
                         &local_pool
                     }
                 };
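Naming the fallback pool's worker threads makes them identifiable in profilers, debuggers, and crash backtraces. The same pattern with plain `rayon` standing in for `ThreadPoolNoAbortBuilder`:

```rust
use rayon::ThreadPoolBuilder;

fn main() {
    // `thread_name` receives the worker index and returns that thread's name.
    let pool = ThreadPoolBuilder::new()
        .num_threads(4)
        .thread_name(|i| format!("indexing-thread-{i}"))
        .build()
        .unwrap();

    pool.install(|| {
        println!("running on {:?}", std::thread::current().name());
    });
}
```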
@@ -1393,21 +1309,19 @@ impl IndexScheduler {
                 }
 
                 if tasks.iter().any(|res| res.error.is_none()) {
-                    pool.install(|| {
-                        indexer::index(
-                            index_wtxn,
-                            index,
-                            indexer_config.grenad_parameters(),
-                            &db_fields_ids_map,
-                            new_fields_ids_map,
-                            primary_key,
-                            &document_changes,
-                            embedders,
-                            &|| must_stop_processing.get(),
-                            &send_progress,
-                        )
-                    })
-                    .unwrap()?;
+                    indexer::index(
+                        index_wtxn,
+                        index,
+                        pool,
+                        indexer_config.grenad_parameters(),
+                        &db_fields_ids_map,
+                        new_fields_ids_map,
+                        primary_key,
+                        &document_changes,
+                        embedders,
+                        &|| must_stop_processing.get(),
+                        &send_progress,
+                    )?;
 
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }
@@ -1483,34 +1397,34 @@ impl IndexScheduler {
                 let pool = match &indexer_config.thread_pool {
                     Some(pool) => pool,
                     None => {
-                        local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
+                        local_pool = ThreadPoolNoAbortBuilder::new()
+                            .thread_name(|i| format!("indexing-thread-{i}"))
+                            .build()
+                            .unwrap();
                         &local_pool
                     }
                 };
 
-                pool.install(|| {
-                    let indexer =
-                        UpdateByFunction::new(candidates, context.clone(), code.clone());
-                    let document_changes = indexer.into_changes(&primary_key)?;
-                    let embedders = index.embedding_configs(index_wtxn)?;
-                    let embedders = self.embedders(embedders)?;
+                let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
+                let document_changes =
+                    pool.install(|| indexer.into_changes(&primary_key)).unwrap()?;
 
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // cannot change primary key in DocumentEdition
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &send_progress,
-                    )?;
+                let embedders = index.embedding_configs(index_wtxn)?;
+                let embedders = self.embedders(embedders)?;
 
-                    Result::Ok(())
-                })
-                .unwrap()?;
+                indexer::index(
+                    index_wtxn,
+                    index,
+                    pool,
+                    indexer_config.grenad_parameters(),
+                    &db_fields_ids_map,
+                    new_fields_ids_map,
+                    None, // cannot change primary key in DocumentEdition
+                    &document_changes,
+                    embedders,
+                    &|| must_stop_processing.get(),
+                    &send_progress,
+                )?;
 
                 // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
             }
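The restructuring above shrinks the scope of `pool.install`: only the CPU-bound change computation runs inside the pool, and its `Result` is propagated with `?` instead of being smuggled out of a closure via `Result::Ok(())`. A simplified sketch of that shape (types and names invented):

```rust
use rayon::{ThreadPool, ThreadPoolBuilder};

// Only the parallel, CPU-bound step runs in the pool; sequential follow-up
// work (loading embedders, writing the index) happens outside it.
fn compute_changes(pool: &ThreadPool, input: &[u64]) -> Result<Vec<u64>, String> {
    let changes = pool.install(|| {
        Ok::<_, String>(input.iter().map(|x| x * 2).collect::<Vec<_>>())
    })?;
    Ok(changes)
}

fn main() {
    let pool = ThreadPoolBuilder::new().build().unwrap();
    assert_eq!(compute_changes(&pool, &[1, 2, 3]).unwrap(), vec![2, 4, 6]);
}
```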

@@ -1635,7 +1549,10 @@ impl IndexScheduler {
let pool = match &indexer_config.thread_pool {
    Some(pool) => pool,
    None => {
        local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
        local_pool = ThreadPoolNoAbortBuilder::new()
            .thread_name(|i| format!("indexing-thread-{i}"))
            .build()
            .unwrap();
        &local_pool
    }
};

@@ -1646,21 +1563,19 @@ impl IndexScheduler {
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;

pool.install(|| {
    indexer::index(
        index_wtxn,
        index,
        indexer_config.grenad_parameters(),
        &db_fields_ids_map,
        new_fields_ids_map,
        None, // document deletion never changes primary key
        &document_changes,
        embedders,
        &|| must_stop_processing.get(),
        &send_progress,
    )
})
.unwrap()?;
indexer::index(
    index_wtxn,
    index,
    pool,
    indexer_config.grenad_parameters(),
    &db_fields_ids_map,
    new_fields_ids_map,
    None, // document deletion never changes primary key
    &document_changes,
    embedders,
    &|| must_stop_processing.get(),
    &send_progress,
)?;

// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}

@@ -1688,43 +1603,6 @@ impl IndexScheduler {

    Ok(tasks)
}
IndexOperation::SettingsAndDocumentOperation {
    index_uid,
    primary_key,
    method,
    documents_counts,
    operations,
    document_import_tasks,
    settings,
    settings_tasks,
} => {
    let settings_tasks = self.apply_index_operation(
        index_wtxn,
        index,
        IndexOperation::Settings {
            index_uid: index_uid.clone(),
            settings,
            tasks: settings_tasks,
        },
    )?;

    let mut import_tasks = self.apply_index_operation(
        index_wtxn,
        index,
        IndexOperation::DocumentOperation {
            index_uid,
            primary_key,
            method,
            documents_counts,
            operations,
            tasks: document_import_tasks,
        },
    )?;

    let mut tasks = settings_tasks;
    tasks.append(&mut import_tasks);
    Ok(tasks)
}
IndexOperation::DocumentClearAndSetting {
    index_uid,
    cleared_tasks,

@@ -407,7 +407,7 @@ pub struct IndexScheduler {
///
/// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation.
#[cfg(test)]
test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,

/// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler.
///

@@ -476,7 +476,7 @@ impl IndexScheduler {
/// Create an index scheduler and start its run loop.
pub fn new(
    options: IndexSchedulerOptions,
    #[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
    #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,
    #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>,
) -> Result<Self> {
    std::fs::create_dir_all(&options.tasks_path)?;

@@ -1440,7 +1440,7 @@ impl IndexScheduler {

// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
    && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
    && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
{
    return Err(Error::NoSpaceLeftInTaskQueue);
}
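
The guard is plain integer arithmetic over LMDB page accounting; the change lowers the refusal threshold from 50% to 40% of the map size (the context comment still says 50%, which is now stale). A self-contained restatement of the check:

    // Sketch of the fullness guard shown above: refuse to enqueue once used
    // pages exceed 40% of the LMDB map size. Integer division, so the ratio
    // is truncated exactly as in the original expression.
    fn queue_is_too_full(non_free_pages_size: u64, map_size: u64) -> bool {
        (non_free_pages_size * 100) / map_size > 40
    }

    fn main() {
        assert!(!queue_is_too_full(350, 1_000)); // 35% used: accept the task
        assert!(queue_is_too_full(450, 1_000)); // 45% used: refuse the task
    }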

@@ -1738,11 +1738,8 @@ impl IndexScheduler {
    }
}

self.processing_tasks.write().unwrap().stop_processing();
// We must re-add the canceled task so they're part of the same batch.
// processed.processing |= canceled;
ids |= canceled;

self.write_batch(&mut wtxn, processing_batch, &ids)?;

#[cfg(test)]

@@ -1750,6 +1747,10 @@ impl IndexScheduler {

wtxn.commit().map_err(Error::HeedTransaction)?;

// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time until everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();

// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");
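
The two hunks above move `stop_processing` from before `write_batch` to after the final `wtxn.commit()`: the in-memory "processing" view is cleared only once the batch is durable, so a reader can no longer observe a batch that exists in RAM but is "not found" on disk. A minimal sketch of that ordering (the types here are illustrative):

    use std::sync::RwLock;

    struct Scheduler {
        processing: RwLock<Vec<u32>>, // task ids currently being processed
    }

    impl Scheduler {
        fn finish_batch(&self, commit: impl FnOnce() -> Result<(), String>) -> Result<(), String> {
            commit()?; // 1. make everything durable first
            self.processing.write().unwrap().clear(); // 2. only then stop "processing"
            Ok(())
        }
    }

    fn main() {
        let s = Scheduler { processing: RwLock::new(vec![1, 2, 3]) };
        s.finish_batch(|| Ok(())).unwrap();
        assert!(s.processing.read().unwrap().is_empty());
    }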

@@ -2237,7 +2238,7 @@ mod tests {
use std::time::Instant;

use big_s::S;
use crossbeam::channel::RecvTimeoutError;
use crossbeam_channel::RecvTimeoutError;
use file_store::File;
use insta::assert_json_snapshot;
use maplit::btreeset;

@@ -2289,7 +2290,7 @@ mod tests {
    configuration: impl Fn(&mut IndexSchedulerOptions),
) -> (Self, IndexSchedulerHandle) {
    let tempdir = TempDir::new().unwrap();
    let (sender, receiver) = crossbeam::channel::bounded(0);
    let (sender, receiver) = crossbeam_channel::bounded(0);

    let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
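
The test harness keeps its zero-capacity channel, now taken from the standalone `crossbeam-channel` crate rather than the `crossbeam` facade. `bounded(0)` is a rendezvous channel: every send blocks until the matching receive, which is how the tests step the scheduler one breakpoint at a time. A small self-contained example of the same API:

    use std::thread;
    use std::time::Duration;

    use crossbeam_channel::{bounded, RecvTimeoutError};

    fn main() {
        // Capacity 0: the send below parks until recv_timeout picks it up.
        let (sender, receiver) = bounded::<&str>(0);

        thread::spawn(move || {
            sender.send("breakpoint").unwrap();
        });

        match receiver.recv_timeout(Duration::from_secs(1)) {
            Ok(msg) => println!("got {msg}"),
            Err(RecvTimeoutError::Timeout) => println!("no breakpoint in time"),
            Err(RecvTimeoutError::Disconnected) => println!("sender dropped"),
        }
    }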

@@ -2421,7 +2422,7 @@ mod tests {
pub struct IndexSchedulerHandle {
    _tempdir: TempDir,
    index_scheduler: IndexScheduler,
    test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
    test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>,
    last_breakpoint: Breakpoint,
}

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
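
The remaining hunks all regenerate insta snapshot files: the stale `snapshot_kind: text` header line disappears (and a few files fix a doubled `crates/crates/` source path), while the dumped `Settings` gain the new `facet_search` and `prefix_search` fields as `NotSet`. For context, a minimal example of the kind of insta test that produces these `.snap` headers (the rendered value is illustrative, not Meilisearch's helper):

    #[cfg(test)]
    mod tests {
        // On the first run insta writes a `.snap` file with a `---` header
        // like the ones diffed below; later runs fail if the output drifts.
        #[test]
        fn settings_snapshot() {
            let rendered = format!("filterable_attributes: {:?}", vec!["catto"]);
            insta::assert_snapshot!(rendered);
        }
    }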

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -1,13 +1,12 @@
---
source: crates/crates/index-scheduler/src/lib.rs
snapshot_kind: text
source: crates/index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

@@ -1,13 +1,12 @@
---
source: crates/crates/index-scheduler/src/lib.rs
snapshot_kind: text
source: crates/index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

@@ -1,13 +1,12 @@
---
source: crates/crates/index-scheduler/src/lib.rs
snapshot_kind: text
source: crates/index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/lib.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

@@ -106,7 +106,7 @@ impl ProcessingBatch {
self.stats.total_nb_tasks = 0;
}

/// Update the timestamp of the tasks and the inner structure of this sturcture.
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state

@@ -17,7 +17,7 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"

@@ -25,7 +25,7 @@ fst = "0.4.7"
memmap2 = "0.9.4"
milli = { path = "../milli" }
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"

@@ -214,7 +214,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB
let mut doc_alloc = Bump::with_capacity(1024 * 1024); // 1MiB

let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);

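The hunk above fixes a pre-allocation that reserved 1 GiB while its comment promised 1 MiB. A minimal sketch of the pattern, assuming `Bump` here is the bumpalo arena allocator:

use bumpalo::Bump;

// Reserve 1 MiB up front; later allocations only bump a pointer inside
// the arena, and the whole region is released in one shot when dropped.
let doc_alloc = Bump::with_capacity(1024 * 1024);
let greeting = doc_alloc.alloc_str("hello");
assert_eq!(greeting, "hello");
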
@@ -290,6 +290,8 @@ InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFacetSearch , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPrefixSearch , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;

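The two new codes follow the existing naming scheme. As a hedged sketch of what a client would see on a bad `facetSearch` payload (the envelope shape is the usual one for these codes; the exact message wording is an assumption):

// Hypothetical error body for an invalid `facetSearch` value.
let expected = serde_json::json!({
    "message": "Invalid value type at `.facetSearch`: expected a boolean",
    "code": "invalid_settings_facet_search",
    "type": "invalid_request",
    "link": "https://docs.meilisearch.com/errors#invalid_settings_facet_search"
});
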
@@ -8,7 +8,7 @@ use std::str::FromStr;

use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::index::IndexEmbeddingConfig;
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
@@ -202,6 +202,12 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsLocalizedAttributes>)]
pub localized_attributes: Setting<Vec<LocalizedAttributesRuleView>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFacetSearch>)]
pub facet_search: Setting<bool>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsPrefixSearch>)]
pub prefix_search: Setting<PrefixSearchSettings>,

#[serde(skip)]
#[deserr(skip)]
@@ -266,6 +272,8 @@ impl Settings<Checked> {
embedders: Setting::Reset,
search_cutoff_ms: Setting::Reset,
localized_attributes: Setting::Reset,
facet_search: Setting::Reset,
prefix_search: Setting::Reset,
_kind: PhantomData,
}
}
@@ -290,6 +298,8 @@ impl Settings<Checked> {
embedders,
search_cutoff_ms,
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
_kind,
} = self;

@@ -312,6 +322,8 @@ impl Settings<Checked> {
embedders,
search_cutoff_ms,
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
_kind: PhantomData,
}
}
@@ -360,6 +372,8 @@ impl Settings<Unchecked> {
embedders: self.embedders,
search_cutoff_ms: self.search_cutoff_ms,
localized_attributes: self.localized_attributes,
facet_search: self.facet_search,
prefix_search: self.prefix_search,
_kind: PhantomData,
}
}
@@ -433,6 +447,8 @@ impl Settings<Unchecked> {
Setting::Set(this)
}
},
prefix_search: other.prefix_search.or(self.prefix_search),
facet_search: other.facet_search.or(self.facet_search),
_kind: PhantomData,
}
}
@@ -469,6 +485,8 @@ pub fn apply_settings_to_builder(
embedders,
search_cutoff_ms,
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
_kind,
} = settings;

@@ -657,6 +675,20 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_search_cutoff(),
Setting::NotSet => (),
}

match prefix_search {
Setting::Set(prefix_search) => {
builder.set_prefix_search(PrefixSearch::from(*prefix_search))
}
Setting::Reset => builder.reset_prefix_search(),
Setting::NotSet => (),
}

match facet_search {
Setting::Set(facet_search) => builder.set_facet_search(*facet_search),
Setting::Reset => builder.reset_facet_search(),
Setting::NotSet => (),
}
}

pub enum SecretPolicy {
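
The match arms above lean on milli's three-state Setting type. A stripped-down sketch of the semantics (not the actual milli definition, which also carries serde/deserr plumbing):

enum Setting<T> {
    Set(T),  // apply this value
    Reset,   // restore the built-in default
    NotSet,  // leave the stored value untouched
}
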
@@ -755,6 +787,10 @@ pub fn settings(

let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;

let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from);

let facet_search = index.facet_search(rtxn)?;

let mut settings = Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
@@ -791,13 +827,14 @@ pub fn settings(
Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()),
None => Setting::Reset,
},
prefix_search: Setting::Set(prefix_search.unwrap_or_default()),
facet_search: Setting::Set(facet_search),
_kind: PhantomData,
};

if let SecretPolicy::HideSecrets = secret_policy {
settings.hide_secrets()
}

Ok(settings)
}

@@ -964,6 +1001,32 @@ impl std::ops::Deref for WildcardSetting {
}
}

#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsPrefixSearch>, rename_all = camelCase, deny_unknown_fields)]
pub enum PrefixSearchSettings {
#[default]
IndexingTime,
Disabled,
}

impl From<PrefixSearch> for PrefixSearchSettings {
fn from(value: PrefixSearch) -> Self {
match value {
PrefixSearch::IndexingTime => PrefixSearchSettings::IndexingTime,
PrefixSearch::Disabled => PrefixSearchSettings::Disabled,
}
}
}
impl From<PrefixSearchSettings> for PrefixSearch {
fn from(value: PrefixSearchSettings) -> Self {
match value {
PrefixSearchSettings::IndexingTime => PrefixSearch::IndexingTime,
PrefixSearchSettings::Disabled => PrefixSearch::Disabled,
}
}
}
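
A minimal sketch of how the new enum round-trips through serde, given the camelCase rename above; this is what produces the "indexingTime" string that the dump tests below expect:

#[test]
fn prefix_search_settings_roundtrip() {
    let json = serde_json::to_string(&PrefixSearchSettings::IndexingTime).unwrap();
    assert_eq!(json, r#""indexingTime""#);
    let back: PrefixSearchSettings = serde_json::from_str(&json).unwrap();
    assert_eq!(back, PrefixSearchSettings::IndexingTime);
}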

#[cfg(test)]
pub(crate) mod test {
use super::*;
@@ -990,6 +1053,8 @@ pub(crate) mod test {
embedders: Setting::NotSet,
localized_attributes: Setting::NotSet,
search_cutoff_ms: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

@@ -1019,6 +1084,8 @@ pub(crate) mod test {
embedders: Setting::NotSet,
localized_attributes: Setting::NotSet,
search_cutoff_ms: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

@@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
obkv = "0.3.0"
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
@@ -103,7 +103,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.2"
roaring = "0.10.7"
mopa-maintained = "0.2.3"

[dev-dependencies]
@@ -157,5 +157,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]

[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip"
sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.16/build.zip"
sha1 = "68f83438a114aabbe76bc9fe480071e741996662"

@@ -20,14 +20,14 @@ use meilisearch::{
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))

@@ -654,8 +654,9 @@ impl Opt {

#[derive(Debug, Default, Clone, Parser, Deserialize)]
pub struct IndexerOpts {
/// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch
/// uses no more than two thirds of available memory.
/// Specifies the maximum resident memory that Meilisearch can use for indexing.
/// By default, Meilisearch limits the RAM usage to 5% of the total available memory.
/// Note that the underlying store utilizes memory-mapping and makes use of the rest.
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
#[serde(default)]
pub max_indexing_memory: MaxMemory,
@@ -714,7 +715,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
}
}

/// A type used to detect the max memory available and use 2/3 of it.
/// A type used to detect the max resident memory available and use 5% of it.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxMemory(Option<Byte>);

@@ -728,7 +729,7 @@ impl FromStr for MaxMemory {

impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 5 / 100).map(Byte::from_u64))
}
}
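
The default drops from two thirds of RAM to 5%. A quick worked example for a hypothetical 32 GiB machine:

let total: u64 = 32 * 1024 * 1024 * 1024; // 32 GiB
assert_eq!(total * 2 / 3, 22_906_492_245);  // old default: ~21.3 GiB
assert_eq!(total * 5 / 100, 1_717_986_918); // new default: ~1.6 GiB
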
@@ -17,6 +17,32 @@ use crate::extractors::authentication::GuardedData;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;

/// This macro generates the routes for the settings.
///
/// It takes a list of settings and generates a module for each setting.
/// Each module contains the `get`, `update` and `delete` routes for the setting.
///
/// It also generates a `configure` function that configures the routes for the settings.
macro_rules! make_setting_routes {
($({route: $route:literal, update_verb: $update_verb:ident, value_type: $type:ty, err_type: $err_ty:ty, attr: $attr:ident, camelcase_attr: $camelcase_attr:literal, analytics: $analytics:ident},)*) => {
$(
make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics);
)*

pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::patch().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
$(.service($attr::resources()))*;
}

pub const ALL_SETTINGS_NAMES: &[&str] = &[$(stringify!($attr)),*];
};
}

#[macro_export]
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => {
@@ -153,255 +179,227 @@ macro_rules! make_setting_route {
};
}

make_setting_route!(
"/filterable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,
filterable_attributes,
"filterableAttributes",
FilterableAttributesAnalytics
);

make_setting_route!(
"/sortable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes,
>,
sortable_attributes,
"sortableAttributes",
SortableAttributesAnalytics
);

make_setting_route!(
"/displayed-attributes",
put,
Vec<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes,
>,
displayed_attributes,
"displayedAttributes",
DisplayedAttributesAnalytics
);

make_setting_route!(
"/typo-tolerance",
patch,
meilisearch_types::settings::TypoSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance,
>,
typo_tolerance,
"typoTolerance",
TypoToleranceAnalytics
);

make_setting_route!(
"/searchable-attributes",
put,
Vec<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes,
>,
searchable_attributes,
"searchableAttributes",
SearchableAttributesAnalytics
);

make_setting_route!(
"/stop-words",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
>,
stop_words,
"stopWords",
StopWordsAnalytics
);

make_setting_route!(
"/non-separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
>,
non_separator_tokens,
"nonSeparatorTokens",
NonSeparatorTokensAnalytics
);

make_setting_route!(
"/separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
>,
separator_tokens,
"separatorTokens",
SeparatorTokensAnalytics
);

make_setting_route!(
"/dictionary",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
>,
dictionary,
"dictionary",
DictionaryAnalytics
);

make_setting_route!(
"/synonyms",
put,
std::collections::BTreeMap<String, Vec<String>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms,
>,
synonyms,
"synonyms",
SynonymsAnalytics
);

make_setting_route!(
"/distinct-attribute",
put,
String,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute,
>,
distinct_attribute,
"distinctAttribute",
DistinctAttributeAnalytics
);

make_setting_route!(
"/proximity-precision",
put,
meilisearch_types::settings::ProximityPrecisionView,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
proximity_precision,
"proximityPrecision",
ProximityPrecisionAnalytics
);

make_setting_route!(
"/localized-attributes",
put,
Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
>,
localized_attributes,
"localizedAttributes",
LocalesAnalytics
);

make_setting_route!(
"/ranking-rules",
put,
Vec<meilisearch_types::settings::RankingRuleView>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules,
>,
ranking_rules,
"rankingRules",
RankingRulesAnalytics
);

make_setting_route!(
"/faceting",
patch,
meilisearch_types::settings::FacetingSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFaceting,
>,
faceting,
"faceting",
FacetingAnalytics
);

make_setting_route!(
"/pagination",
patch,
meilisearch_types::settings::PaginationSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPagination,
>,
pagination,
"pagination",
PaginationAnalytics
);

make_setting_route!(
"/embedders",
patch,
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
embedders,
"embedders",
EmbeddersAnalytics
);

make_setting_route!(
"/search-cutoff-ms",
put,
u64,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
>,
search_cutoff_ms,
"searchCutoffMs",
SearchCutoffMsAnalytics
);

macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::patch().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
$(.service($mod::resources()))*;
}
};
}

generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
localized_attributes,
searchable_attributes,
distinct_attribute,
proximity_precision,
stop_words,
separator_tokens,
non_separator_tokens,
dictionary,
synonyms,
ranking_rules,
typo_tolerance,
pagination,
faceting,
embedders,
search_cutoff_ms
make_setting_routes!(
{
route: "/filterable-attributes",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,
attr: filterable_attributes,
camelcase_attr: "filterableAttributes",
analytics: FilterableAttributesAnalytics
},
{
route: "/sortable-attributes",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes,
>,
attr: sortable_attributes,
camelcase_attr: "sortableAttributes",
analytics: SortableAttributesAnalytics
},
{
route: "/displayed-attributes",
update_verb: put,
value_type: Vec<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes,
>,
attr: displayed_attributes,
camelcase_attr: "displayedAttributes",
analytics: DisplayedAttributesAnalytics
},
{
route: "/typo-tolerance",
update_verb: patch,
value_type: meilisearch_types::settings::TypoSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance,
>,
attr: typo_tolerance,
camelcase_attr: "typoTolerance",
analytics: TypoToleranceAnalytics
},
{
route: "/searchable-attributes",
update_verb: put,
value_type: Vec<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes,
>,
attr: searchable_attributes,
camelcase_attr: "searchableAttributes",
analytics: SearchableAttributesAnalytics
},
{
route: "/stop-words",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
>,
attr: stop_words,
camelcase_attr: "stopWords",
analytics: StopWordsAnalytics
},
{
route: "/non-separator-tokens",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
>,
attr: non_separator_tokens,
camelcase_attr: "nonSeparatorTokens",
analytics: NonSeparatorTokensAnalytics
},
{
route: "/separator-tokens",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
>,
attr: separator_tokens,
camelcase_attr: "separatorTokens",
analytics: SeparatorTokensAnalytics
},
{
route: "/dictionary",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
>,
attr: dictionary,
camelcase_attr: "dictionary",
analytics: DictionaryAnalytics
},
{
route: "/synonyms",
update_verb: put,
value_type: std::collections::BTreeMap<String, Vec<String>>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms,
>,
attr: synonyms,
camelcase_attr: "synonyms",
analytics: SynonymsAnalytics
},
{
route: "/distinct-attribute",
update_verb: put,
value_type: String,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute,
>,
attr: distinct_attribute,
camelcase_attr: "distinctAttribute",
analytics: DistinctAttributeAnalytics
},
{
route: "/proximity-precision",
update_verb: put,
value_type: meilisearch_types::settings::ProximityPrecisionView,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
attr: proximity_precision,
camelcase_attr: "proximityPrecision",
analytics: ProximityPrecisionAnalytics
},
{
route: "/localized-attributes",
update_verb: put,
value_type: Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
>,
attr: localized_attributes,
camelcase_attr: "localizedAttributes",
analytics: LocalesAnalytics
},
{
route: "/ranking-rules",
update_verb: put,
value_type: Vec<meilisearch_types::settings::RankingRuleView>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules,
>,
attr: ranking_rules,
camelcase_attr: "rankingRules",
analytics: RankingRulesAnalytics
},
{
route: "/faceting",
update_verb: patch,
value_type: meilisearch_types::settings::FacetingSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFaceting,
>,
attr: faceting,
camelcase_attr: "faceting",
analytics: FacetingAnalytics
},
{
route: "/pagination",
update_verb: patch,
value_type: meilisearch_types::settings::PaginationSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPagination,
>,
attr: pagination,
camelcase_attr: "pagination",
analytics: PaginationAnalytics
},
{
route: "/embedders",
update_verb: patch,
value_type: std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
attr: embedders,
camelcase_attr: "embedders",
analytics: EmbeddersAnalytics
},
{
route: "/search-cutoff-ms",
update_verb: put,
value_type: u64,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
>,
attr: search_cutoff_ms,
camelcase_attr: "searchCutoffMs",
analytics: SearchCutoffMsAnalytics
},
{
route: "/facet-search",
update_verb: put,
value_type: bool,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch,
>,
attr: facet_search,
camelcase_attr: "facetSearch",
analytics: FacetSearchAnalytics
},
{
route: "/prefix-search",
update_verb: put,
value_type: meilisearch_types::settings::PrefixSearchSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch,
>,
attr: prefix_search,
camelcase_attr: "prefixSearch",
analytics: PrefixSearchAnalytics
},
);
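
Because each entry's attr ident feeds stringify! in the macro, the declarative list above also populates the generated ALL_SETTINGS_NAMES constant. A quick sanity check, as a sketch (placement inside the routes module is assumed):

assert!(ALL_SETTINGS_NAMES.contains(&"facet_search"));
assert!(ALL_SETTINGS_NAMES.contains(&"prefix_search"));
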
pub async fn update_all(
@@ -456,6 +454,8 @@ pub async fn update_all(
non_separator_tokens: NonSeparatorTokensAnalytics::new(
new_settings.non_separator_tokens.as_ref().set(),
),
facet_search: FacetSearchAnalytics::new(new_settings.facet_search.as_ref().set()),
prefix_search: PrefixSearchAnalytics::new(new_settings.prefix_search.as_ref().set()),
},
&req,
);

@@ -10,7 +10,8 @@ use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::settings::{
FacetingSettings, PaginationSettings, ProximityPrecisionView, RankingRuleView, TypoSettings,
FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView,
RankingRuleView, TypoSettings,
};
use serde::Serialize;

@@ -36,6 +37,8 @@ pub struct SettingsAnalytics {
pub dictionary: DictionaryAnalytics,
pub separator_tokens: SeparatorTokensAnalytics,
pub non_separator_tokens: NonSeparatorTokensAnalytics,
pub facet_search: FacetSearchAnalytics,
pub prefix_search: PrefixSearchAnalytics,
}

impl Aggregate for SettingsAnalytics {
@@ -183,6 +186,14 @@ impl Aggregate for SettingsAnalytics {
non_separator_tokens: NonSeparatorTokensAnalytics {
total: new.non_separator_tokens.total.or(self.non_separator_tokens.total),
},
facet_search: FacetSearchAnalytics {
set: new.facet_search.set | self.facet_search.set,
value: new.facet_search.value.or(self.facet_search.value),
},
prefix_search: PrefixSearchAnalytics {
set: new.prefix_search.set | self.prefix_search.set,
value: new.prefix_search.value.or(self.prefix_search.value),
},
})
}

@@ -620,3 +631,35 @@ impl NonSeparatorTokensAnalytics {
SettingsAnalytics { non_separator_tokens: self, ..Default::default() }
}
}

#[derive(Serialize, Default)]
pub struct FacetSearchAnalytics {
pub set: bool,
pub value: Option<bool>,
}

impl FacetSearchAnalytics {
pub fn new(settings: Option<&bool>) -> Self {
Self { set: settings.is_some(), value: settings.copied() }
}

pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { facet_search: self, ..Default::default() }
}
}

#[derive(Serialize, Default)]
pub struct PrefixSearchAnalytics {
pub set: bool,
pub value: Option<PrefixSearchSettings>,
}

impl PrefixSearchAnalytics {
pub fn new(settings: Option<&PrefixSearchSettings>) -> Self {
Self { set: settings.is_some(), value: settings.cloned() }
}

pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { prefix_search: self, ..Default::default() }
}
}

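The aggregation rule above is "a newly set value wins, otherwise keep the old one", which is just Option::or with the new event on the left. A tiny sketch:

let newest: Option<bool> = Some(true);
let older: Option<bool> = Some(false);
assert_eq!(newest.or(older), Some(true)); // a newly set value wins
assert_eq!(None.or(older), Some(false));  // otherwise the previous one is kept
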
@@ -224,7 +224,7 @@ async fn list_batches_status_and_type_filtered() {
}

#[actix_rt::test]
async fn get_batch_filter_error() {
async fn list_batch_filter_error() {
let server = Server::new().await;

let (response, code) = server.batches_filter("lol=pied").await;

@@ -79,7 +79,9 @@ async fn import_dump_v1_movie_raw() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -242,7 +244,9 @@ async fn import_dump_v1_movie_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -391,7 +395,9 @@ async fn import_dump_v1_rubygems_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -526,7 +532,9 @@ async fn import_dump_v2_movie_raw() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -673,7 +681,9 @@ async fn import_dump_v2_movie_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -819,7 +829,9 @@ async fn import_dump_v2_rubygems_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -954,7 +966,9 @@ async fn import_dump_v3_movie_raw() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -1101,7 +1115,9 @@ async fn import_dump_v3_movie_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -1247,7 +1263,9 @@ async fn import_dump_v3_rubygems_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -1382,7 +1400,9 @@ async fn import_dump_v4_movie_raw() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -1529,7 +1549,9 @@ async fn import_dump_v4_movie_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -1675,7 +1697,9 @@ async fn import_dump_v4_rubygems_with_settings() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###
);
@@ -1922,7 +1946,9 @@ async fn import_dump_v6_containing_experimental_features() {
"maxTotalHits": 1000
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###);

@@ -2102,7 +2128,9 @@ async fn generate_and_import_dump_containing_vectors() {
}
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###);

@@ -41,8 +41,8 @@ async fn simple_facet_search() {

let documents = DOCUMENTS.clone();
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -65,8 +65,8 @@ async fn advanced_facet_search() {
let documents = DOCUMENTS.clone();
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.update_settings_typo_tolerance(json!({ "enabled": false })).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await;
@@ -89,8 +89,8 @@ async fn more_advanced_facet_search() {
let documents = DOCUMENTS.clone();
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.update_settings_typo_tolerance(json!({ "disableOnWords": ["adventre"] })).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await;
@@ -113,8 +113,8 @@ async fn simple_facet_search_with_max_values() {
let documents = DOCUMENTS.clone();
index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -135,8 +135,8 @@ async fn simple_facet_search_by_count_with_max_values() {
)
.await;
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -151,8 +151,8 @@ async fn non_filterable_facet_search_error() {
let index = server.index("test");

let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -170,8 +170,8 @@ async fn facet_search_dont_support_words() {

let documents = DOCUMENTS.clone();
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "words"})).await;
@@ -188,8 +188,8 @@ async fn simple_facet_search_with_sort_by_count() {
let documents = DOCUMENTS.clone();
index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await;
index.update_settings_filterable_attributes(json!(["genres"])).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -200,3 +200,115 @@ async fn simple_facet_search_with_sort_by_count() {
assert_eq!(hits[0], json!({ "value": "Action", "count": 3 }));
assert_eq!(hits[1], json!({ "value": "Adventure", "count": 2 }));
}

#[actix_rt::test]
|
||||
async fn add_documents_and_deactivate_facet_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (response, _code) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"facetSearch": false,
|
||||
"filterableAttributes": ["genres"],
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn deactivate_facet_search_and_add_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"facetSearch": false,
|
||||
"filterableAttributes": ["genres"],
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (response, _code) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn deactivate_facet_search_add_documents_and_activate_facet_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"facetSearch": false,
|
||||
"filterableAttributes": ["genres"],
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (response, _code) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"facetSearch": true,
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn deactivate_facet_search_add_documents_and_reset_facet_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"facetSearch": false,
|
||||
"filterableAttributes": ["genres"],
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (response, _code) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"facetSearch": serde_json::Value::Null,
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2);
|
||||
}
|
||||
|
||||
@@ -4,6 +4,58 @@ use super::*;
use crate::common::Server;
use crate::json;

#[actix_rt::test]
async fn search_formatted_from_sdk() {
let server = Server::new_shared();
let index = server.unique_index();

index
.update_settings(
json!({ "filterableAttributes": ["genre"], "searchableAttributes": ["title"] }),
)
.await;

let documents = json!([
{ "id": 123, "title": "Pride and Prejudice", "genre": "romance" },
{ "id": 456, "title": "Le Petit Prince", "genre": "adventure" },
{ "id": 1, "title": "Alice In Wonderland", "genre": "adventure" },
{ "id": 2, "title": "Le Rouge et le Noir", "genre": "romance" },
{ "id": 1344, "title": "The Hobbit", "genre": "adventure" },
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "genre": "fantasy" },
{ "id": 7, "title": "Harry Potter and the Chamber of Secrets", "genre": "fantasy" },
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" }
]);
let (response, _) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;

index
.search(
json!({ "q":"prince",
"attributesToCrop": ["title"],
"cropLength": 2,
"filter": "genre = adventure",
"attributesToHighlight": ["title"],
"attributesToRetrieve": ["title"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"title": "Le Petit Prince",
"_formatted": {
"title": "…Petit <em>Prince</em>"
}
}
"###);
}
},
)
.await;
}

#[actix_rt::test]
async fn formatted_contain_wildcard() {
let server = Server::new_shared();
@@ -15,6 +15,7 @@ mod pagination;
mod restrict_searchable;
mod search_queue;

use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tempfile::TempDir;

@@ -62,6 +63,71 @@ async fn simple_search() {
.await;
}

#[actix_rt::test]
async fn search_with_stop_word() {
// related to https://github.com/meilisearch/meilisearch/issues/4984
let server = Server::new().await;
let index = server.index("test");

let (_, code) = index
.update_settings(json!({"stopWords": ["the", "The", "a", "an", "to", "in", "of"]}))
.await;
meili_snap::snapshot!(code, @"202 Accepted");

let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;

// prefix search
index
.search(json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @"[]");
})
.await;

// non-prefix search
index
.search(json!({"q": "to the ", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Shazam!",
"_formatted": {
"title": "Shazam!"
}
},
{
"title": "Captain Marvel",
"_formatted": {
"title": "Captain Marvel"
}
},
{
"title": "Escape Room",
"_formatted": {
"title": "Escape Room"
}
},
{
"title": "How to Train Your Dragon: The Hidden World",
"_formatted": {
"title": "How to Train Your Dragon: The Hidden World"
}
},
{
"title": "Gläss",
"_formatted": {
"title": "Gläss"
}
}
]
"###);
})
.await;
}

#[actix_rt::test]
async fn phrase_search_with_stop_word() {
// related to https://github.com/meilisearch/meilisearch/issues/3521
@@ -367,3 +367,50 @@ async fn search_on_exact_field() {
})
.await;
}

#[actix_rt::test]
async fn phrase_search_on_title() {
let server = Server::new().await;
let documents = json!([
{ "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" },
{ "id": 5, "desc": "Document Review", "title": "Document Review Attorney" },
{ "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" },
{ "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" },
{ "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" },
{ "id": 1, "desc": "Document Review", "title": "Document Reviewer" },
{ "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" },
{ "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" }
]);
let index = index_with_documents(&server, &documents).await;

index
.search(
json!({"q": "\"Document Review\"", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["title"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Document Review Specialist II"
},
{
"title": "Document Review Attorney"
},
{
"title": "Document Review Manager - Cyber Incident Response (Remote)"
},
{
"title": "Document Review Paralegal"
},
{
"title": "Document Review Specialist II"
},
{
"title": "Document Review (Entry Level)"
}
]
"###);
},
)
.await;
}
@@ -1,44 +1,185 @@
use std::collections::HashMap;

use once_cell::sync::Lazy;

use crate::common::{Server, Value};
use crate::common::Server;
use crate::json;

static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|| {
let mut map = HashMap::new();
map.insert("displayed_attributes", json!(["*"]));
map.insert("searchable_attributes", json!(["*"]));
map.insert("localized_attributes", json!(null));
map.insert("filterable_attributes", json!([]));
map.insert("distinct_attribute", json!(null));
map.insert(
"ranking_rules",
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
);
map.insert("stop_words", json!([]));
map.insert("non_separator_tokens", json!([]));
map.insert("separator_tokens", json!([]));
map.insert("dictionary", json!([]));
map.insert("synonyms", json!({}));
map.insert(
"faceting",
json!({
"maxValuesPerFacet": json!(100),
"sortFacetValuesBy": {
"*": "alpha"
macro_rules! test_setting_routes {
($({setting: $setting:ident, update_verb: $update_verb:ident, default_value: $default_value:tt},) *) => {
$(
mod $setting {
use crate::common::Server;

#[actix_rt::test]
async fn get_unexisting_index() {
let server = Server::new().await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (_response, code) = server.service.get(url).await;
assert_eq!(code, 404);
}

#[actix_rt::test]
async fn update_unexisting_index() {
let server = Server::new().await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (response, code) = server.service.$update_verb(url, serde_json::Value::Null.into()).await;
assert_eq!(code, 202, "{}", response);
server.index("").wait_task(0).await;
let (response, code) = server.index("test").get().await;
assert_eq!(code, 200, "{}", response);
}

#[actix_rt::test]
async fn delete_unexisting_index() {
let server = Server::new().await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (_, code) = server.service.delete(url).await;
assert_eq!(code, 202);
let response = server.index("").wait_task(0).await;
assert_eq!(response["status"], "failed");
}

#[actix_rt::test]
async fn get_default() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.create(None).await;
assert_eq!(code, 202, "{}", response);
index.wait_task(0).await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (response, code) = server.service.get(url).await;
assert_eq!(code, 200, "{}", response);
let expected = crate::json!($default_value);
assert_eq!(expected, response);
}
}
}),
);
map.insert(
"pagination",
json!({
"maxTotalHits": json!(1000),
}),
);
map.insert("search_cutoff_ms", json!(null));
map
});
)*

#[actix_rt::test]
async fn all_setting_tested() {
let expected = std::collections::BTreeSet::from_iter(meilisearch::routes::indexes::settings::ALL_SETTINGS_NAMES.iter());
let tested = std::collections::BTreeSet::from_iter([$(stringify!($setting)),*].iter());
let diff: Vec<_> = expected.difference(&tested).collect();
assert!(diff.is_empty(), "Not all settings were tested, please add the following settings to the `test_setting_routes!` macro: {:?}", diff);
}
};
}

test_setting_routes!(
{
setting: filterable_attributes,
update_verb: put,
default_value: []
},
{
setting: displayed_attributes,
update_verb: put,
default_value: ["*"]
},
{
setting: localized_attributes,
update_verb: put,
default_value: null
},
{
setting: searchable_attributes,
update_verb: put,
default_value: ["*"]
},
{
setting: distinct_attribute,
update_verb: put,
default_value: null
},
{
setting: stop_words,
update_verb: put,
default_value: []
},
{
setting: separator_tokens,
update_verb: put,
default_value: []
},
{
setting: non_separator_tokens,
update_verb: put,
default_value: []
},
{
setting: dictionary,
update_verb: put,
default_value: []
},
{
setting: ranking_rules,
update_verb: put,
default_value: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
},
{
setting: synonyms,
update_verb: put,
default_value: {}
},
{
setting: pagination,
update_verb: patch,
default_value: {"maxTotalHits": 1000}
},
{
setting: faceting,
update_verb: patch,
default_value: {"maxValuesPerFacet": 100, "sortFacetValuesBy": {"*": "alpha"}}
},
{
setting: search_cutoff_ms,
update_verb: put,
default_value: null
},
{
setting: embedders,
update_verb: patch,
default_value: null
},
{
setting: facet_search,
update_verb: put,
default_value: true
},
{
setting: prefix_search,
update_verb: put,
default_value: "indexingTime"
},
{
setting: proximity_precision,
update_verb: put,
default_value: "byWord"
},
{
setting: sortable_attributes,
update_verb: put,
default_value: []
},
{
setting: typo_tolerance,
update_verb: patch,
default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []}
},
);

#[actix_rt::test]
async fn get_settings_unexisting_index() {
@@ -56,7 +197,7 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 17);
assert_eq!(settings.keys().len(), 19);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));
@@ -87,6 +228,8 @@ async fn get_settings() {
);
assert_eq!(settings["proximityPrecision"], json!("byWord"));
assert_eq!(settings["searchCutoffMs"], json!(null));
assert_eq!(settings["prefixSearch"], json!("indexingTime"));
assert_eq!(settings["facetSearch"], json!(true));
}

#[actix_rt::test]
@@ -199,7 +342,9 @@ async fn secrets_are_hidden_in_settings() {
}
},
"searchCutoffMs": null,
"localizedAttributes": null
"localizedAttributes": null,
"facetSearch": true,
"prefixSearch": "indexingTime"
}
"###);

@@ -338,93 +483,6 @@ async fn error_update_setting_unexisting_index_invalid_uid() {
"###);
}

macro_rules! test_setting_routes {
($($setting:ident $write_method:ident), *) => {
$(
mod $setting {
use crate::common::Server;
use super::DEFAULT_SETTINGS_VALUES;

#[actix_rt::test]
async fn get_unexisting_index() {
let server = Server::new().await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (_response, code) = server.service.get(url).await;
assert_eq!(code, 404);
}

#[actix_rt::test]
async fn update_unexisting_index() {
let server = Server::new().await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (response, code) = server.service.$write_method(url, serde_json::Value::Null.into()).await;
assert_eq!(code, 202, "{}", response);
server.index("").wait_task(0).await;
let (response, code) = server.index("test").get().await;
assert_eq!(code, 200, "{}", response);
}

#[actix_rt::test]
async fn delete_unexisting_index() {
let server = Server::new().await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (_, code) = server.service.delete(url).await;
assert_eq!(code, 202);
let response = server.index("").wait_task(0).await;
assert_eq!(response["status"], "failed");
}

#[actix_rt::test]
async fn get_default() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.create(None).await;
assert_eq!(code, 202, "{}", response);
index.wait_task(0).await;
let url = format!("/indexes/test/settings/{}",
stringify!($setting)
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (response, code) = server.service.get(url).await;
assert_eq!(code, 200, "{}", response);
let expected = DEFAULT_SETTINGS_VALUES.get(stringify!($setting)).unwrap();
assert_eq!(expected, &response);
}
}
)*
};
}

test_setting_routes!(
filterable_attributes put,
displayed_attributes put,
localized_attributes put,
searchable_attributes put,
distinct_attribute put,
stop_words put,
separator_tokens put,
non_separator_tokens put,
dictionary put,
ranking_rules put,
synonyms put,
pagination patch,
faceting patch,
search_cutoff_ms put
);

#[actix_rt::test]
async fn error_set_invalid_ranking_rules() {
let server = Server::new().await;
@@ -1,5 +1,6 @@
mod distinct;
mod errors;
mod get_settings;
mod prefix_search_settings;
mod proximity_settings;
mod tokenizer_customization;

crates/meilisearch/tests/settings/prefix_search_settings.rs (new file, 458 lines)
@@ -0,0 +1,458 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;

use crate::common::Server;
use crate::json;

static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
json!([
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
},
])
});

#[actix_rt::test]
async fn add_docs_and_disable() {
let server = Server::new().await;
let index = server.index("test");

let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await;

let (response, code) = index
.update_settings(json!({
"prefixSearch": "disabled",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(response.uid()).await;

// only 1 document should match
index
.search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day <em>so</em>",
"b": "manythe manythelazyfish"
}
}
]
"###);
})
.await;

// only 1 document should match
index
.search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day so",
"b": "<em>manythe</em> manythelazyfish"
}
}
]
"###);
})
.await;
}

#[actix_rt::test]
async fn disable_and_add_docs() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index
.update_settings(json!({
"prefixSearch": "disabled",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(response.uid()).await;

let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await;

// only 1 document should match
index
.search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day <em>so</em>",
"b": "manythe manythelazyfish"
}
}
]
"###);
})
.await;

index
.search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day so",
"b": "<em>manythe</em> manythelazyfish"
}
}
]
"###);
})
.await;
}

#[actix_rt::test]
async fn disable_add_docs_and_enable() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index
.update_settings(json!({
"prefixSearch": "disabled",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(response.uid()).await;

let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await;

let (response, code) = index
.update_settings(json!({
"prefixSearch": "indexingTime",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(2).await;

// all documents should match
index
.search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
"_formatted": {
"id": "1",
"a": "<em>So</em>up of the day",
"b": "manythefishou"
}
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "<em>So</em>up of day <em>so</em>",
"b": "manythe manythelazyfish"
}
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
"_formatted": {
"id": "3",
"a": "the <em>So</em>up of day",
"b": "manythelazyfish"
}
}
]
"###);
})
.await;

index
.search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
"_formatted": {
"id": "1",
"a": "Soup of the day",
"b": "<em>manythe</em>fishou"
}
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day so",
"b": "<em>manythe</em> <em>manythe</em>lazyfish"
}
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
"_formatted": {
"id": "3",
"a": "the Soup of day",
"b": "<em>manythe</em>lazyfish"
}
}
]
"###);
})
.await;
}

#[actix_rt::test]
async fn disable_add_docs_and_reset() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index
.update_settings(json!({
"prefixSearch": "disabled",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(response.uid()).await;

let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await;

let (response, code) = index
.update_settings(json!({
"prefixSearch": serde_json::Value::Null,
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(2).await;

// all documents should match
index
.search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
"_formatted": {
"id": "1",
"a": "<em>So</em>up of the day",
"b": "manythefishou"
}
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "<em>So</em>up of day <em>so</em>",
"b": "manythe manythelazyfish"
}
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
"_formatted": {
"id": "3",
"a": "the <em>So</em>up of day",
"b": "manythelazyfish"
}
}
]
"###);
})
.await;

index
.search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
"_formatted": {
"id": "1",
"a": "Soup of the day",
"b": "<em>manythe</em>fishou"
}
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day so",
"b": "<em>manythe</em> <em>manythe</em>lazyfish"
}
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
"_formatted": {
"id": "3",
"a": "the Soup of day",
"b": "<em>manythe</em>lazyfish"
}
}
]
"###);
})
.await;
}

#[actix_rt::test]
async fn default_behavior() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index
.update_settings(json!({
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(response.uid()).await;

let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await;

// all documents should match
index
.search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
"_formatted": {
"id": "1",
"a": "<em>So</em>up of the day",
"b": "manythefishou"
}
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "<em>So</em>up of day <em>so</em>",
"b": "manythe manythelazyfish"
}
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
"_formatted": {
"id": "3",
"a": "the <em>So</em>up of day",
"b": "manythelazyfish"
}
}
]
"###);
})
.await;

index
.search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "manythefishou",
"_formatted": {
"id": "1",
"a": "Soup of the day",
"b": "<em>manythe</em>fishou"
}
},
{
"id": 2,
"a": "Soup of day so",
"b": "manythe manythelazyfish",
"_formatted": {
"id": "2",
"a": "Soup of day so",
"b": "<em>manythe</em> <em>manythe</em>lazyfish"
}
},
{
"id": 3,
"a": "the Soup of day",
"b": "manythelazyfish",
"_formatted": {
"id": "3",
"a": "the Soup of day",
"b": "<em>manythe</em>lazyfish"
}
}
]
"###);
})
.await;
}
@@ -18,8 +18,7 @@ bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
# charabia = { version = "0.9.0", default-features = false }
charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false }
charabia = { version = "0.9.2", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
@@ -28,10 +27,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
"rayon", # TODO Should we keep this feature
"tempfile",
], git = "https://github.com/meilisearch/grenad", branch = "various-improvements" }
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
heed = { version = "0.20.3", default-features = false, features = [
"serde-json",
"serde-bincode",
@@ -42,11 +38,11 @@ json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
obkv = "0.3.0"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
@@ -102,6 +98,8 @@ allocator-api2 = "0.2.18"
rustc-hash = "2.0.0"
uell = "0.1.0"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }

[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
@@ -3,6 +3,7 @@ use std::convert::Infallible;
use std::fmt::Write;
use std::{io, str};

use bstr::BString;
use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
@@ -61,6 +62,10 @@ pub enum InternalError {
Serialization(#[from] SerializationError),
#[error(transparent)]
Store(#[from] MdbError),
#[error("Cannot delete {key:?} from database {database_name}: {error}")]
StoreDeletion { database_name: &'static str, key: BString, error: heed::Error },
#[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")]
StorePut { database_name: &'static str, key: BString, value_length: usize, error: heed::Error },
#[error(transparent)]
Utf8(#[from] str::Utf8Error),
#[error("An indexation process was explicitly aborted")]
@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {

fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
Ok(Cow::Owned(v))
}
}

@@ -27,18 +27,27 @@ impl CboRoaringBitmapCodec {
}
}

pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
pub fn serialize_into_vec(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
Self::serialize_into_writer(roaring, vec).unwrap()
}

pub fn serialize_into_writer<W: io::Write>(
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
vec.write_u32::<NativeEndian>(integer).unwrap();
writer.write_u32::<NativeEndian>(integer)?;
}
} else {
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
roaring.serialize_into(vec).unwrap();
roaring.serialize_into(writer)?;
}

Ok(())
}

pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
@@ -143,7 +152,7 @@ impl CboRoaringBitmapCodec {
return Ok(None);
}

Self::serialize_into(&previous, buffer);
Self::serialize_into_vec(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
@@ -169,7 +178,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {

fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Self::serialize_into_vec(item, &mut vec);
Ok(Cow::Owned(vec))
}
}
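
The threshold trick in the hunk above is easiest to see in isolation. Below is a minimal standalone sketch, assuming a hypothetical THRESHOLD of 4 (the real constant lives elsewhere in the codec module) and using only the roaring and byteorder crates:

use std::io;

use byteorder::{NativeEndian, WriteBytesExt};
use roaring::RoaringBitmap;

const THRESHOLD: u64 = 4; // hypothetical value, for illustration only

// Small bitmaps are cheaper as raw native-endian u32s than as a serialized
// RoaringBitmap, whose header alone would outweigh them; larger bitmaps go
// through the regular serializer, which writes that header.
fn cbo_serialize_into<W: io::Write>(roaring: &RoaringBitmap, mut writer: W) -> io::Result<()> {
    if roaring.len() <= THRESHOLD {
        for integer in roaring {
            writer.write_u32::<NativeEndian>(integer)?;
        }
    } else {
        roaring.serialize_into(writer)?;
    }
    Ok(())
}

Taking any `io::Write` instead of a `&mut Vec<u8>` is exactly what lets the diff propagate errors with `?` rather than `unwrap()`.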
@@ -70,6 +70,8 @@ pub mod main_key {
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
pub const SEARCH_CUTOFF: &str = "search_cutoff";
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
pub const FACET_SEARCH: &str = "facet_search";
pub const PREFIX_SEARCH: &str = "prefix_search";
}

pub mod db_name {
@@ -1233,6 +1235,10 @@ impl Index {
)
}

pub(crate) fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY)
}

/// Returns the FST which is the words prefixes dictionary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
@@ -1562,6 +1568,41 @@ impl Index {
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
}

pub fn prefix_search(&self, txn: &RoTxn<'_>) -> heed::Result<Option<PrefixSearch>> {
self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().get(txn, main_key::PREFIX_SEARCH)
}

pub(crate) fn put_prefix_search(
&self,
txn: &mut RwTxn<'_>,
val: PrefixSearch,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().put(
txn,
main_key::PREFIX_SEARCH,
&val,
)
}

pub(crate) fn delete_prefix_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::PREFIX_SEARCH)
}

pub fn facet_search(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
self.main
.remap_types::<Str, SerdeBincode<bool>>()
.get(txn, main_key::FACET_SEARCH)
.map(|v| v.unwrap_or(true))
}

pub(crate) fn put_facet_search(&self, txn: &mut RwTxn<'_>, val: bool) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<bool>>().put(txn, main_key::FACET_SEARCH, &val)
}

pub(crate) fn delete_facet_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
}

pub fn localized_attributes_rules(
&self,
rtxn: &RoTxn<'_>,
@@ -1647,12 +1688,9 @@ impl Index {
Ok(res)
}

pub fn prefix_settings(&self, _rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
Ok(PrefixSettings {
compute_prefixes: true,
max_prefix_length: 4,
prefix_count_threshold: 100,
})
pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
}
}

@@ -1665,9 +1703,17 @@ pub struct IndexEmbeddingConfig {

#[derive(Debug, Deserialize, Serialize)]
pub struct PrefixSettings {
pub prefix_count_threshold: u64,
pub prefix_count_threshold: usize,
pub max_prefix_length: usize,
pub compute_prefixes: bool,
pub compute_prefixes: PrefixSearch,
}

#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub enum PrefixSearch {
#[default]
IndexingTime,
Disabled,
}
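
The serde attributes on this enum fully determine its wire format, which is why the settings tests above exchange the strings "indexingTime" and "disabled". A small self-contained sketch (duplicating the enum, using only serde and serde_json) of that round-trip:

use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub enum PrefixSearch {
    #[default]
    IndexingTime,
    Disabled,
}

fn main() {
    // The camelCase rename maps `"prefixSearch": "indexingTime"` in settings
    // payloads to `PrefixSearch::IndexingTime`, and back.
    let json = serde_json::to_string(&PrefixSearch::IndexingTime).unwrap();
    assert_eq!(json, "\"indexingTime\"");
    let parsed: PrefixSearch = serde_json::from_str("\"disabled\"").unwrap();
    assert_eq!(parsed, PrefixSearch::Disabled);
}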

#[derive(Serialize, Deserialize)]
@@ -1775,6 +1821,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -1865,6 +1912,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -1945,6 +1993,7 @@ pub(crate) mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -1,6 +1,7 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]

#[cfg(not(windows))]
#[cfg(test)]
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

@@ -207,7 +207,11 @@ impl<'a> Search<'a> {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");
return Ok((keyword_results, Some(0)));
return Ok(return_keyword_results(
self.limit,
self.offset,
keyword_results,
));
}
}
}
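
The body of `return_keyword_results` is not shown in this hunk, so here is only a hypothetical reduction of the fallback idea, with invented names: when the embedder fails, the hybrid search degrades to the keyword hits while still honoring pagination, rather than returning them alongside a misleading semantic-hit count of zero.

// Hypothetical sketch; the real helper also rebuilds the full search result.
fn degraded_keyword_results<T>(limit: usize, offset: usize, hits: Vec<T>) -> Vec<T> {
    hits.into_iter().skip(offset).take(limit).collect()
}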
@@ -274,7 +274,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
last_match_last_token_position_plus_one
} else {
// we have matched the end of possible tokens, there's nothing to advance
tokens.len() - 1
tokens.len()
}
};
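
Judging by the `_plus_one` naming, this value appears to feed an exclusive end bound, which makes the one-token change above an off-by-one fix: `tokens.len()` is the correct sentinel, while `tokens.len() - 1` silently dropped the final token. A tiny standalone illustration of the difference:

fn main() {
    let tokens = ["the", "hidden", "world"];
    // An exclusive end bound may equal the length and still include the last item...
    assert_eq!(tokens[1..tokens.len()], ["hidden", "world"]);
    // ...while stopping at len() - 1 silently drops the final token.
    assert_eq!(tokens[1..tokens.len() - 1], ["hidden"]);
}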
@@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
@@ -68,6 +69,7 @@ pub struct SearchContext<'ctx> {
pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<RestrictedFids>,
pub prefix_search: PrefixSearch,
}

impl<'ctx> SearchContext<'ctx> {
@@ -85,6 +87,8 @@ impl<'ctx> SearchContext<'ctx> {
}
}

let prefix_search = index.prefix_search(txn)?.unwrap_or_default();

Ok(Self {
index,
txn,
@@ -94,9 +98,14 @@ impl<'ctx> SearchContext<'ctx> {
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
prefix_search,
})
}

pub fn is_prefix_search_allowed(&self) -> bool {
self.prefix_search != PrefixSearch::Disabled
}

pub fn attributes_to_search_on(
&mut self,
attributes_to_search_on: &'ctx [String],

@@ -28,6 +28,7 @@ pub fn located_query_terms_from_tokens(
words_limit: Option<usize>,
) -> Result<ExtractedTokens> {
let nbr_typos = number_of_typos_allowed(ctx)?;
let allow_prefix_search = ctx.is_prefix_search_allowed();

let mut query_terms = Vec::new();

@@ -94,7 +95,7 @@ pub fn located_query_terms_from_tokens(
ctx,
word,
nbr_typos(word),
true,
allow_prefix_search,
false,
)?;
let located_term = LocatedQueryTerm {

@@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
if words.is_empty() {
return Ok(RoaringBitmap::new());
}
let mut candidates = RoaringBitmap::new();
let mut candidates = None;
for word in words.iter().flatten().copied() {
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
candidates |= word_docids;
if let Some(candidates) = candidates.as_mut() {
*candidates &= word_docids;
} else {
candidates = Some(word_docids);
}
} else {
return Ok(RoaringBitmap::new());
}
}

let Some(mut candidates) = candidates else {
return Ok(RoaringBitmap::new());
};

let winsize = words.len().min(3);

for win in words.windows(winsize) {
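
The semantic shift in this hunk is from union to intersection: a document can only contain the phrase if it contains every word, so ORing the per-word docids over-approximated the candidate set. A self-contained sketch of the new pruning over plain RoaringBitmaps, which mirrors the logic above without the surrounding context types:

use roaring::RoaringBitmap;

// Intersect the posting lists of every phrase word; a missing posting list
// (or an empty intersection) means no document can match the phrase.
fn phrase_candidates(word_docids: &[Option<RoaringBitmap>]) -> RoaringBitmap {
    let mut candidates: Option<RoaringBitmap> = None;
    for docids in word_docids {
        let Some(docids) = docids else {
            // A phrase word with no posting list rules out every document.
            return RoaringBitmap::new();
        };
        if let Some(candidates) = candidates.as_mut() {
            *candidates &= docids;
        } else {
            candidates = Some(docids.clone());
        }
    }
    candidates.unwrap_or_default()
}

Feeding it the per-word posting lists in phrase order keeps the behavior aligned with the hunk above; the window scan that follows then checks actual word positions.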
@@ -83,6 +83,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -172,6 +172,14 @@ impl<'i> FacetsUpdate<'i> {
incremental_update.execute(wtxn)?;
}

if !self.index.facet_search(wtxn)? {
// If facet search is disabled, we don't need to compute facet search databases.
// We clear the facet search databases.
self.index.facet_id_string_fst.clear(wtxn)?;
self.index.facet_id_normalized_string_strings.clear(wtxn)?;
return Ok(());
}

match self.normalized_delta_data {
Some(data) => index_facet_search(wtxn, data, self.index),
None => Ok(()),
@@ -58,9 +58,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect());
let old_dictionary: Option<Vec<_>> =
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let del_builder =
let mut del_builder =
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
let del_tokenizer = del_builder.into_tokenizer();
let del_tokenizer = del_builder.build();

let new_stop_words = settings_diff.new.stop_words.as_ref();
let new_separators: Option<Vec<_>> = settings_diff
@@ -70,9 +70,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect());
let new_dictionary: Option<Vec<_>> =
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let add_builder =
let mut add_builder =
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
let add_tokenizer = add_builder.into_tokenizer();
let add_tokenizer = add_builder.build();

// iterate over documents.
let mut cursor = obkv_documents.into_cursor()?;

@@ -34,10 +34,12 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
} else {
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
let facet_search = settings_diff.new.facet_search;
extract_facet_string_docids_document_update(
docid_fid_facet_string,
indexer,
localized_field_ids,
facet_search,
)
}
}
@@ -51,6 +53,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
localized_field_ids: &LocalizedFieldIds,
facet_search: bool,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();

@@ -96,7 +99,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let normalized_value = str::from_utf8(normalized_value_bytes)?;

// Facet search normalization
{
if facet_search {
let locales = localized_field_ids.locales(field_id);
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);

@@ -179,8 +182,10 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);

let are_same_locales = old_locales == new_locales;
let reindex_facet_search =
settings_diff.new.facet_search && !settings_diff.old.facet_search;

if is_same_value && are_same_locales {
if is_same_value && are_same_locales && !reindex_facet_search {
continue;
}

@@ -191,18 +196,26 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let normalized_value = str::from_utf8(normalized_value_bytes)?;

// Facet search normalization
{
let old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
let new_hyper_normalized_value = if are_same_locales {
&old_hyper_normalized_value
if settings_diff.new.facet_search {
let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
let old_hyper_normalized_value;
let old_hyper_normalized_value = if !settings_diff.old.facet_search
|| deladd_reader.get(DelAdd::Deletion).is_none()
{
// if the facet search is disabled in the old settings or if no facet string is deleted,
// we don't need to normalize the facet string.
None
} else if are_same_locales {
Some(&new_hyper_normalized_value)
} else {
&normalize_facet_string(normalized_value, new_locales)
old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
Some(&old_hyper_normalized_value)
};

let set = BTreeSet::from_iter(std::iter::once(normalized_value));

// if the facet string is the same, we can put the deletion and addition in the same obkv.
if old_hyper_normalized_value == new_hyper_normalized_value.as_str() {
if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
// nothing to do if we delete and re-add the value.
if is_same_value {
continue;
@@ -222,7 +235,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
} else {
// if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different.
// deletion
if deladd_reader.get(DelAdd::Deletion).is_some() {
if let Some(old_hyper_normalized_value) = old_hyper_normalized_value {
// insert old value
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
buffer.clear();

@@ -80,7 +80,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();

if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::from_slice(value);
@@ -112,8 +112,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
(field_id, None, add_value)
}
EitherOrBoth::Both(&field_id, _) => {
// during settings update, recompute the changing settings only.
if settings_diff.settings_update_only {
// during settings update, recompute the changing settings only unless a global change is detected.
if settings_diff.settings_update_only
&& !settings_diff.global_facet_settings_changed()
{
continue;
}

@@ -29,6 +29,7 @@ pub use self::transform::{Transform, TransformOutput};
use super::new::StdResult;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
use crate::index::{PrefixSearch, PrefixSettings};
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
@@ -82,8 +83,6 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {

#[derive(Default, Debug, Clone)]
pub struct IndexDocumentsConfig {
pub words_prefix_threshold: Option<u32>,
pub max_prefix_length: Option<usize>,
pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod,
@@ -565,14 +564,32 @@ where
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;

// Run the words prefixes update operation.
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
if let Some(value) = self.config.words_prefix_threshold {
builder.threshold(value);
let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
self.index.prefix_settings(self.wtxn)?;

// If the prefix search is enabled at indexing time, we compute the prefixes.
if compute_prefixes == PrefixSearch::IndexingTime {
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
builder.threshold(prefix_count_threshold);
builder.max_prefix_length(max_prefix_length);
builder.execute()?;
} else {
// If the prefix search is disabled at indexing time, we delete the previous words prefixes fst.
// And all the associated docids databases.
self.index.delete_words_prefixes_fst(self.wtxn)?;
self.index.word_prefix_docids.clear(self.wtxn)?;
self.index.exact_word_prefix_docids.clear(self.wtxn)?;
self.index.word_prefix_position_docids.clear(self.wtxn)?;
self.index.word_prefix_fid_docids.clear(self.wtxn)?;

databases_seen += 3;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});

return Ok(());
}
if let Some(value) = self.config.max_prefix_length {
builder.max_prefix_length(value);
}
builder.execute()?;

if (self.should_abort)() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
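The prefix hunk above replaces the per-run config knobs with an index-level PrefixSettings value and a branch that either builds or deletes the prefix databases. A minimal sketch of that control flow, using hypothetical, simplified stand-ins for milli's PrefixSettings and PrefixSearch types:

```rust
// Hypothetical stand-ins for milli's PrefixSettings/PrefixSearch types.
#[derive(PartialEq)]
enum PrefixSearch {
    IndexingTime,
    Disabled,
}

struct PrefixSettings {
    prefix_count_threshold: u32,
    max_prefix_length: usize,
    compute_prefixes: PrefixSearch,
}

fn apply(settings: PrefixSettings) {
    if settings.compute_prefixes == PrefixSearch::IndexingTime {
        // Mirror of the branch above: build the prefix FST with both knobs.
        println!(
            "build prefix FST: threshold={}, max_len={}",
            settings.prefix_count_threshold, settings.max_prefix_length
        );
    } else {
        // Otherwise drop the FST and every prefix docids database.
        println!("prefix search disabled: clear the FST and prefix docids databases");
    }
}

fn main() {
    apply(PrefixSettings {
        prefix_count_threshold: 100,
        max_prefix_length: 4,
        compute_prefixes: PrefixSearch::IndexingTime,
    });
}
```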
@@ -2138,6 +2155,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2199,6 +2217,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2251,6 +2270,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2302,6 +2322,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2355,6 +2376,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2413,6 +2435,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2464,6 +2487,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2515,6 +2539,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2708,6 +2733,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2766,6 +2792,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2821,6 +2848,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -667,14 +667,23 @@ impl<'a, 'i> Transform<'a, 'i> {
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };

// If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
must_reindex_facets
&& modified_faceted_fields
.iter()
.any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long))
};
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
let facet_fids_changed = settings_diff.facet_fids_changed();
let necessary_faceted_field =
|id: FieldId| -> bool {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
if global_facet_settings_changed {
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else if facet_fids_changed {
modified_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else {
false
}
};

// Always provide all fields when vectors are involved because
// we need the fields for the prompt/templating.
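The rewritten closure above picks a different field set depending on whether a global facet setting changed or only specific facet field ids did. A runnable sketch of the same three-way decision, assuming a simplified is_faceted_by that matches an exact name or a dotted prefix (an approximation of milli's helper):

```rust
// Simplified stand-in for milli's is_faceted_by: `field` matches `rule`
// when they are equal or `rule` is a dotted prefix of `field`.
fn is_faceted_by(field: &str, rule: &str) -> bool {
    field == rule || (field.starts_with(rule) && field[rule.len()..].starts_with('.'))
}

// Mirrors the closure above: global change => check all faceted fields,
// fid change => check only the modified ones, otherwise nothing to redo.
fn necessary(
    field: &str,
    global_changed: bool,
    fids_changed: bool,
    all_faceted: &[&str],
    modified: &[&str],
) -> bool {
    if global_changed {
        all_faceted.iter().any(|f| is_faceted_by(field, f) || is_faceted_by(f, field))
    } else if fids_changed {
        modified.iter().any(|f| is_faceted_by(field, f) || is_faceted_by(f, field))
    } else {
        false
    }
}

fn main() {
    assert!(necessary("address.city", false, true, &[], &["address"]));
    assert!(!necessary("title", false, false, &["title"], &[]));
}
```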
File diff suppressed because it is too large
@@ -60,9 +60,10 @@
//! For now we can use a grenad sorter for spilling even though I think
//! it's not the most efficient way (too many files open, sorting entries).

use std::borrow::Borrow;
use std::cmp::Ordering;
use std::collections::binary_heap::PeekMut;
use std::collections::BinaryHeap;
use std::collections::{BTreeMap, BinaryHeap};
use std::fs::File;
use std::hash::BuildHasher;
use std::io::BufReader;

@@ -70,10 +71,7 @@ use std::{io, iter, mem};

use bumpalo::Bump;
use grenad::ReaderCursor;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
use raw_collections::bbbul::{BitPacker, BitPacker4x};
use raw_collections::map::FrozenMap;
use raw_collections::{Bbbul, FrozenBbbul};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;

@@ -105,9 +103,7 @@ impl<'extractor> BalancedCaches<'extractor> {
hasher: FxBuildHasher,
max_memory,
caches: InnerCaches::Normal(NormalCaches {
caches: iter::repeat_with(|| HashMap::with_hasher_in(FxBuildHasher, alloc))
.take(buckets)
.collect(),
caches: iter::repeat_with(BTreeMap::new).take(buckets).collect(),
}),
alloc,
}
@@ -166,8 +162,8 @@ impl<'extractor> BalancedCaches<'extractor> {
rayon::current_thread_index().unwrap_or(0)
);

let allocated: usize = normal_caches.caches.iter().map(|m| m.allocation_size()).sum();
tracing::trace!("The last allocated HashMap took {allocated} bytes");
// let allocated: usize = normal_caches.caches.iter().map(|m| m.allocation_size()).sum();
// tracing::trace!("The last allocated BTreeMap took {allocated} bytes");

let dummy = NormalCaches { caches: Vec::new() };
let NormalCaches { caches: cache_maps } = mem::replace(normal_caches, dummy);

@@ -187,21 +183,17 @@ impl<'extractor> BalancedCaches<'extractor> {
// that are the same size.
let map = unsafe {
std::mem::transmute::<
&mut HashMap<
&mut BTreeMap<
&[u8],
DelAddBbbul<BitPacker4x>, // from this
FxBuildHasher,
&Bump,
>,
&mut HashMap<
&mut BTreeMap<
&[u8],
FrozenDelAddBbbul<BitPacker4x>, // to that
FxBuildHasher,
&Bump,
>,
>(map)
};
Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() })
Ok(FrozenCache { bucket, cache: FrozenBTreeMap::new(map), spilled: Vec::new() })
})
.collect(),
InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches

@@ -220,21 +212,17 @@ impl<'extractor> BalancedCaches<'extractor> {
// that are the same size.
let map = unsafe {
std::mem::transmute::<
&mut HashMap<
&mut BTreeMap<
&[u8],
DelAddBbbul<BitPacker4x>, // from this
FxBuildHasher,
&Bump,
>,
&mut HashMap<
&mut BTreeMap<
&[u8],
FrozenDelAddBbbul<BitPacker4x>, // to that
FxBuildHasher,
&Bump,
>,
>(map)
};
Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled })
Ok(FrozenCache { bucket, cache: FrozenBTreeMap::new(map), spilled })
})
.collect(),
}
@@ -245,14 +233,7 @@ impl<'extractor> BalancedCaches<'extractor> {
unsafe impl MostlySend for BalancedCaches<'_> {}

struct NormalCaches<'extractor> {
caches: Vec<
HashMap<
&'extractor [u8],
DelAddBbbul<'extractor, BitPacker4x>,
FxBuildHasher,
&'extractor Bump,
>,
>,
caches: Vec<BTreeMap<&'extractor [u8], DelAddBbbul<'extractor, BitPacker4x>>>,
}

impl<'extractor> NormalCaches<'extractor> {

@@ -266,17 +247,13 @@ impl<'extractor> NormalCaches<'extractor> {
) {
let hash = hasher.hash_one(key);
let bucket = compute_bucket_from_hash(buckets, hash);

match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) {
RawEntryMut::Occupied(mut entry) => {
entry.get_mut().del.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
let cache = &mut self.caches[bucket];
match cache.get_mut(key) {
Some(deladd) => {
deladd.del.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
}
RawEntryMut::Vacant(entry) => {
entry.insert_hashed_nocheck(
hash,
alloc.alloc_slice_copy(key),
DelAddBbbul::new_del_u32_in(n, alloc),
);
None => {
cache.insert(alloc.alloc_slice_copy(key), DelAddBbbul::new_del_u32_in(n, alloc));
}
}
}
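A minimal, self-contained sketch of the lookup-or-insert idiom that replaces hashbrown's raw-entry API above, with simplified key and value types standing in for the bump-allocated ones:

```rust
use std::collections::BTreeMap;

// Probe the map first; insert an owned copy of the key only on a miss.
// Vec<u8>/Vec<u32> stand in for the bump-allocated slice and Bbbul.
fn insert_del_u32(cache: &mut BTreeMap<Vec<u8>, Vec<u32>>, key: &[u8], n: u32) {
    match cache.get_mut(key) {
        Some(ids) => ids.push(n),
        None => {
            cache.insert(key.to_vec(), vec![n]);
        }
    }
}

fn main() {
    let mut cache = BTreeMap::new();
    insert_del_u32(&mut cache, b"doggo", 1);
    insert_del_u32(&mut cache, b"doggo", 2);
    assert_eq!(cache[b"doggo".as_slice()], vec![1, 2]);
}
```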
@@ -291,30 +268,20 @@ impl<'extractor> NormalCaches<'extractor> {
) {
let hash = hasher.hash_one(key);
let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) {
RawEntryMut::Occupied(mut entry) => {
entry.get_mut().add.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
let cache = &mut self.caches[bucket];
match cache.get_mut(key) {
Some(deladd) => {
deladd.add.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
}
RawEntryMut::Vacant(entry) => {
entry.insert_hashed_nocheck(
hash,
alloc.alloc_slice_copy(key),
DelAddBbbul::new_add_u32_in(n, alloc),
);
None => {
cache.insert(alloc.alloc_slice_copy(key), DelAddBbbul::new_add_u32_in(n, alloc));
}
}
}
}

struct SpillingCaches<'extractor> {
caches: Vec<
HashMap<
&'extractor [u8],
DelAddBbbul<'extractor, BitPacker4x>,
FxBuildHasher,
&'extractor Bump,
>,
>,
caches: Vec<BTreeMap<&'extractor [u8], DelAddBbbul<'extractor, BitPacker4x>>>,
spilled_entries: Vec<grenad::Sorter<MergeDeladdCboRoaringBitmaps>>,
deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>,

@@ -322,14 +289,7 @@ struct SpillingCaches<'extractor> {

impl<'extractor> SpillingCaches<'extractor> {
fn from_cache_maps(
caches: Vec<
HashMap<
&'extractor [u8],
DelAddBbbul<'extractor, BitPacker4x>,
FxBuildHasher,
&'extractor Bump,
>,
>,
caches: Vec<BTreeMap<&'extractor [u8], DelAddBbbul<'extractor, BitPacker4x>>>,
) -> SpillingCaches<'extractor> {
SpillingCaches {
spilled_entries: iter::repeat_with(|| {

@@ -356,12 +316,12 @@ impl<'extractor> SpillingCaches<'extractor> {
) -> Result<()> {
let hash = hasher.hash_one(key);
let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) {
RawEntryMut::Occupied(mut entry) => {
entry.get_mut().del.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
match self.caches[bucket].get_mut(key) {
Some(deladd) => {
deladd.del.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
Ok(())
}
RawEntryMut::Vacant(_entry) => spill_entry_to_sorter(
None => spill_entry_to_sorter(
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,

@@ -381,12 +341,12 @@ impl<'extractor> SpillingCaches<'extractor> {
) -> Result<()> {
let hash = hasher.hash_one(key);
let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) {
RawEntryMut::Occupied(mut entry) => {
entry.get_mut().add.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
match self.caches[bucket].get_mut(key) {
Some(deladd) => {
deladd.add.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n);
Ok(())
}
RawEntryMut::Vacant(_entry) => spill_entry_to_sorter(
None => spill_entry_to_sorter(
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
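Both insertion paths above first hash the key and route it to one of the per-thread caches. A small sketch of that routing, assuming compute_bucket_from_hash is a plain modulo reduction (hypothetical; the real reduction in milli may differ):

```rust
use std::collections::hash_map::RandomState;
use std::hash::BuildHasher;

// Hypothetical stand-in for compute_bucket_from_hash: reduce the 64-bit
// hash into one of `buckets` cache slots.
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
    (hash % buckets as u64) as usize
}

fn main() {
    let hasher = RandomState::new();
    let hash = hasher.hash_one("word_docids_key");
    let bucket = compute_bucket_from_hash(4, hash);
    assert!(bucket < 4);
    println!("key routed to bucket {bucket}");
}
```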
@@ -415,21 +375,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;

cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),

@@ -441,13 +401,7 @@ fn spill_entry_to_sorter(

pub struct FrozenCache<'a, 'extractor> {
bucket: usize,
cache: FrozenMap<
'a,
'extractor,
&'extractor [u8],
FrozenDelAddBbbul<'extractor, BitPacker4x>,
FxBuildHasher,
>,
cache: FrozenBTreeMap<'a, &'extractor [u8], FrozenDelAddBbbul<'extractor, BitPacker4x>>,
spilled: Vec<grenad::Reader<BufReader<File>>>,
}
@@ -466,12 +420,43 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
Ok(bucket_caches)
}

/// Merges the caches that must be all associated to the same bucket.
pub struct FrozenBTreeMap<'a, K, V>(&'a mut BTreeMap<K, V>);

unsafe impl<'a, K, V> Send for FrozenBTreeMap<'a, K, V>
where
K: Send,
V: Send,
{
}

impl<'a, K, V> FrozenBTreeMap<'a, K, V> {
#[inline]
pub fn new(map: &'a mut BTreeMap<K, V>) -> Self {
Self(map)
}

#[inline]
pub fn iter_mut(&mut self) -> std::collections::btree_map::IterMut<'_, K, V> {
self.0.iter_mut()
}

#[inline]
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
self.0.get_mut(key)
}
}
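FrozenBTreeMap above only re-exposes iter_mut and get_mut, so the key set cannot change after freezing while the values stay mutable. A condensed, runnable sketch of the same idea:

```rust
use std::collections::BTreeMap;

// Holds &mut BTreeMap but exposes only value mutation, so keys can
// neither be added nor removed while the wrapper is alive.
struct Frozen<'a, K, V>(&'a mut BTreeMap<K, V>);

impl<'a, K: Ord, V> Frozen<'a, K, V> {
    fn get_mut(&mut self, key: &K) -> Option<&mut V> {
        self.0.get_mut(key)
    }
}

fn main() {
    let mut map = BTreeMap::from([("a", 1), ("b", 2)]);
    {
        let mut frozen = Frozen(&mut map);
        if let Some(v) = frozen.get_mut(&"a") {
            *v += 10; // values are mutable...
        }
        // ...but there is no way to insert or remove a key through Frozen.
    }
    assert_eq!(map[&"a"], 11);
}
```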
/// Merges the caches that must be all associated to the same bucket
/// but make sure to sort the different buckets before performing the merges.
///
/// # Panics
///
/// - If the bucket IDs in these frozen caches are not exactly the same.
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
where
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
{

@@ -490,7 +475,7 @@ where
for (source_index, source) in readers.into_iter().enumerate() {
let mut cursor = source.into_cursor()?;
if cursor.move_on_next()?.is_some() {
heap.push(Entry { cursor, source_index });
heap.push(CursorEntry { cursor, source_index });
}
}

@@ -544,7 +529,6 @@ where
// Then manage the content on the HashMap entries that weren't taken (mem::take).
while let Some(mut map) = maps.pop() {
for (key, bbbul) in map.iter_mut() {
// Make sure we don't try to work with entries already managed by the spilled
if bbbul.is_empty() {
continue;
}

@@ -566,29 +550,29 @@ where
Ok(())
}

struct Entry<R> {
struct CursorEntry<R> {
cursor: ReaderCursor<R>,
source_index: usize,
}

impl<R> Ord for Entry<R> {
fn cmp(&self, other: &Entry<R>) -> Ordering {
impl<R> Ord for CursorEntry<R> {
fn cmp(&self, other: &CursorEntry<R>) -> Ordering {
let skey = self.cursor.current().map(|(k, _)| k);
let okey = other.cursor.current().map(|(k, _)| k);
skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse()
}
}

impl<R> Eq for Entry<R> {}
impl<R> Eq for CursorEntry<R> {}

impl<R> PartialEq for Entry<R> {
fn eq(&self, other: &Entry<R>) -> bool {
impl<R> PartialEq for CursorEntry<R> {
fn eq(&self, other: &CursorEntry<R>) -> bool {
self.cmp(other) == Ordering::Equal
}
}

impl<R> PartialOrd for Entry<R> {
fn partial_cmp(&self, other: &Entry<R>) -> Option<Ordering> {
impl<R> PartialOrd for CursorEntry<R> {
fn partial_cmp(&self, other: &CursorEntry<R>) -> Option<Ordering> {
Some(self.cmp(other))
}
}
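CursorEntry reverses its comparison so that std's max-heap BinaryHeap pops the smallest key first, which is exactly what a k-way merge over sorted grenad readers needs. A self-contained sketch of that merge, using Reverse instead of a hand-reversed Ord:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

// k-way merge over already-sorted sources: BinaryHeap is a max-heap, so
// Reverse makes it pop the smallest (entry, source) pair first.
fn merge_sorted(sources: Vec<Vec<(&'static str, u32)>>) -> Vec<(&'static str, u32)> {
    let mut iters: Vec<_> = sources.into_iter().map(|v| v.into_iter()).collect();
    let mut heap = BinaryHeap::new();
    for (i, it) in iters.iter_mut().enumerate() {
        if let Some(entry) = it.next() {
            heap.push(Reverse((entry, i)));
        }
    }
    let mut out = Vec::new();
    while let Some(Reverse((entry, i))) = heap.pop() {
        out.push(entry);
        // Refill the heap from the source that just yielded an entry.
        if let Some(next) = iters[i].next() {
            heap.push(Reverse((next, i)));
        }
    }
    out
}

fn main() {
    let merged = merge_sorted(vec![
        vec![("a", 1), ("c", 3)],
        vec![("b", 2), ("d", 4)],
    ]);
    assert_eq!(merged, vec![("a", 1), ("b", 2), ("c", 3), ("d", 4)]);
}
```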
@@ -12,13 +12,14 @@ use crate::update::new::thread_local::FullySend;
use crate::update::new::DocumentChange;
use crate::vector::EmbeddingConfigs;
use crate::Result;
pub struct DocumentsExtractor<'a> {
document_sender: &'a DocumentsSender<'a>,

pub struct DocumentsExtractor<'a, 'b> {
document_sender: DocumentsSender<'a, 'b>,
embedders: &'a EmbeddingConfigs,
}

impl<'a> DocumentsExtractor<'a> {
pub fn new(document_sender: &'a DocumentsSender<'a>, embedders: &'a EmbeddingConfigs) -> Self {
impl<'a, 'b> DocumentsExtractor<'a, 'b> {
pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs) -> Self {
Self { document_sender, embedders }
}
}

@@ -29,7 +30,7 @@ pub struct DocumentExtractorData {
pub field_distribution_delta: HashMap<String, i64>,
}

impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
type Data = FullySend<RefCell<DocumentExtractorData>>;

fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {

@@ -25,14 +25,14 @@ use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};

pub struct FacetedExtractorData<'a> {
pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a>,
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: GrenadParameters,
buckets: usize,
}

impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
type Data = RefCell<BalancedCaches<'extractor>>;

fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {

@@ -318,7 +318,7 @@ impl<'doc> DelAddFacetValue<'doc> {
docid: DocumentId,
sender: &FieldIdDocidFacetSender,
doc_alloc: &Bump,
) -> std::result::Result<(), crossbeam_channel::SendError<()>> {
) -> crate::Result<()> {
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
for ((fid, value), deladd) in self.strings {
if let Ok(s) = std::str::from_utf8(&value) {
@@ -1,6 +1,6 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
use std::{iter, mem, result};

use bumpalo::Bump;

@@ -97,30 +97,34 @@ pub struct FrozenGeoExtractorData<'extractor> {
impl<'extractor> FrozenGeoExtractorData<'extractor> {
pub fn iter_and_clear_removed(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.removed)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.removed)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)?))
}

pub fn iter_and_clear_inserted(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.inserted)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.inserted)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)?))
}
}

fn iterator_over_spilled_geopoints(
spilled: &mut Option<BufReader<File>>,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
let mut spilled = spilled.take();
iter::from_fn(move || match &mut spilled {
if let Some(spilled) = &mut spilled {
spilled.rewind()?;
}

Ok(iter::from_fn(move || match &mut spilled {
Some(file) => {
let geopoint_bytes = &mut [0u8; mem::size_of::<ExtractedGeoPoint>()];
match file.read_exact(geopoint_bytes) {

@@ -130,7 +134,7 @@ fn iterator_over_spilled_geopoints(
}
}
None => None,
})
}))
}
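The rewind added above matters because the spilled file was just written; reading must restart from offset zero and stop cleanly on EOF. A minimal sketch of that write-rewind-read cycle over fixed-size records (a 16-byte array stands in for ExtractedGeoPoint; the tempfile crate, already used by the extractor above, is assumed as a dependency):

```rust
use std::io::{self, Read, Seek, Write};

fn main() -> io::Result<()> {
    let mut file = tempfile::tempfile()?;
    file.write_all(&[1u8; 16])?;
    file.write_all(&[2u8; 16])?;
    file.rewind()?; // the rewind this diff adds before reading spilled points
    let mut record = [0u8; 16];
    loop {
        match file.read_exact(&mut record) {
            Ok(()) => println!("record starts with {}", record[0]),
            // UnexpectedEof is the natural end of the spilled data.
            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
```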
impl<'extractor> Extractor<'extractor> for GeoExtractor {

@@ -157,7 +161,9 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
let mut data_ref = context.data.borrow_mut_or_yield();

for change in changes {
if max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) {
if data_ref.spilled_removed.is_none()
&& max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm)
{
// We must spill as we allocated too much memory
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;

@@ -6,7 +6,9 @@ mod searchable;
mod vectors;

use bumpalo::Bump;
pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap};
pub use cache::{
    merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
pub use documents::*;
pub use faceted::*;
pub use geo::*;
@@ -18,17 +18,17 @@ use crate::vector::error::{
use crate::vector::{Embedder, Embedding, EmbeddingConfigs};
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};

pub struct EmbeddingExtractor<'a> {
pub struct EmbeddingExtractor<'a, 'b> {
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
}

impl<'a> EmbeddingExtractor<'a> {
impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
pub fn new(
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
field_distribution: &'a FieldDistribution,
threads: &'a ThreadPoolNoAbort,
) -> Self {

@@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>(

unsafe impl MostlySend for EmbeddingExtractorData<'_> {}

impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
type Data = RefCell<EmbeddingExtractorData<'extractor>>;

fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {

@@ -259,7 +259,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
// Currently this is the case as:
// 1. BVec are inside of the bumpalo
// 2. All other fields are either trivial (u8) or references.
struct Chunks<'a, 'extractor> {
struct Chunks<'a, 'b, 'extractor> {
texts: BVec<'a, &'a str>,
ids: BVec<'a, DocumentId>,

@@ -270,11 +270,11 @@ struct Chunks<'a, 'extractor> {
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
}

impl<'a, 'extractor> Chunks<'a, 'extractor> {
impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
#[allow(clippy::too_many_arguments)]
pub fn new(
embedder: &'a Embedder,

@@ -284,7 +284,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
doc_alloc: &'a Bump,
) -> Self {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();

@@ -368,7 +368,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
unused_vectors_distribution: &UnusedVectorsDistributionBump,
threads: &ThreadPoolNoAbort,
sender: &EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
) -> Result<()> {
if let Some(external_docid) = has_manual_generation {

@@ -70,7 +70,7 @@ impl<
F: FnOnce(&'extractor Bump) -> Result<T>,
{
let doc_alloc =
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024))));
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024))));
let doc_alloc = doc_alloc.0.take();
let fields_ids_map = fields_ids_map_store
.get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into());
@@ -1,4 +1,5 @@
use std::cmp::Ordering;
use std::sync::atomic::AtomicBool;
use std::sync::{OnceLock, RwLock};
use std::thread::{self, Builder};

@@ -41,7 +42,7 @@ use crate::update::settings::InnerIndexSettings;
use crate::update::{FacetsUpdateBulk, GrenadParameters};
use crate::vector::{ArroyWrapper, EmbeddingConfigs, Embeddings};
use crate::{
FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
ThreadPoolNoAbortBuilder, UserError,
};

@@ -61,6 +62,7 @@ mod update_by_function;
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
wtxn: &mut RwTxn,
index: &'index Index,
pool: &ThreadPoolNoAbort,
grenad_parameters: GrenadParameters,
db_fields_ids_map: &'indexer FieldsIdsMap,
new_fields_ids_map: FieldsIdsMap,

@@ -75,7 +77,29 @@ where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000);
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);

// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
(grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default
|max_memory| {
// 2% of the indexing memory
let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
let new_grenad_parameters = GrenadParameters {
max_memory: Some(
max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
),
..grenad_parameters
};
(new_grenad_parameters, total_bbbuffer_capacity)
},
);
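A worked sketch of the capacity arithmetic above, with the same constants: a 50 MiB-per-thread floor, otherwise max_memory / 100 / 2, and at least 100 MiB left for grenad after the buffers are carved out:

```rust
// Returns (grenad max_memory, bbbuffer capacity) like the code above.
fn split_memory(max_memory: Option<usize>, threads: usize) -> (Option<usize>, usize) {
    let minimum_capacity = 50 * 1024 * 1024 * threads;
    match max_memory {
        None => (None, 2 * minimum_capacity),
        Some(max_memory) => {
            let capacity = (max_memory / 100 / 2).max(minimum_capacity);
            let grenad = max_memory.saturating_sub(capacity).max(100 * 1024 * 1024);
            (Some(grenad), capacity)
        }
    }
}

fn main() {
    // 4 GiB on 4 threads: max_memory / 200 is roughly 20 MiB, well under
    // the 200 MiB per-thread minimum, so the minimum wins.
    let (grenad, capacity) = split_memory(Some(4 * 1024 * 1024 * 1024), 4);
    assert_eq!(capacity, 200 * 1024 * 1024);
    assert_eq!(grenad, Some((4 * 1024 - 200) * 1024 * 1024));
}
```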
let (extractor_sender, mut writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();

let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
@@ -94,241 +118,273 @@ where
send_progress,
};

let mut index_embeddings = index.embedding_configs(wtxn)?;
let mut field_distribution = index.field_distribution(wtxn)?;
let mut document_ids = index.documents_ids(wtxn)?;

thread::scope(|s| -> Result<()> {
let indexer_span = tracing::Span::current();
let embedders = &embedders;
let finished_extraction = &finished_extraction;
// prevent moving the field_distribution and document_ids in the inner closure...
let field_distribution = &mut field_distribution;
let document_ids = &mut document_ids;
let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
let _entered = span.enter();

let rtxn = index.read_txn()?;

// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());

extract(document_changes,
&document_extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
)?;

for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta {
let current = field_distribution.entry(field).or_default();
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
}
document_extractor_data.docids_delta.apply_to(document_ids);
}

field_distribution.retain(|_, v| *v != 0);

let facet_field_ids_delta;

{
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted");
pool.install(move || {
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
let _entered = span.enter();

facet_field_ids_delta = merge_and_send_facet_docids(
FacetedDocidsExtractor::run_extraction(
grenad_parameters,
let rtxn = index.read_txn()?;

// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
let _entered = span.enter();
extract(
document_changes,
&document_extractor,
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets
)?,
FacetDatabases::new(index),
index,
extractor_sender.facet_docids(),
)?;
}

{
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();

let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = WordDocidsExtractors::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWords
)?;

// TODO Word Docids Merger
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
&datastore,
Step::ExtractingDocuments,
)?;
}

// Word Fid Docids Merging
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
let _entered = span.enter();
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta {
let current = field_distribution.entry(field).or_default();
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
}
document_extractor_data.docids_delta.apply_to(document_ids);
}

field_distribution.retain(|_, v| *v != 0);
}

// Exact Word Docids Merging
let facet_field_ids_delta;

{
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
let _entered = span.enter();

// Word Position Docids Merging
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
FacetedDocidsExtractor::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets
)?
};

// Fid Word Count Docids Merging
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();

// run the proximity extraction only if the precision is by word
// this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();

let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWordProximity,
)?;

merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}

'vectors: {
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();

let mut index_embeddings = index.embedding_configs(&rtxn)?;
if index_embeddings.is_empty() {
break 'vectors;
}

let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;

for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() {
let data = &mut data.get_mut().0;
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
deladd.apply_to(&mut config.user_provided);
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
FacetDatabases::new(index),
index,
extractor_sender.facet_docids(),
)?;
}
}

embedding_sender.finish(index_embeddings).unwrap();
}
{
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();

'geo: {
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWords
)?
};

let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::WritingGeoPoints
)?;
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?;
}

merge_and_send_rtree(
datastore,
&rtxn,
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
}

{
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}

{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}

{
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}

// run the proximity extraction only if the precision is by word
// this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord {
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();

<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWordProximity,
)?
};

{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();

merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}

'vectors: {
if index_embeddings.is_empty() {
break 'vectors;
}

let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();

extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingEmbeddings,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();

for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() {
let data = &mut data.get_mut().0;
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
deladd.apply_to(&mut config.user_provided);
}
}
}
}

'geo: {
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());

{
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();

extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::WritingGeoPoints
)?;
}

merge_and_send_rtree(
datastore,
&rtxn,
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
)?;
}

{
let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH");
let _entered = span.enter();
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
}

Result::Ok(facet_field_ids_delta)
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

Result::Ok((facet_field_ids_delta, index_embeddings))
}).unwrap()
})?;

let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);

let vector_arroy = index.vector_arroy;
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
let indexer_span = tracing::Span::current();
let arroy_writers: Result<HashMap<_, _>> = embedders
.inner_as_ref()

@@ -351,85 +407,112 @@ where
})
.collect();

// Used by the ArroySetVector to copy the embedding into an
// aligned memory area, required by arroy to accept a new vector.
let mut aligned_embedding = Vec::new();
let mut arroy_writers = arroy_writers?;
for operation in writer_receiver {
match operation {
WriterOperation::DbOperation(db_operation) => {
let database = db_operation.database(index);
match db_operation.entry() {
EntryOperation::Delete(e) => {
if !database.delete(wtxn, e.entry())? {
unreachable!("We tried to delete an unknown key")
}
}
EntryOperation::Write(e) => database.put(wtxn, e.key(), e.value())?,
}

{
let span = tracing::trace_span!(target: "indexing::write_db", "all");
let _entered = span.enter();

let span = tracing::trace_span!(target: "indexing::write_db", "post_merge");
let mut _entered_post_merge = None;

while let Some(action) = writer_receiver.recv_action() {
if _entered_post_merge.is_none()
&& finished_extraction.load(std::sync::atomic::Ordering::Relaxed)
{
_entered_post_merge = Some(span.enter());
}
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
ArroyOperation::DeleteVectors { docid } => {
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
&mut arroy_writers
{
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;

match action {
ReceiverAction::WakeUp => (),
ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => {
let database_name = database.database_name();
let database = database.database(index);
if let Err(error) = database.put(wtxn, &key, &value) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: bstr::BString::from(&key[..]),
value_length: value.len(),
error,
}));
}
}
ArroyOperation::SetVectors {
docid,
embedder_id,
embeddings: raw_embeddings,
} => {
ReceiverAction::LargeVectors(large_vectors) => {
let LargeVectors { docid, embedder_id, .. } = large_vectors;
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
// TODO: switch to Embeddings
let mut embeddings = Embeddings::new(*dimensions);
for embedding in raw_embeddings {
embeddings.append(embedding).unwrap();
for embedding in large_vectors.read_embeddings(*dimensions) {
embeddings.push(embedding.to_vec()).unwrap();
}

writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
ArroyOperation::SetVector { docid, embedder_id, embedding } => {
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_item(wtxn, docid, &embedding)?;
}
ArroyOperation::Finish { configs } => {
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter();
}

(indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase,
));

for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
&mut arroy_writers
{
let dimensions = *dimensions;
writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
&indexing_context.must_stop_processing,
)?;
}

index.put_embedding_configs(wtxn, configs)?;
}
},
// Every time there is a message in the channel we search
// for new entries in the BBQueue buffers.
write_from_bbqueue(
&mut writer_receiver,
index,
wtxn,
&arroy_writers,
&mut aligned_embedding,
)?;
}

// Once the extractor/writer channel is closed
// we must process the remaining BBQueue messages.
write_from_bbqueue(
&mut writer_receiver,
index,
wtxn,
&arroy_writers,
&mut aligned_embedding,
)?;
}
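The loop above drains the BBQueue buffers on every channel action and once more after the channel closes, so frames written after the last wake-up are never lost. A minimal sketch of that drive pattern using std primitives (a Vec behind a Mutex stands in for the ring buffers):

```rust
use std::sync::mpsc;
use std::sync::{Arc, Mutex};
use std::thread;

fn main() {
    let queue = Arc::new(Mutex::new(Vec::new()));
    let (tx, rx) = mpsc::channel();
    let producer = {
        let queue = Arc::clone(&queue);
        thread::spawn(move || {
            for i in 0..5 {
                queue.lock().unwrap().push(i);
                let _ = tx.send(()); // like ReceiverAction::WakeUp
            }
            // tx is dropped here, closing the channel.
        })
    };
    let mut written = Vec::new();
    while rx.recv().is_ok() {
        written.append(&mut queue.lock().unwrap()); // drain on every action
    }
    written.append(&mut queue.lock().unwrap()); // final drain after close
    producer.join().unwrap();
    assert_eq!(written.len(), 5);
}
```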
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));

let facet_field_ids_delta = extractor_handle.join().unwrap()?;
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;

'vectors: {
let span =
tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter();

if index_embeddings.is_empty() {
break 'vectors;
}

(indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase,
));

let mut rng = rand::rngs::StdRng::seed_from_u64(42);
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
let dimensions = *dimensions;
writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
&indexing_context.must_stop_processing,
)?;
}

index.put_embedding_configs(wtxn, index_embeddings)?;
}

(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));

compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
}

compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;

(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));

@@ -464,6 +547,72 @@ where
Ok(())
}
/// A function dedicated to managing all the available BBQueue frames.
///
/// It reads all the available frames, does the corresponding database operations
/// and stops when no frames are available.
fn write_from_bbqueue(
writer_receiver: &mut WriterBbqueueReceiver<'_>,
index: &Index,
wtxn: &mut RwTxn<'_>,
arroy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, ArroyWrapper, usize)>,
aligned_embedding: &mut Vec<f32>,
) -> crate::Result<()> {
while let Some(frame_with_header) = writer_receiver.recv_frame() {
match frame_with_header.header() {
EntryHeader::DbOperation(operation) => {
let database_name = operation.database.database_name();
let database = operation.database.database(index);
let frame = frame_with_header.frame();
match operation.key_value(frame) {
(key, Some(value)) => {
if let Err(error) = database.put(wtxn, key, value) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: key.into(),
value_length: value.len(),
error,
}));
}
}
(key, None) => match database.delete(wtxn, key) {
Ok(false) => {
unreachable!("We tried to delete an unknown key: {key:?}")
}
Ok(_) => (),
Err(error) => {
return Err(Error::InternalError(InternalError::StoreDeletion {
database_name,
key: key.into(),
error,
}));
}
},
}
}
EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers {
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
}
}
EntryHeader::ArroySetVectors(asvs) => {
let ArroySetVectors { docid, embedder_id, .. } = asvs;
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
embeddings.append(all_embeddings.to_vec()).unwrap();
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
}
}

Ok(())
}

#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
fn compute_prefix_database(
index: &Index,
@@ -9,8 +9,8 @@ use roaring::RoaringBitmap;

 use super::channel::*;
 use super::extract::{
-    merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
-    GeoExtractorData,
+    merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
+    FacetKind, GeoExtractorData,
 };
 use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};

@@ -19,7 +19,7 @@ pub fn merge_and_send_rtree<'extractor, MSP>(
     datastore: impl IntoIterator<Item = RefCell<GeoExtractorData<'extractor>>>,
     rtxn: &RoTxn,
     index: &Index,
-    geo_sender: GeoSender<'_>,
+    geo_sender: GeoSender<'_, '_>,
     must_stop_processing: &MSP,
 ) -> Result<()>
 where
@@ -34,7 +34,7 @@ where
     }

     let mut frozen = data.into_inner().freeze()?;
-    for result in frozen.iter_and_clear_removed() {
+    for result in frozen.iter_and_clear_removed()? {
         let extracted_geo_point = result?;
         let removed = rtree.remove(&GeoPoint::from(extracted_geo_point));
         debug_assert!(removed.is_some());
@@ -42,7 +42,7 @@ where
         debug_assert!(removed);
     }

-    for result in frozen.iter_and_clear_inserted() {
+    for result in frozen.iter_and_clear_inserted()? {
         let extracted_geo_point = result?;
         rtree.insert(GeoPoint::from(extracted_geo_point));
         let inserted = faceted.insert(extracted_geo_point.docid);
@@ -56,38 +56,37 @@ where

     let rtree_mmap = unsafe { Mmap::map(&file)? };
     geo_sender.set_rtree(rtree_mmap).unwrap();
-    geo_sender.set_geo_faceted(&faceted).unwrap();
+    geo_sender.set_geo_faceted(&faceted)?;

     Ok(())
 }

 #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
-pub fn merge_and_send_docids<'extractor, MSP>(
+pub fn merge_and_send_docids<'extractor, MSP, D>(
     mut caches: Vec<BalancedCaches<'extractor>>,
     database: Database<Bytes, Bytes>,
     index: &Index,
-    docids_sender: impl DocidsSender + Sync,
+    docids_sender: WordDocidsSender<D>,
     must_stop_processing: &MSP,
 ) -> Result<()>
 where
     MSP: Fn() -> bool + Sync,
+    D: DatabaseType + Sync,
 {
     transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
         let rtxn = index.read_txn()?;
-        let mut buffer = Vec::new();
         if must_stop_processing() {
             return Err(InternalError::AbortedIndexation.into());
         }
-        merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
+        merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
             let current = database.get(&rtxn, key)?;
             match merge_cbo_bitmaps(current, del, add)? {
                 Operation::Write(bitmap) => {
-                    let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
-                    docids_sender.write(key, value).unwrap();
+                    docids_sender.write(key, &bitmap)?;
                     Ok(())
                 }
                 Operation::Delete => {
-                    docids_sender.delete(key).unwrap();
+                    docids_sender.delete(key)?;
                     Ok(())
                 }
                 Operation::Ignore => Ok(()),
@@ -101,26 +100,24 @@ pub fn merge_and_send_facet_docids<'extractor>(
     mut caches: Vec<BalancedCaches<'extractor>>,
     database: FacetDatabases,
     index: &Index,
-    docids_sender: impl DocidsSender + Sync,
+    docids_sender: FacetDocidsSender,
 ) -> Result<FacetFieldIdsDelta> {
     transpose_and_freeze_caches(&mut caches)?
         .into_par_iter()
         .map(|frozen| {
             let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
             let rtxn = index.read_txn()?;
-            let mut buffer = Vec::new();
-            merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
+            merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
                 let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
                 match merge_cbo_bitmaps(current, del, add)? {
                     Operation::Write(bitmap) => {
                         facet_field_ids_delta.register_from_key(key);
-                        let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
-                        docids_sender.write(key, value).unwrap();
+                        docids_sender.write(key, &bitmap)?;
                         Ok(())
                     }
                     Operation::Delete => {
                         facet_field_ids_delta.register_from_key(key);
-                        docids_sender.delete(key).unwrap();
+                        docids_sender.delete(key)?;
                         Ok(())
                     }
                     Operation::Ignore => Ok(()),
@@ -252,10 +249,3 @@ fn merge_cbo_bitmaps(
         }
     }
 }
-
-/// TODO Return the slice directly from the serialize_into method
-fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec<u8>) -> &'b [u8] {
-    buffer.clear();
-    CboRoaringBitmapCodec::serialize_into(bitmap, buffer);
-    buffer.as_slice()
-}
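For intuition, the `merge_cbo_bitmaps` calls above resolve a deletion bitmap and an addition bitmap against the value currently stored in LMDB before anything is sent to the writer. A minimal sketch of that del/add semantics with the roaring crate — the function and the `Operation` outcome enum mirror the calls above, but the exact signature is a reconstruction, not the actual milli code:

use roaring::RoaringBitmap;

// Hypothetical mirror of merge_cbo_bitmaps: the outcome tells the caller
// whether to write a new bitmap, delete the key, or leave it untouched.
enum Operation {
    Write(RoaringBitmap),
    Delete,
    Ignore,
}

fn merge_del_add(
    current: Option<RoaringBitmap>,
    del: Option<RoaringBitmap>,
    add: Option<RoaringBitmap>,
) -> Operation {
    let current = current.unwrap_or_default();
    // New value = (current - deletions) | additions.
    let new = (&current - &del.unwrap_or_default()) | add.unwrap_or_default();
    if new == current {
        Operation::Ignore
    } else if new.is_empty() {
        Operation::Delete
    } else {
        Operation::Write(new)
    }
}

fn main() {
    let current: RoaringBitmap = (0u32..4).collect();
    let del: RoaringBitmap = [1u32, 2].into_iter().collect();
    let add: RoaringBitmap = [9u32].into_iter().collect();
    match merge_del_add(Some(current), Some(del), Some(add)) {
        Operation::Write(bitmap) => assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![0, 3, 9]),
        _ => unreachable!(),
    }
}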
@@ -5,6 +5,7 @@ pub trait RefCellExt<T: ?Sized> {
         &self,
     ) -> std::result::Result<RefMut<'_, T>, std::cell::BorrowMutError>;

+    #[track_caller]
     fn borrow_mut_or_yield(&self) -> RefMut<'_, T> {
         self.try_borrow_mut_or_yield().unwrap()
     }
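The `#[track_caller]` added above changes where a panic inside the helper is reported: the unwrap's panic location points at the code that called `borrow_mut_or_yield`, not at the trait's default method body. A small self-contained illustration (names are illustrative):

#[track_caller]
fn checked_half(n: i32) -> i32 {
    // With #[track_caller], this panic reports the caller's file and line,
    // not this function's, which makes unwrap-style helpers debuggable.
    assert!(n % 2 == 0, "expected an even number, got {n}");
    n / 2
}

fn main() {
    assert_eq!(checked_half(4), 2);
    // checked_half(3); // would panic, with the location pointing at this line
}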
@@ -11,8 +11,8 @@ pub enum Step {
     ExtractingEmbeddings,
     WritingGeoPoints,
     WritingToDatabase,
-    WritingEmbeddingsToDatabase,
     WaitingForExtractors,
+    WritingEmbeddingsToDatabase,
     PostProcessingFacets,
     PostProcessingWords,
     Finalizing,
@@ -29,8 +29,8 @@ impl Step {
             Step::ExtractingEmbeddings => "extracting embeddings",
             Step::WritingGeoPoints => "writing geo points",
             Step::WritingToDatabase => "writing to database",
-            Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
             Step::WaitingForExtractors => "waiting for extractors",
+            Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
             Step::PostProcessingFacets => "post-processing facets",
             Step::PostProcessingWords => "post-processing words",
             Step::Finalizing => "finalizing",
@@ -1,4 +1,4 @@
-use std::collections::HashSet;
+use std::collections::BTreeSet;
 use std::io::BufWriter;

 use fst::{Set, SetBuilder, Streamer};
@@ -75,18 +75,18 @@ pub struct PrefixData {

 #[derive(Debug)]
 pub struct PrefixDelta {
-    pub modified: HashSet<Prefix>,
-    pub deleted: HashSet<Prefix>,
+    pub modified: BTreeSet<Prefix>,
+    pub deleted: BTreeSet<Prefix>,
 }

 struct PrefixFstBuilder {
-    prefix_count_threshold: u64,
+    prefix_count_threshold: usize,
     max_prefix_length: usize,
     /// TODO: Replace the full memory allocation
     prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
     current_prefix: Vec<Prefix>,
-    current_prefix_count: Vec<u64>,
-    modified_prefixes: HashSet<Prefix>,
+    current_prefix_count: Vec<usize>,
+    modified_prefixes: BTreeSet<Prefix>,
     current_prefix_is_modified: Vec<bool>,
 }
@@ -95,7 +95,7 @@ impl PrefixFstBuilder {
         let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
             prefix_settings;

-        if !compute_prefixes {
+        if compute_prefixes != crate::index::PrefixSearch::IndexingTime {
             return None;
         }
@@ -110,7 +110,7 @@ impl PrefixFstBuilder {
             prefix_fst_builders,
             current_prefix: vec![Prefix::new(); max_prefix_length],
             current_prefix_count: vec![0; max_prefix_length],
-            modified_prefixes: HashSet::new(),
+            modified_prefixes: BTreeSet::new(),
             current_prefix_is_modified: vec![false; max_prefix_length],
         })
     }
@@ -180,7 +180,7 @@ impl PrefixFstBuilder {
         let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
         let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
         let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
-        let mut deleted_prefixes = HashSet::new();
+        let mut deleted_prefixes = BTreeSet::new();
         {
             let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
             while let Some(prefix) = deleted_prefixes_stream.next() {
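The HashSet-to-BTreeSet switch that starts here (and continues through the prefix-docids code below) trades hashing for ordered storage: iterating a `BTreeSet` always yields prefixes in sorted order, so prefix batches are processed deterministically across runs. A tiny illustration:

use std::collections::{BTreeSet, HashSet};

fn main() {
    let prefixes = ["the", "of", "and"];
    let hashed: HashSet<&str> = prefixes.into_iter().collect();
    let ordered: BTreeSet<&str> = prefixes.into_iter().collect();
    // HashSet iteration order is unspecified and can differ between runs.
    println!("{hashed:?}");
    // BTreeSet iteration is always sorted.
    assert_eq!(ordered.into_iter().collect::<Vec<_>>(), ["and", "of", "the"]);
}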
@@ -1,5 +1,5 @@
 use std::cell::RefCell;
-use std::collections::HashSet;
+use std::collections::BTreeSet;
 use std::io::{BufReader, BufWriter, Read, Seek, Write};

 use hashbrown::HashMap;
@@ -37,8 +37,8 @@ impl WordPrefixDocids {
     fn execute(
         self,
         wtxn: &mut heed::RwTxn,
-        prefix_to_compute: &HashSet<Prefix>,
-        prefix_to_delete: &HashSet<Prefix>,
+        prefix_to_compute: &BTreeSet<Prefix>,
+        prefix_to_delete: &BTreeSet<Prefix>,
     ) -> Result<()> {
         delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
         self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@@ -48,7 +48,7 @@ impl WordPrefixDocids {
     fn recompute_modified_prefixes(
         &self,
         wtxn: &mut RwTxn,
-        prefixes: &HashSet<Prefix>,
+        prefixes: &BTreeSet<Prefix>,
     ) -> Result<()> {
         // We fetch the docids associated to the newly added word prefix fst only.
         // And collect the CboRoaringBitmaps pointers in an HashMap.
@@ -76,7 +76,7 @@ impl WordPrefixDocids {
                 .union()?;

             buffer.clear();
-            CboRoaringBitmapCodec::serialize_into(&output, buffer);
+            CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
             index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
             file.write_all(buffer)
         })?;
@@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
     pub fn from_prefixes(
         database: Database<Bytes, CboRoaringBitmapCodec>,
         rtxn: &'rtxn RoTxn,
-        prefixes: &'a HashSet<Prefix>,
+        prefixes: &'a BTreeSet<Prefix>,
     ) -> heed::Result<Self> {
         let database = database.remap_data_type::<Bytes>();
@@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
     fn execute(
         self,
         wtxn: &mut heed::RwTxn,
-        prefix_to_compute: &HashSet<Prefix>,
-        prefix_to_delete: &HashSet<Prefix>,
+        prefix_to_compute: &BTreeSet<Prefix>,
+        prefix_to_delete: &BTreeSet<Prefix>,
     ) -> Result<()> {
         delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
         self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
     fn recompute_modified_prefixes(
         &self,
         wtxn: &mut RwTxn,
-        prefixes: &HashSet<Prefix>,
+        prefixes: &BTreeSet<Prefix>,
     ) -> Result<()> {
         // We fetch the docids associated to the newly added word prefix fst only.
         // And collect the CboRoaringBitmaps pointers in an HashMap.
@@ -211,7 +211,7 @@ impl WordPrefixIntegerDocids {
                 .union()?;

             buffer.clear();
-            CboRoaringBitmapCodec::serialize_into(&output, buffer);
+            CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
             index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() });
             file.write_all(buffer)?;
         }
@@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
     pub fn from_prefixes(
         database: Database<Bytes, CboRoaringBitmapCodec>,
         rtxn: &'rtxn RoTxn,
-        prefixes: &'a HashSet<Prefix>,
+        prefixes: &'a BTreeSet<Prefix>,
     ) -> heed::Result<Self> {
         let database = database.remap_data_type::<Bytes>();
@@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
 fn delete_prefixes(
     wtxn: &mut RwTxn,
     prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
-    prefixes: &HashSet<Prefix>,
+    prefixes: &BTreeSet<Prefix>,
 ) -> Result<()> {
     // We remove all the entries that are no more required in this word prefix docids database.
     for prefix in prefixes {
@@ -309,8 +309,8 @@ fn delete_prefixes(
 pub fn compute_word_prefix_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixDocids::new(
@@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
 pub fn compute_exact_word_prefix_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixDocids::new(
@@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
 pub fn compute_word_prefix_fid_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixIntegerDocids::new(
@@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
 pub fn compute_word_prefix_position_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixIntegerDocids::new(
@@ -17,7 +17,8 @@ use super::IndexerConfig;
 use crate::criterion::Criterion;
 use crate::error::UserError;
 use crate::index::{
-    IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
+    IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
+    DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
 };
 use crate::order_by_map::OrderByMap;
 use crate::prompt::default_max_bytes;
@@ -177,6 +178,8 @@ pub struct Settings<'a, 't, 'i> {
     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
     search_cutoff: Setting<u64>,
     localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
+    prefix_search: Setting<PrefixSearch>,
+    facet_search: Setting<bool>,
 }

 impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@@ -212,6 +215,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             embedder_settings: Setting::NotSet,
             search_cutoff: Setting::NotSet,
             localized_attributes_rules: Setting::NotSet,
+            prefix_search: Setting::NotSet,
+            facet_search: Setting::NotSet,
             indexer_config,
         }
     }
@@ -418,6 +423,22 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.localized_attributes_rules = Setting::Reset;
     }

+    pub fn set_prefix_search(&mut self, value: PrefixSearch) {
+        self.prefix_search = Setting::Set(value);
+    }
+
+    pub fn reset_prefix_search(&mut self) {
+        self.prefix_search = Setting::Reset;
+    }
+
+    pub fn set_facet_search(&mut self, value: bool) {
+        self.facet_search = Setting::Set(value);
+    }
+
+    pub fn reset_facet_search(&mut self) {
+        self.facet_search = Setting::Reset;
+    }
+
     #[tracing::instrument(
         level = "trace",
         skip(self, progress_callback, should_abort, settings_diff),
@@ -944,7 +965,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                     false
                 } else {
                     self.index.put_proximity_precision(self.wtxn, new)?;
-                    true
+                    old.is_some() || new != ProximityPrecision::default()
                 }
             }
             Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
@@ -954,6 +975,42 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         Ok(changed)
     }

+    fn update_prefix_search(&mut self) -> Result<bool> {
+        let changed = match self.prefix_search {
+            Setting::Set(new) => {
+                let old = self.index.prefix_search(self.wtxn)?;
+                if old == Some(new) {
+                    false
+                } else {
+                    self.index.put_prefix_search(self.wtxn, new)?;
+                    old.is_some() || new != PrefixSearch::default()
+                }
+            }
+            Setting::Reset => self.index.delete_prefix_search(self.wtxn)?,
+            Setting::NotSet => false,
+        };
+
+        Ok(changed)
+    }
+
+    fn update_facet_search(&mut self) -> Result<bool> {
+        let changed = match self.facet_search {
+            Setting::Set(new) => {
+                let old = self.index.facet_search(self.wtxn)?;
+                if old == new {
+                    false
+                } else {
+                    self.index.put_facet_search(self.wtxn, new)?;
+                    true
+                }
+            }
+            Setting::Reset => self.index.delete_facet_search(self.wtxn)?,
+            Setting::NotSet => false,
+        };
+
+        Ok(changed)
+    }
+
     fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> {
         match std::mem::take(&mut self.embedder_settings) {
             Setting::Set(configs) => self.update_embedding_configs_set(configs),
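Both new updaters follow the same tri-state convention used throughout this file: `Set` writes only when the value actually differs, `Reset` deletes the stored value, and `NotSet` is a no-op. A stripped-down sketch of that pattern with standalone types (the real `Setting` lives in milli's settings module; these names are illustrative):

enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

/// Returns true when the stored value actually changed.
fn apply<T: PartialEq>(setting: Setting<T>, stored: &mut Option<T>) -> bool {
    match setting {
        Setting::Set(new) => {
            if stored.as_ref() == Some(&new) {
                false // unchanged: avoid a useless write
            } else {
                *stored = Some(new);
                true
            }
        }
        Setting::Reset => stored.take().is_some(),
        Setting::NotSet => false,
    }
}

fn main() {
    let mut prefix_search: Option<bool> = None;
    assert!(apply(Setting::Set(true), &mut prefix_search)); // changed
    assert!(!apply(Setting::Set(true), &mut prefix_search)); // no-op
    assert!(apply(Setting::Reset, &mut prefix_search)); // cleared
}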
@@ -1203,6 +1260,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.update_searchable()?;
         self.update_exact_attributes()?;
         self.update_proximity_precision()?;
+        self.update_prefix_search()?;
+        self.update_facet_search()?;
         self.update_localized_attributes_rules()?;

         let embedding_config_updates = self.update_embedding_configs()?;
@@ -1282,6 +1341,7 @@ impl InnerIndexSettingsDiff {
             || old_settings.allowed_separators != new_settings.allowed_separators
             || old_settings.dictionary != new_settings.dictionary
             || old_settings.proximity_precision != new_settings.proximity_precision
+            || old_settings.prefix_search != new_settings.prefix_search
             || old_settings.localized_searchable_fields_ids
                 != new_settings.localized_searchable_fields_ids
         };
@@ -1372,7 +1432,7 @@ impl InnerIndexSettingsDiff {
         }
     }

-    pub fn reindex_facets(&self) -> bool {
+    pub fn facet_fids_changed(&self) -> bool {
         let existing_fields = &self.new.existing_fields;
         if existing_fields.iter().any(|field| field.contains('.')) {
             return true;
@@ -1392,7 +1452,15 @@ impl InnerIndexSettingsDiff {
         }

         (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
-            || self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
     }

+    pub fn global_facet_settings_changed(&self) -> bool {
+        self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
+            || self.old.facet_search != self.new.facet_search
+    }
+
+    pub fn reindex_facets(&self) -> bool {
+        self.facet_fids_changed() || self.global_facet_settings_changed()
+    }
+
     pub fn reindex_vectors(&self) -> bool {
@@ -1432,6 +1500,8 @@ pub(crate) struct InnerIndexSettings {
     pub non_faceted_fields_ids: Vec<FieldId>,
     pub localized_searchable_fields_ids: LocalizedFieldIds,
     pub localized_faceted_fields_ids: LocalizedFieldIds,
+    pub prefix_search: PrefixSearch,
+    pub facet_search: bool,
 }

 impl InnerIndexSettings {
@@ -1457,6 +1527,8 @@ impl InnerIndexSettings {
             Some(embedding_configs) => embedding_configs,
             None => embedders(index.embedding_configs(rtxn)?)?,
         };
+        let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
+        let facet_search = index.facet_search(rtxn)?;
         let existing_fields: HashSet<_> = index
             .field_distribution(rtxn)?
             .into_iter()
@@ -1514,6 +1586,8 @@ impl InnerIndexSettings {
             non_faceted_fields_ids: vectors_fids.clone(),
             localized_searchable_fields_ids,
             localized_faceted_fields_ids,
+            prefix_search,
+            facet_search,
         })
     }
@@ -2721,6 +2795,8 @@ mod tests {
             embedder_settings,
             search_cutoff,
             localized_attributes_rules,
+            prefix_search,
+            facet_search,
         } = settings;
         assert!(matches!(searchable_fields, Setting::NotSet));
         assert!(matches!(displayed_fields, Setting::NotSet));
@@ -2746,6 +2822,8 @@ mod tests {
             assert!(matches!(embedder_settings, Setting::NotSet));
             assert!(matches!(search_cutoff, Setting::NotSet));
             assert!(matches!(localized_attributes_rules, Setting::NotSet));
+            assert!(matches!(prefix_search, Setting::NotSet));
+            assert!(matches!(facet_search, Setting::NotSet));
         })
         .unwrap();
     }
@@ -9,7 +9,7 @@ use crate::{Index, Result, SmallString32};
 pub struct WordsPrefixesFst<'t, 'i> {
     wtxn: &'t mut RwTxn<'i>,
     index: &'i Index,
-    threshold: u32,
+    threshold: usize,
     max_prefix_length: usize,
 }
@@ -24,8 +24,8 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
     ///
     /// Default value is 100. This value must be higher than 50 and will be clamped
     /// to this bound otherwise.
-    pub fn threshold(&mut self, value: u32) -> &mut Self {
-        self.threshold = value.max(50);
+    pub fn threshold(&mut self, value: usize) -> &mut Self {
+        self.threshold = value;
         self
     }
@@ -34,7 +34,7 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
     /// Default value is `4` bytes. This value must be between 1 and 25 will be clamped
     /// to these bounds, otherwise.
     pub fn max_prefix_length(&mut self, value: usize) -> &mut Self {
-        self.max_prefix_length = value.clamp(1, 25);
+        self.max_prefix_length = value;
         self
     }
@@ -475,7 +475,7 @@ impl<F> Embeddings<F> {
         Ok(())
     }

-    /// Append a flat vector of embeddings a the end of the embeddings.
+    /// Append a flat vector of embeddings at the end of the embeddings.
     ///
     /// If `embeddings.len() % self.dimension != 0`, then the append operation fails.
     pub fn append(&mut self, mut embeddings: Vec<F>) -> Result<(), Vec<F>> {
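The invariant in that doc comment is easy to state standalone: a flat buffer may only be appended when it contains a whole number of `dimension`-sized vectors, and on failure the rejected buffer is handed back to the caller. A minimal sketch under those assumptions (simplified concrete types, not milli's generic `Embeddings<F>`):

struct Embeddings {
    dimension: usize,
    data: Vec<f32>,
}

impl Embeddings {
    /// Append a flat buffer of embeddings, rejecting partial vectors.
    fn append(&mut self, embeddings: Vec<f32>) -> Result<(), Vec<f32>> {
        if embeddings.len() % self.dimension != 0 {
            return Err(embeddings); // hand the rejected buffer back
        }
        self.data.extend(embeddings);
        Ok(())
    }
}

fn main() {
    let mut store = Embeddings { dimension: 3, data: Vec::new() };
    assert!(store.append(vec![0.0; 6]).is_ok()); // two 3-d vectors
    assert!(store.append(vec![0.0; 4]).is_err()); // not a whole vector
}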
@@ -64,6 +64,7 @@ fn test_facet_distribution_with_no_facet_values() {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -101,6 +101,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -333,6 +333,7 @@ fn criteria_ascdesc() {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -142,6 +142,7 @@ fn test_typo_disabled_on_word() {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -82,6 +82,10 @@ pub struct BenchDeriveArgs {
     /// Reason for the benchmark invocation
     #[arg(short, long)]
     reason: Option<String>,
+
+    /// The maximum time in seconds we allow for fetching the task queue before timing out.
+    #[arg(long, default_value_t = 60)]
+    tasks_queue_timeout_secs: u64,
 }

 pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
@@ -127,7 +131,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
     let meili_client = Client::new(
         Some("http://127.0.0.1:7700".into()),
         args.master_key.as_deref(),
         Some(std::time::Duration::from_secs(60)),
-        Some(std::time::Duration::from_secs(60)),
+        Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)),
     )?;

     // enter runtime
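The new flag follows the standard clap derive pattern: `default_value_t` supplies the default, and the parsed seconds are turned into a `Duration` at the call site. A minimal standalone sketch, assuming the clap 4 derive API (the struct name is illustrative):

use clap::Parser;

#[derive(Parser)]
struct Args {
    /// The maximum time in seconds we allow for fetching the task queue before timing out.
    #[arg(long, default_value_t = 60)]
    tasks_queue_timeout_secs: u64,
}

fn main() {
    // Passing `--tasks-queue-timeout-secs 120` overrides the default of 60.
    let args = Args::parse();
    let timeout = std::time::Duration::from_secs(args.tasks_queue_timeout_secs);
    println!("task queue timeout: {timeout:?}");
}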
@@ -16,6 +16,7 @@ struct ListFeaturesDeriveArgs {
 #[command(author, version, about, long_about)]
 #[command(name = "cargo xtask")]
 #[command(bin_name = "cargo xtask")]
+#[allow(clippy::large_enum_variant)] // please, that's enough...
 enum Command {
     ListFeatures(ListFeaturesDeriveArgs),
     Bench(BenchDeriveArgs),
workloads/hackernews-add-new-documents.json (new file, 105 lines)
@@ -0,0 +1,105 @@
{
    "name": "hackernews.add_new_documents",
    "run_count": 3,
    "extra_cli_args": [],
    "assets": {
        "hackernews-01.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
            "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
        },
        "hackernews-02.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
            "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
        },
        "hackernews-03.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
            "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
        },
        "hackernews-04.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
            "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
        },
        "hackernews-05.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
            "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
        }
    },
    "precommands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "displayedAttributes": ["title", "by", "score", "time", "text"],
                    "searchableAttributes": ["title", "text"],
                    "filterableAttributes": ["by", "kids", "parent"],
                    "sortableAttributes": ["score", "time"]
                }
            },
            "synchronous": "WaitForTask"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-01.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-02.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-03.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-04.ndjson" },
            "synchronous": "WaitForTask"
        }
    ],
    "commands": [
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-05.ndjson" },
            "synchronous": "WaitForTask"
        }
    ]
}
workloads/hackernews-modify-facet-numbers.json (new file, 111 lines)
@@ -0,0 +1,111 @@
{
    "name": "hackernews.modify_facet_numbers",
    "run_count": 3,
    "extra_cli_args": [],
    "assets": {
        "hackernews-01.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
            "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
        },
        "hackernews-02.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
            "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
        },
        "hackernews-03.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
            "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
        },
        "hackernews-04.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
            "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
        },
        "hackernews-05.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
            "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
        },
        "hackernews-02-modified-filters.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
            "sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
        }
    },
    "precommands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "displayedAttributes": ["title", "by", "score", "time", "text"],
                    "searchableAttributes": ["title", "text"],
                    "filterableAttributes": ["by", "kids", "parent"],
                    "sortableAttributes": ["score", "time"]
                }
            },
            "synchronous": "WaitForTask"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-01.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-02.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-03.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-04.ndjson" },
            "synchronous": "WaitForTask"
        }
    ],
    "commands": [
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-02-modified-filters.ndjson" },
            "synchronous": "WaitForTask"
        }
    ]
}
workloads/hackernews-modify-facet-strings.json (new file, 111 lines)
@@ -0,0 +1,111 @@
{
    "name": "hackernews.modify_facet_strings",
    "run_count": 3,
    "extra_cli_args": [],
    "assets": {
        "hackernews-01.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
            "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
        },
        "hackernews-02.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
            "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
        },
        "hackernews-03.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
            "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
        },
        "hackernews-04.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
            "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
        },
        "hackernews-05.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
            "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
        },
        "hackernews-01-modified-filters.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
            "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
        }
    },
    "precommands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "displayedAttributes": ["title", "by", "score", "time", "text"],
                    "searchableAttributes": ["title", "text"],
                    "filterableAttributes": ["by", "kids", "parent"],
                    "sortableAttributes": ["score", "time"]
                }
            },
            "synchronous": "WaitForTask"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-01.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-02.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-03.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-04.ndjson" },
            "synchronous": "WaitForTask"
        }
    ],
    "commands": [
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-01-modified-filters.ndjson" },
            "synchronous": "WaitForTask"
        }
    ]
}
workloads/hackernews-modify-searchables.json (new file, 123 lines)
@@ -0,0 +1,123 @@
{
    "name": "hackernews.modify_searchables",
    "run_count": 3,
    "extra_cli_args": [],
    "assets": {
        "hackernews-01.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
            "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
        },
        "hackernews-02.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
            "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
        },
        "hackernews-03.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
            "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
        },
        "hackernews-04.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
            "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
        },
        "hackernews-05.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
            "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
        },
        "hackernews-01-modified-searchables.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson",
            "sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c"
        },
        "hackernews-02-modified-searchables.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson",
            "sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e"
        }
    },
    "precommands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "displayedAttributes": ["title", "by", "score", "time", "text"],
                    "searchableAttributes": ["title", "text"],
                    "filterableAttributes": ["by", "kids", "parent"],
                    "sortableAttributes": ["score", "time"]
                }
            },
            "synchronous": "WaitForTask"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-01.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-02.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-03.ndjson" },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-04.ndjson" },
            "synchronous": "WaitForTask"
        }
    ],
    "commands": [
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-01-modified-searchables.ndjson" },
            "synchronous": "WaitForTask"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "hackernews-02-modified-searchables.ndjson" },
            "synchronous": "WaitForTask"
        }
    ]
}