Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-22 06:11:01 +00:00)

Compare commits: v1.12.0-rc...more-effic (93 commits)
Commits (SHA1):

1cf14e765f
4a082683df
26be5e0733
bd5110a2fe
fa8b9acdf6
2b74d1824b
c77b00d3ac
c77073efcc
1537323eb9
a0a3b55700
214b51de87
95975944d7
9a9383643f
cac355bfa7
9020a50df8
52843123d4
6298db5bea
a003a0934a
3a11e39c01
5f896b1050
d0c4e6da6b
2da5584bb5
b7eb802ae6
2e32d0474c
cb99ac6f7e
be411435f5
29ef164530
739c52a3cd
7a2af06b1e
cb0c3a5aad
8388698993
cbcf6c9ba3
bf742d81cf
7458f0386c
fc1df5793c
3ded069042
261d2ceb06
1a17e2e572
5b8cd68abe
5ce9acb0b9
953a82ca04
54341c2e80
96831ed9bb
0459b1a242
8ecb726683
297e72e262
0ad2f57a92
71d53f413f
054622bd16
e905a72d73
2e879c1df8
d040aff101
5e30731cad
beeb31ce41
057143214d
6a1d26a60c
d78f4666a0
a439fa3e1a
767259be7e
e9f34fb4b1
d5c07ef7b3
5e218f3f4d
bcab61ab1d
263c5a348e
be7d2fbe63
f7f9a131e4
5df5eb2db2
30eb0e5b5b
5b860cb989
76d0623b11
db4eaf4d2d
13f21206a6
14ee7aa84c
8a35cd1743
3c7ac093d3
b57dd5c58e
096a28656e
cc4bd54669
58eab9a018
5c488e20cc
da650f834e
e83534a430
98d4a2909e
a514ce472a
cc63802115
acec45ad7c
08d6413365
70802eb7c7
6ac5b3b136
e1e76f39d0
2094ce8a9a
8442db8101
79671c9faa
Cargo.lock (generated, 39 changed lines)
@@ -489,6 +489,11 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bbqueue"
+version = "0.5.1"
+source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4"
+
 [[package]]
 name = "benchmarks"
 version = "1.12.0"
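The lock file gains bbqueue, pinned to a meilisearch fork at a fixed commit; bbqueue is a lock-free, single-producer single-consumer byte queue. A hedged sketch of its grant/commit/release cycle against the upstream bbqueue 0.5 API (the pinned fork may diverge from this):

```rust
use bbqueue::BBBuffer;

// A statically allocated, lock-free SPSC byte queue (capacity in bytes).
static QUEUE: BBBuffer<16> = BBBuffer::new();

fn main() {
    let (mut producer, mut consumer) = QUEUE.try_split().unwrap();

    // Writer side: reserve a grant, fill it, then commit it for the reader.
    let mut grant = producer.grant_exact(4).unwrap();
    grant.buf().copy_from_slice(&[1, 2, 3, 4]);
    grant.commit(4);

    // Reader side: view the committed bytes, then release them.
    let read = consumer.read().unwrap();
    assert_eq!(read.buf(), &[1, 2, 3, 4]);
    read.release(4);
}
```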
@@ -1246,19 +1251,6 @@ dependencies = [
 "itertools 0.10.5",
 ]
 
-[[package]]
-name = "crossbeam"
-version = "0.8.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
-dependencies = [
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-epoch",
- "crossbeam-queue",
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.13"
@@ -1918,6 +1910,15 @@ dependencies = [
 "serde_json",
 ]
 
+[[package]]
+name = "flume"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
+dependencies = [
+ "spin",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
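flume also enters the lock file; it is added to the dependency list of the 1.12.0 workspace crate patched further down, and it is what pulls in `spin` (see the spin hunk below, which gains a `lock_api` dependency). Its channel API mirrors std::sync::mpsc; a minimal sketch:

```rust
fn main() {
    // A bounded flume channel; both halves are cloneable.
    let (tx, rx) = flume::bounded::<u32>(4);
    tx.send(7).unwrap();
    drop(tx); // close the channel so the iteration below terminates
    assert_eq!(rx.iter().collect::<Vec<_>>(), vec![7]);
}
```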
@@ -2616,7 +2617,7 @@ dependencies = [
 "big_s",
 "bincode",
 "bumpalo",
-"crossbeam",
+"crossbeam-channel",
 "csv",
 "derive_builder 0.20.0",
 "dump",
@@ -3611,6 +3612,7 @@ version = "1.12.0"
 dependencies = [
 "allocator-api2",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+"bbqueue",
 "big_s",
 "bimap",
 "bincode",
@@ -3630,6 +3632,7 @@ dependencies = [
 "enum-iterator",
 "filter-parser",
 "flatten-serde-json",
+"flume",
 "fst",
 "fxhash",
 "geoutils",
@@ -4743,8 +4746,9 @@ dependencies = [
 
 [[package]]
 name = "roaring"
-version = "0.10.6"
-source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#8ff028e484fb6192a0acf5a669eaf18c30cada6e"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88"
 dependencies = [
 "bytemuck",
 "byteorder",
@@ -5186,6 +5190,9 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
 
 [[package]]
 name = "spm_precompiled"
@@ -43,6 +43,3 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.roaring]
 opt-level = 3
-
-[patch.crates-io]
-roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }
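With roaring 0.10.7 published, the workspace drops its `[patch.crates-io]` override onto the `clone-iter-slice` branch, and every manifest below moves to the registry release. A minimal sketch of the crate as used here, a compressed bitmap over u32 ids:

```rust
use roaring::RoaringBitmap;

fn main() {
    // Compressed bitmaps like this hold sets of document and task ids.
    let bitmap: RoaringBitmap = (0..1_000).collect();
    assert!(bitmap.contains(999));
    assert_eq!(bitmap.len(), 1_000);
}
```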
@@ -24,7 +24,7 @@ tempfile = "3.14.0"
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.6"
+roaring = "0.10.7"
 
 [build-dependencies]
 anyhow = "1.0.86"
@@ -16,6 +16,7 @@ use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
 use roaring::RoaringBitmap;
 
 #[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -157,6 +158,7 @@ fn indexing_songs_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
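Every benchmark call site gains a thread-pool argument to `indexer::index`, passed as the third parameter; the same one-line hunk repeats for each benchmark below. A hedged sketch of the calling pattern using plain rayon (milli's `ThreadPoolNoAbort` wrapper is assumed to behave like a rayon pool, and all names in the sketch are illustrative):

```rust
use rayon::prelude::*;
use rayon::ThreadPool;

// Illustrative stand-in for milli's indexer::index: the callee takes the
// pool explicitly and decides where its parallel work runs, instead of
// every call site wrapping the whole call in pool.install(..).
fn index_documents(pool: &ThreadPool, word_counts: &[u64]) -> u64 {
    pool.install(|| word_counts.par_iter().sum())
}

fn main() {
    let pool = rayon::ThreadPoolBuilder::new().build().unwrap();
    assert_eq!(index_documents(&pool, &[1, 2, 3]), 6);
}
```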
@@ -223,6 +225,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -267,6 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -335,6 +339,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -411,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -455,6 +461,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -495,6 +502,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -562,6 +570,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -628,6 +637,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -694,6 +704,7 @@ fn indexing_wiki(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -759,6 +770,7 @@ fn reindexing_wiki(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -803,6 +815,7 @@ fn reindexing_wiki(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -870,6 +883,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -946,6 +960,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -991,6 +1006,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1032,6 +1048,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1098,6 +1115,7 @@ fn indexing_movies_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1163,6 +1181,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1207,6 +1226,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1274,6 +1294,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1321,6 +1342,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1385,6 +1407,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1429,6 +1452,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1469,6 +1493,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1558,6 +1583,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1648,6 +1674,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1730,6 +1757,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1796,6 +1824,7 @@ fn indexing_geo(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1861,6 +1890,7 @@ fn reindexing_geo(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1905,6 +1935,7 @@ fn reindexing_geo(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -1972,6 +2003,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;
 
 #[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;
 
 #[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;
 
 #[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -117,6 +117,7 @@ pub fn base_setup(conf: &Conf) -> Index {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -17,7 +17,7 @@ http = "1.1.0"
 meilisearch-types = { path = "../meilisearch-types" }
 once_cell = "1.19.0"
 regex = "1.10.5"
-roaring = { version = "0.10.6", features = ["serde"] }
+roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 tar = "0.4.41"
@@ -135,6 +135,7 @@ fn main() {
 indexer::index(
     &mut wtxn,
     &index,
+    &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
     indexer_config.grenad_parameters(),
     &db_fields_ids_map,
     new_fields_ids_map,
@@ -24,7 +24,7 @@ meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.6.0"
 raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 rayon = "1.10.0"
-roaring = { version = "0.10.6", features = ["serde"] }
+roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 synchronoise = "1.0.1"
@@ -45,7 +45,7 @@ bumpalo = "3.16.0"
 [dev-dependencies]
 arroy = "0.5.0"
 big_s = "1.0.2"
-crossbeam = "0.8.4"
+crossbeam-channel = "0.5.13"
 insta = { version = "1.39.0", features = ["json", "redactions"] }
 maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
@@ -115,13 +115,6 @@ pub enum BatchKind {
     allow_index_creation: bool,
     settings_ids: Vec<TaskId>,
 },
-SettingsAndDocumentOperation {
-    settings_ids: Vec<TaskId>,
-    method: IndexDocumentsMethod,
-    allow_index_creation: bool,
-    primary_key: Option<String>,
-    operation_ids: Vec<TaskId>,
-},
 Settings {
     allow_index_creation: bool,
     settings_ids: Vec<TaskId>,
@@ -146,7 +139,6 @@ impl BatchKind {
 match self {
     BatchKind::DocumentOperation { allow_index_creation, .. }
     | BatchKind::ClearAndSettings { allow_index_creation, .. }
-    | BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
     | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
     _ => None,
 }
@@ -154,10 +146,7 @@
 
 fn primary_key(&self) -> Option<Option<&str>> {
     match self {
-        BatchKind::DocumentOperation { primary_key, .. }
-        | BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
-            Some(primary_key.as_deref())
-        }
+        BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
         _ => None,
     }
 }
@@ -275,8 +264,7 @@
     Break(BatchKind::IndexDeletion { ids })
 }
 (
-    BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
-    | BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
+    BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
     K::IndexDeletion,
 ) => {
     ids.push(id);
@@ -356,15 +344,9 @@
 ) => Break(this),
 
 (
-    BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
+    this @ BatchKind::DocumentOperation { .. },
     K::Settings { .. },
-) => Continue(BatchKind::SettingsAndDocumentOperation {
-    settings_ids: vec![id],
-    method,
-    allow_index_creation,
-    primary_key,
-    operation_ids,
-}),
+) => Break(this),
 
 (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
     deletion_ids.push(id);
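The hunk above is the behavioral core of the autobatcher change: a settings task arriving after a document operation now closes (Breaks) the batch instead of merging into a combined SettingsAndDocumentOperation. A toy model of the rule, with hypothetical, heavily simplified types (the real BatchKind has many more variants and inputs):

```rust
// Toy model only; not the real index-scheduler autobatcher types.
#[derive(Debug, PartialEq)]
enum BatchKind {
    DocumentOperation { operation_ids: Vec<u32> },
}

enum Step {
    Continue(BatchKind),
    Break(BatchKind),
}

enum Task {
    DocumentImport,
    Settings,
}

fn accumulate(this: BatchKind, task: Task, id: u32) -> Step {
    match (this, task) {
        // Previously this arm returned Continue with a combined
        // SettingsAndDocumentOperation batch; now the batch is closed.
        (this @ BatchKind::DocumentOperation { .. }, Task::Settings) => Step::Break(this),
        (BatchKind::DocumentOperation { mut operation_ids }, Task::DocumentImport) => {
            operation_ids.push(id);
            Step::Continue(BatchKind::DocumentOperation { operation_ids })
        }
    }
}

fn main() {
    // A second document import keeps the batch open...
    let step =
        accumulate(BatchKind::DocumentOperation { operation_ids: vec![0] }, Task::DocumentImport, 1);
    let batch = match step {
        Step::Continue(batch) => batch,
        Step::Break(_) => unreachable!(),
    };
    assert_eq!(batch, BatchKind::DocumentOperation { operation_ids: vec![0, 1] });

    // ...but a settings task now closes it instead of merging in.
    match accumulate(batch, Task::Settings, 2) {
        Step::Break(closed) => {
            assert_eq!(closed, BatchKind::DocumentOperation { operation_ids: vec![0, 1] })
        }
        Step::Continue(_) => unreachable!(),
    }
}
```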
@@ -477,63 +459,7 @@
     allow_index_creation,
 })
 }
-(
-    BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
-    K::DocumentClear,
-) => {
-    operation_ids.push(id);
-    Continue(BatchKind::ClearAndSettings {
-        settings_ids,
-        other: operation_ids,
-        allow_index_creation,
-    })
-}
-
-(
-    BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
-    K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
-) => {
-    operation_ids.push(id);
-    Continue(BatchKind::SettingsAndDocumentOperation {
-        settings_ids,
-        method: ReplaceDocuments,
-        allow_index_creation,
-        primary_key: pk2,
-        operation_ids,
-    })
-}
-(
-    BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
-    K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
-) => {
-    operation_ids.push(id);
-    Continue(BatchKind::SettingsAndDocumentOperation {
-        settings_ids,
-        method: UpdateDocuments,
-        allow_index_creation,
-        primary_key: pk2,
-        operation_ids,
-    })
-}
-// But we can't batch a settings and a doc op with another doc op
-// this MUST be AFTER the two previous branch
-(
-    this @ BatchKind::SettingsAndDocumentOperation { .. },
-    K::DocumentDeletion { .. } | K::DocumentImport { .. },
-) => Break(this),
-(
-    BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
-    K::Settings { .. },
-) => {
-    settings_ids.push(id);
-    Continue(BatchKind::SettingsAndDocumentOperation {
-        settings_ids,
-        method,
-        allow_index_creation,
-        primary_key,
-        operation_ids,
-    })
-}
 (
     BatchKind::IndexCreation { .. }
     | BatchKind::IndexDeletion { .. }
@@ -808,30 +734,30 @@ mod tests {
 }
 
 #[test]
-fn document_addition_batch_with_settings() {
+fn document_addition_doesnt_batch_with_settings() {
     // simple case
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 
     // multiple settings and doc addition
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
 
     // addition and setting unordered
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 
-    // We ensure this kind of batch doesn't batch with forbidden operations
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    // Doesn't batch with other forbidden operations
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 }
 
 #[test]
@@ -859,8 +785,8 @@ mod tests {
     debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
 
     debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 }
 
 #[test]
@@ -907,50 +833,6 @@ mod tests {
     debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
     debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
     debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 
-    // Then the mixed cases.
-    // The index already exists, whatever is the right of the tasks it shouldn't change the result.
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-
-    // When the index doesn't exists yet it's more complicated.
-    // Either the first task we encounter create it, in which case we can create a big batch with everything.
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    // The right of the tasks following isn't really important.
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
-    // Or, the second case; the first task doesn't create the index and thus we wants to batch it with only tasks that can't create an index.
-    // that can be a second task that don't have the right to create an index. Or anything that can't create an index like an index deletion, document deletion, document clear, etc.
-    // All theses tasks are going to throw an error `Index doesn't exist` once the batch is processed.
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
-    // The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whit what
-    // follows because we first need to process the erronous batch.
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
 }
 
 #[test]
@@ -959,13 +841,13 @@ mod tests {
     debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
     debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
     debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
 
     debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
     debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
     debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+    debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
     debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
 
     // batch deletion and addition
@@ -104,7 +104,6 @@ pub(crate) enum IndexOperation {
     index_uid: String,
     primary_key: Option<String>,
     method: IndexDocumentsMethod,
-    documents_counts: Vec<u64>,
     operations: Vec<DocumentOperation>,
     tasks: Vec<Task>,
 },
@@ -130,19 +129,6 @@ pub(crate) enum IndexOperation {
     index_uid: String,
     cleared_tasks: Vec<Task>,
 
     // The boolean indicates if it's a settings deletion or creation.
     settings: Vec<(bool, Settings<Unchecked>)>,
     settings_tasks: Vec<Task>,
 },
-SettingsAndDocumentOperation {
-    index_uid: String,
-
-    primary_key: Option<String>,
-    method: IndexDocumentsMethod,
-    documents_counts: Vec<u64>,
-    operations: Vec<DocumentOperation>,
-    document_import_tasks: Vec<Task>,
-
-    // The boolean indicates if it's a settings deletion or creation.
-    settings: Vec<(bool, Settings<Unchecked>)>,
-    settings_tasks: Vec<Task>,
@@ -174,12 +160,7 @@ impl Batch {
 IndexOperation::DocumentEdition { task, .. } => {
     RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
 }
-IndexOperation::SettingsAndDocumentOperation {
-    document_import_tasks: tasks,
-    settings_tasks: other,
-    ..
-}
-| IndexOperation::DocumentClearAndSetting {
+IndexOperation::DocumentClearAndSetting {
     cleared_tasks: tasks,
     settings_tasks: other,
     ..
@@ -239,8 +220,7 @@ impl IndexOperation {
 | IndexOperation::DocumentDeletion { index_uid, .. }
 | IndexOperation::DocumentClear { index_uid, .. }
 | IndexOperation::Settings { index_uid, .. }
-| IndexOperation::DocumentClearAndSetting { index_uid, .. }
-| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
+| IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
 }
 }
 }
@@ -262,9 +242,6 @@ impl fmt::Display for IndexOperation {
 IndexOperation::DocumentClearAndSetting { .. } => {
     f.write_str("IndexOperation::DocumentClearAndSetting")
 }
-IndexOperation::SettingsAndDocumentOperation { .. } => {
-    f.write_str("IndexOperation::SettingsAndDocumentOperation")
-}
 }
 }
 }
@@ -330,21 +307,14 @@ impl IndexScheduler {
 })
 .flatten();
 
-let mut documents_counts = Vec::new();
 let mut operations = Vec::new();
 
 for task in tasks.iter() {
     match task.kind {
-        KindWithContent::DocumentAdditionOrUpdate {
-            content_file,
-            documents_count,
-            ..
-        } => {
-            documents_counts.push(documents_count);
+        KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => {
             operations.push(DocumentOperation::Add(content_file));
         }
         KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
-            documents_counts.push(documents_ids.len() as u64);
             operations.push(DocumentOperation::Delete(documents_ids.clone()));
         }
         _ => unreachable!(),
@@ -356,7 +326,6 @@
     index_uid,
     primary_key,
     method,
-    documents_counts,
     operations,
     tasks,
 },
@@ -441,67 +410,6 @@
     must_create_index,
 }))
 }
-BatchKind::SettingsAndDocumentOperation {
-    settings_ids,
-    method,
-    allow_index_creation,
-    primary_key,
-    operation_ids,
-} => {
-    let settings = self.create_next_batch_index(
-        rtxn,
-        index_uid.clone(),
-        BatchKind::Settings { settings_ids, allow_index_creation },
-        current_batch,
-        must_create_index,
-    )?;
-
-    let document_import = self.create_next_batch_index(
-        rtxn,
-        index_uid.clone(),
-        BatchKind::DocumentOperation {
-            method,
-            allow_index_creation,
-            primary_key,
-            operation_ids,
-        },
-        current_batch,
-        must_create_index,
-    )?;
-
-    match (document_import, settings) {
-        (
-            Some(Batch::IndexOperation {
-                op:
-                    IndexOperation::DocumentOperation {
-                        primary_key,
-                        documents_counts,
-                        operations,
-                        tasks: document_import_tasks,
-                        ..
-                    },
-                ..
-            }),
-            Some(Batch::IndexOperation {
-                op: IndexOperation::Settings { settings, tasks: settings_tasks, .. },
-                ..
-            }),
-        ) => Ok(Some(Batch::IndexOperation {
-            op: IndexOperation::SettingsAndDocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations,
-                document_import_tasks,
-                settings,
-                settings_tasks,
-            },
-            must_create_index,
-        })),
-        _ => unreachable!(),
-    }
-}
 BatchKind::IndexCreation { id } => {
     let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
     current_batch.processing(Some(&mut task));
@@ -589,7 +497,6 @@ impl IndexScheduler {
 // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
 let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
 let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
-current_batch.processing(Some(&mut task));
 
 // If the task is not associated with any index, verify that it is an index swap and
 // create the batch directly. Otherwise, get the index name associated with the task
@@ -599,6 +506,7 @@
     index_name
 } else {
     assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
+    current_batch.processing(Some(&mut task));
     return Ok(Some((Batch::IndexSwap { task }, current_batch)));
 };
@@ -1304,7 +1212,6 @@
     index_uid: _,
     primary_key,
     method,
-    documents_counts: _,
     operations,
     mut tasks,
 } => {
@@ -1351,7 +1258,10 @@
 let pool = match &indexer_config.thread_pool {
     Some(pool) => pool,
     None => {
-        local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
+        local_pool = ThreadPoolNoAbortBuilder::new()
+            .thread_name(|i| format!("indexing-thread-{i}"))
+            .build()
+            .unwrap();
         &local_pool
     }
 };
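Here and in the two later occurrences, the fallback pool gains named threads, which makes indexing workers identifiable in debuggers, profilers, and crash backtraces. A sketch with plain rayon (assuming milli's ThreadPoolNoAbortBuilder forwards thread_name to rayon, which this diff does not show):

```rust
fn main() {
    let pool = rayon::ThreadPoolBuilder::new()
        .thread_name(|index| format!("indexing-thread-{index}"))
        .num_threads(2)
        .build()
        .unwrap();

    // Worker threads carry the name produced by the builder closure.
    pool.install(|| {
        let name = std::thread::current().name().unwrap_or("").to_owned();
        assert!(name.starts_with("indexing-thread-"));
    });
}
```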
@@ -1399,21 +1309,19 @@
 }
 
 if tasks.iter().any(|res| res.error.is_none()) {
-    pool.install(|| {
-        indexer::index(
-            index_wtxn,
-            index,
-            indexer_config.grenad_parameters(),
-            &db_fields_ids_map,
-            new_fields_ids_map,
-            primary_key,
-            &document_changes,
-            embedders,
-            &|| must_stop_processing.get(),
-            &send_progress,
-        )
-    })
-    .unwrap()?;
+    indexer::index(
+        index_wtxn,
+        index,
+        pool,
+        indexer_config.grenad_parameters(),
+        &db_fields_ids_map,
+        new_fields_ids_map,
+        primary_key,
+        &document_changes,
+        embedders,
+        &|| must_stop_processing.get(),
+        &send_progress,
+    )?;
 
     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
 }
@@ -1489,34 +1397,34 @@
 let pool = match &indexer_config.thread_pool {
     Some(pool) => pool,
     None => {
-        local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
+        local_pool = ThreadPoolNoAbortBuilder::new()
+            .thread_name(|i| format!("indexing-thread-{i}"))
+            .build()
+            .unwrap();
         &local_pool
     }
 };
 
-pool.install(|| {
-    let indexer =
-        UpdateByFunction::new(candidates, context.clone(), code.clone());
-    let document_changes = indexer.into_changes(&primary_key)?;
-    let embedders = index.embedding_configs(index_wtxn)?;
-    let embedders = self.embedders(embedders)?;
+let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
+let document_changes =
+    pool.install(|| indexer.into_changes(&primary_key)).unwrap()?;
+let embedders = index.embedding_configs(index_wtxn)?;
+let embedders = self.embedders(embedders)?;
 
-    indexer::index(
-        index_wtxn,
-        index,
-        indexer_config.grenad_parameters(),
-        &db_fields_ids_map,
-        new_fields_ids_map,
-        None, // cannot change primary key in DocumentEdition
-        &document_changes,
-        embedders,
-        &|| must_stop_processing.get(),
-        &send_progress,
-    )?;
-
-    Result::Ok(())
-})
-.unwrap()?;
+indexer::index(
+    index_wtxn,
+    index,
+    pool,
+    indexer_config.grenad_parameters(),
+    &db_fields_ids_map,
+    new_fields_ids_map,
+    None, // cannot change primary key in DocumentEdition
+    &document_changes,
+    embedders,
+    &|| must_stop_processing.get(),
+    &send_progress,
+)?;
 
 // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
 }
@@ -1641,7 +1549,10 @@
 let pool = match &indexer_config.thread_pool {
     Some(pool) => pool,
     None => {
-        local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
+        local_pool = ThreadPoolNoAbortBuilder::new()
+            .thread_name(|i| format!("indexing-thread-{i}"))
+            .build()
+            .unwrap();
         &local_pool
     }
 };
@@ -1652,21 +1563,19 @@
 let embedders = index.embedding_configs(index_wtxn)?;
 let embedders = self.embedders(embedders)?;
 
-pool.install(|| {
-    indexer::index(
-        index_wtxn,
-        index,
-        indexer_config.grenad_parameters(),
-        &db_fields_ids_map,
-        new_fields_ids_map,
-        None, // document deletion never changes primary key
-        &document_changes,
-        embedders,
-        &|| must_stop_processing.get(),
-        &send_progress,
-    )
-})
-.unwrap()?;
+indexer::index(
+    index_wtxn,
+    index,
+    pool,
+    indexer_config.grenad_parameters(),
+    &db_fields_ids_map,
+    new_fields_ids_map,
+    None, // document deletion never changes primary key
+    &document_changes,
+    embedders,
+    &|| must_stop_processing.get(),
+    &send_progress,
+)?;
 
 // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
 }
@@ -1694,43 +1603,6 @@
 
     Ok(tasks)
 }
-IndexOperation::SettingsAndDocumentOperation {
-    index_uid,
-    primary_key,
-    method,
-    documents_counts,
-    operations,
-    document_import_tasks,
-    settings,
-    settings_tasks,
-} => {
-    let settings_tasks = self.apply_index_operation(
-        index_wtxn,
-        index,
-        IndexOperation::Settings {
-            index_uid: index_uid.clone(),
-            settings,
-            tasks: settings_tasks,
-        },
-    )?;
-
-    let mut import_tasks = self.apply_index_operation(
-        index_wtxn,
-        index,
-        IndexOperation::DocumentOperation {
-            index_uid,
-            primary_key,
-            method,
-            documents_counts,
-            operations,
-            tasks: document_import_tasks,
-        },
-    )?;
-
-    let mut tasks = settings_tasks;
-    tasks.append(&mut import_tasks);
-    Ok(tasks)
-}
 IndexOperation::DocumentClearAndSetting {
     index_uid,
     cleared_tasks,
@@ -407,7 +407,7 @@ pub struct IndexScheduler {
 ///
 /// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation.
 #[cfg(test)]
-test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
+test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,
 
 /// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler.
 ///
@@ -476,7 +476,7 @@ impl IndexScheduler {
 /// Create an index scheduler and start its run loop.
 pub fn new(
     options: IndexSchedulerOptions,
-    #[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
+    #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,
     #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>,
 ) -> Result<Self> {
     std::fs::create_dir_all(&options.tasks_path)?;
@@ -1440,7 +1440,7 @@ impl IndexScheduler {
 
 // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
 if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
-    && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
+    && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
 {
     return Err(Error::NoSpaceLeftInTaskQueue);
 }
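The enqueue guard tightens from 50% to 40% of the LMDB map size; note the inline comment above still says 50% and is left unchanged by this diff. A runnable restatement of the integer arithmetic (function and parameter names are illustrative; the real values come from the heed/LMDB environment):

```rust
// Refuse new tasks once used pages exceed 40% of the map size.
fn refuses_new_tasks(non_free_pages_size: u64, map_size: u64) -> bool {
    // Integer percentage of the task database already in use.
    (non_free_pages_size * 100) / map_size > 40
}

fn main() {
    assert!(!refuses_new_tasks(40, 100)); // exactly 40% is still accepted
    assert!(refuses_new_tasks(41, 100)); // anything above is refused
}
```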
@@ -1738,11 +1738,8 @@
     }
 }
 
-self.processing_tasks.write().unwrap().stop_processing();
 // We must re-add the canceled task so they're part of the same batch.
-// processed.processing |= canceled;
 ids |= canceled;
 
 self.write_batch(&mut wtxn, processing_batch, &ids)?;
 
 #[cfg(test)]
@ -1750,6 +1747,10 @@ impl IndexScheduler {
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
|
||||
@ -2237,7 +2238,7 @@ mod tests {
|
||||
use std::time::Instant;
|
||||
|
||||
use big_s::S;
|
||||
use crossbeam::channel::RecvTimeoutError;
|
||||
use crossbeam_channel::RecvTimeoutError;
|
||||
use file_store::File;
|
||||
use insta::assert_json_snapshot;
|
||||
use maplit::btreeset;
|
||||
@ -2289,7 +2290,7 @@ mod tests {
|
||||
configuration: impl Fn(&mut IndexSchedulerOptions),
|
||||
) -> (Self, IndexSchedulerHandle) {
|
||||
let tempdir = TempDir::new().unwrap();
|
||||
let (sender, receiver) = crossbeam::channel::bounded(0);
|
||||
let (sender, receiver) = crossbeam_channel::bounded(0);
|
||||
|
||||
let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
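
Note: the zero-capacity channel created above is a rendezvous point, which is what the scheduler's test breakpoints rely on. A minimal sketch of that behavior with the renamed crossbeam_channel crate (the string payload is only an illustration):

    use std::thread;

    fn main() {
        // With a bound of 0, every send blocks until a receiver is ready,
        // so the run loop pauses at each breakpoint until the test observes it.
        let (sender, receiver) = crossbeam_channel::bounded(0);
        let handle = thread::spawn(move || sender.send("breakpoint reached").unwrap());
        assert_eq!(receiver.recv().unwrap(), "breakpoint reached");
        handle.join().unwrap();
    }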

@@ -2421,7 +2422,7 @@ mod tests {
pub struct IndexSchedulerHandle {
_tempdir: TempDir,
index_scheduler: IndexScheduler,
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>,
last_breakpoint: Breakpoint,
}

@@ -4318,10 +4319,35 @@ mod tests {
let proc = index_scheduler.processing_tasks.read().unwrap().clone();

let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default())
let (mut batches, _) = index_scheduler
.get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // only the processing batch in the first tick
assert_eq!(batches.len(), 1);
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
snapshot!(batch, @r#"
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null
}
"#);

let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (batches, _) = index_scheduler

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, }
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[3,]
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, }
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}

@@ -67,7 +67,7 @@ impl ProcessingBatch {
task.batch_uid = Some(self.uid);
// We don't store the statuses in the map since they're all enqueued but we must
// still store them in the stats since that can be displayed.
*self.stats.status.entry(task.status).or_default() += 1;
*self.stats.status.entry(Status::Processing).or_default() += 1;

self.kinds.insert(task.kind.as_kind());
*self.stats.types.entry(task.kind.as_kind()).or_default() += 1;
@@ -106,7 +106,7 @@ impl ProcessingBatch {
self.stats.total_nb_tasks = 0;
}

/// Update the timestamp of the tasks and the inner structure of this sturcture.
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state

@@ -17,7 +17,7 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"

@@ -25,7 +25,7 @@ fst = "0.4.7"
memmap2 = "0.9.4"
milli = { path = "../milli" }
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"

@@ -214,7 +214,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB
let mut doc_alloc = Bump::with_capacity(1024 * 1024); // 1MiB

let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);
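
Note: the fix above is easy to miss because the old allocation disagreed with its own comment by a factor of 1024. A quick check of the arithmetic:

    fn main() {
        let old = 1024 * 1024 * 1024; // 1 GiB, despite the "1MiB" comment
        let new = 1024 * 1024; // 1 MiB, matching the comment
        assert_eq!(old, 1_073_741_824);
        assert_eq!(new, 1_048_576);
        assert_eq!(old / new, 1024);
    }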

@@ -279,6 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;

@@ -103,7 +103,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.2"
roaring = "0.10.7"
mopa-maintained = "0.2.3"

[dev-dependencies]

@@ -20,14 +20,14 @@ use meilisearch::{
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))

@@ -654,8 +654,9 @@ impl Opt {

#[derive(Debug, Default, Clone, Parser, Deserialize)]
pub struct IndexerOpts {
/// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch
/// uses no more than two thirds of available memory.
/// Specifies the maximum resident memory that Meilisearch can use for indexing.
/// By default, Meilisearch limits the RAM usage to 5% of the total available memory.
/// Note that the underlying store utilizes memory-mapping and makes use of the rest.
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
#[serde(default)]
pub max_indexing_memory: MaxMemory,
@@ -714,7 +715,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
}
}

/// A type used to detect the max memory available and use 2/3 of it.
/// A type used to detect the max resident memory available and use 5% of it.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxMemory(Option<Byte>);

@@ -728,7 +729,7 @@ impl FromStr for MaxMemory {

impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 5 / 100).map(Byte::from_u64))
}
}
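
Note: to see the scale of this default change, a worked example assuming a hypothetical machine with 32 GiB of RAM:

    fn main() {
        let total: u64 = 32 * 1024 * 1024 * 1024;
        let old_default = total * 2 / 3; // ~21.3 GiB under the former rule
        let new_default = total * 5 / 100; // ~1.6 GiB under the new rule
        assert!(new_default < old_default);
        // The remaining memory is still exploited through LMDB's memory
        // mapping, as the new doc comment on `max_indexing_memory` notes.
        println!("old: {old_default} B, new: {new_default} B");
    }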

@@ -1407,6 +1407,13 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};

if !index.facet_search(&rtxn)? {
return Err(ResponseError::from_msg(
"The facet search is disabled for this index".to_string(),
Code::FacetSearchDisabled,
));
}

// In the faceted search context, we want to use the intersection between the locales provided by the user
// and the locales of the facet string.
// If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.

@@ -224,7 +224,7 @@ async fn list_batches_status_and_type_filtered() {
}

#[actix_rt::test]
async fn get_batch_filter_error() {
async fn list_batch_filter_error() {
let server = Server::new().await;

let (response, code) = server.batches_filter("lol=pied").await;

@@ -52,6 +52,25 @@ impl Value {
}
self
}

/// Return `true` if the `status` field is set to `failed`.
/// Panic if the `status` field doesn't exist.
#[track_caller]
pub fn is_fail(&self) -> bool {
if !self["status"].is_string() {
panic!("Called `is_fail` on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self["status"] == serde_json::Value::String(String::from("failed"))
}

// Panic if the json doesn't contain the `status` field set to "failed"
#[track_caller]
pub fn failed(&self) -> &Self {
if !self.is_fail() {
panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self
}
}

impl From<serde_json::Value> for Value {

@@ -221,8 +221,15 @@ async fn add_documents_and_deactivate_facet_search() {
let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;

assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
assert_eq!(code, 400, "{}", response);
snapshot!(response, @r###"
{
"message": "The facet search is disabled for this index",
"code": "facet_search_disabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
}
"###);
}

#[actix_rt::test]
@@ -245,8 +252,15 @@ async fn deactivate_facet_search_and_add_documents() {
let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;

assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
assert_eq!(code, 400, "{}", response);
snapshot!(response, @r###"
{
"message": "The facet search is disabled for this index",
"code": "facet_search_disabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
}
"###);
}

#[actix_rt::test]

@@ -129,11 +129,11 @@ async fn perform_on_demand_snapshot() {

index.load_test_set().await;

server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, _) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.succeeded();

server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, _) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.failed();

let (task, code) = server.create_snapshot().await;
snapshot!(code, @"202 Accepted");

@@ -42,7 +42,7 @@ obkv = "0.3.0"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
@@ -98,6 +98,8 @@ allocator-api2 = "0.2.18"
rustc-hash = "2.0.0"
uell = "0.1.0"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }

[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }

@@ -3,6 +3,7 @@ use std::convert::Infallible;
use std::fmt::Write;
use std::{io, str};

use bstr::BString;
use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
@@ -62,9 +63,9 @@ pub enum InternalError {
#[error(transparent)]
Store(#[from] MdbError),
#[error("Cannot delete {key:?} from database {database_name}: {error}")]
StoreDeletion { database_name: &'static str, key: Vec<u8>, error: heed::Error },
StoreDeletion { database_name: &'static str, key: BString, error: heed::Error },
#[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")]
StorePut { database_name: &'static str, key: Vec<u8>, value_length: usize, error: heed::Error },
StorePut { database_name: &'static str, key: BString, value_length: usize, error: heed::Error },
#[error(transparent)]
Utf8(#[from] str::Utf8Error),
#[error("An indexation process was explicitly aborted")]

@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {

fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
Ok(Cow::Owned(v))
}
}

@@ -27,18 +27,27 @@ impl CboRoaringBitmapCodec {
}
}

pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
pub fn serialize_into_vec(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
Self::serialize_into_writer(roaring, vec).unwrap()
}

pub fn serialize_into_writer<W: io::Write>(
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
vec.write_u32::<NativeEndian>(integer).unwrap();
writer.write_u32::<NativeEndian>(integer)?;
}
} else {
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
roaring.serialize_into(vec).unwrap();
roaring.serialize_into(writer)?;
}

Ok(())
}

pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
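
Note: the threshold trick above trades the RoaringBitmap header for raw integers when the set is tiny. A minimal sketch under an assumed THRESHOLD constant (the real value lives elsewhere in milli and is not shown in this hunk):

    use byteorder::{NativeEndian, WriteBytesExt};
    use roaring::RoaringBitmap;
    use std::io;

    const THRESHOLD: u64 = 7; // assumption for the sketch

    fn encode<W: io::Write>(bitmap: &RoaringBitmap, mut writer: W) -> io::Result<()> {
        if bitmap.len() <= THRESHOLD {
            // A handful of raw u32s weighs less than a serialized bitmap header.
            for n in bitmap {
                writer.write_u32::<NativeEndian>(n)?;
            }
            Ok(())
        } else {
            bitmap.serialize_into(writer)
        }
    }

Taking a generic io::Write is what lets the new serialize_into_vec wrapper keep the old Vec-based call sites while the writer-based path propagates errors with `?` instead of unwrapping.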

@@ -143,7 +152,7 @@ impl CboRoaringBitmapCodec {
return Ok(None);
}

Self::serialize_into(&previous, buffer);
Self::serialize_into_vec(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
@@ -169,7 +178,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {

fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Self::serialize_into_vec(item, &mut vec);
Ok(Cow::Owned(vec))
}
}

@@ -1821,6 +1821,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -1911,6 +1912,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -1991,6 +1993,7 @@ pub(crate) mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -1,6 +1,7 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]

#[cfg(not(windows))]
#[cfg(test)]
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

@@ -83,6 +83,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

@@ -2155,6 +2155,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2216,6 +2217,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2268,6 +2270,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2319,6 +2322,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2372,6 +2376,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2430,6 +2435,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2481,6 +2487,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2532,6 +2539,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2725,6 +2733,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2783,6 +2792,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@@ -2838,6 +2848,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
File diff suppressed because it is too large
@@ -1,5 +1,6 @@
use std::collections::{BTreeMap, BTreeSet};

use either::Either;
use heed::RoTxn;
use raw_collections::RawMap;
use serde_json::value::RawValue;
@@ -209,29 +210,34 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
for MergedDocument<'d, 'doc, 't, Mapper>
{
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'d str, &'d RawValue)>> {
let mut new_doc_it = self.new_doc.iter_top_level_fields();
let mut db_it = self.db.iter().flat_map(|db| db.iter_top_level_fields());
let mut seen_fields = BTreeSet::new();
match &self.db {
Some(db) => {
let mut new_doc_it = self.new_doc.iter_top_level_fields();
let mut db_it = db.iter_top_level_fields();
let mut seen_fields = BTreeSet::new();

std::iter::from_fn(move || {
if let Some(next) = new_doc_it.next() {
if let Ok((name, _)) = next {
seen_fields.insert(name);
}
return Some(next);
}
loop {
match db_it.next()? {
Ok((name, value)) => {
if seen_fields.contains(name) {
continue;
Either::Left(std::iter::from_fn(move || {
if let Some(next) = new_doc_it.next() {
if let Ok((name, _)) = next {
seen_fields.insert(name);
}
return Some(Ok((name, value)));
return Some(next);
}
Err(err) => return Some(Err(err)),
}
loop {
match db_it.next()? {
Ok((name, value)) => {
if seen_fields.contains(name) {
continue;
}
return Some(Ok((name, value)));
}
Err(err) => return Some(Err(err)),
}
}
}))
}
})
None => Either::Right(self.new_doc.iter_top_level_fields()),
}
}

fn vectors_field(&self) -> Result<Option<&'d RawValue>> {
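
Note: the rewrite above relies on a standard trick: the two match arms produce different iterator types, and either::Either unifies them while itself implementing Iterator. A minimal sketch of the same pattern:

    use either::Either;

    // Both arms must yield the same Item type; `Either` forwards `Iterator`
    // calls to whichever variant is present.
    fn evens_or_all(only_evens: bool) -> impl Iterator<Item = u32> {
        if only_evens {
            Either::Left((0..10).filter(|n| n % 2 == 0))
        } else {
            Either::Right(0..10)
        }
    }

    fn main() {
        assert_eq!(evens_or_all(true).count(), 5);
        assert_eq!(evens_or_all(false).count(), 10);
    }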

@@ -1,7 +1,10 @@
use bumpalo::Bump;
use heed::RoTxn;

use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
@@ -164,6 +167,80 @@ impl<'doc> Update<'doc> {
}
}

/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
///
/// `true` if at least one top-level field that is exactly a member of `fields` or a parent of a member of `fields` changed.
/// Otherwise `false`.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let mut changed = false;
let mut cached_current = None;
let mut updated_selected_field_count = 0;

for entry in self.updated().iter_top_level_fields() {
let (key, updated_value) = entry?;

if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}

updated_selected_field_count += 1;
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let current_value = current.top_level_field(key)?;
let Some(current_value) = current_value else {
changed = true;
break;
};

if current_value.get() != updated_value.get() {
changed = true;
break;
}
cached_current = Some(current);
}

if !self.has_deletion {
// no field deletion, so fields that don't appear in `updated` cannot have changed
return Ok(changed);
}

if changed {
return Ok(true);
}

// we saw all updated fields, and set `changed` if any field wasn't in `current`.
// so if there are as many fields in `current` as in `updated`, then nothing changed.
// If there are any more fields in `current`, then they are missing in `updated`.
let has_deleted_fields = {
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};

let mut current_selected_field_count = 0;
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;

if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
current_selected_field_count += 1;
}

current_selected_field_count != updated_selected_field_count
};

Ok(has_deleted_fields)
}
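
Note: the deletion check is the subtle half of has_changed_for_fields, since field-by-field equality cannot see a removed field. A hypothetical trace of the counting argument:

    // Selected fields: ["title", "price"] (hypothetical documents).
    //   current = {"title": "a", "price": 10}
    //   updated = {"title": "a"}             // "price" was deleted
    // Pass 1 walks `updated`: "title" is unchanged, so `changed` stays false
    // and updated_selected_field_count == 1.
    // Pass 2 walks `current`: current_selected_field_count == 2.
    // The counts differ, so the function returns true purely from the deletion.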

pub fn updated_vectors(
&self,
doc_alloc: &'doc Bump,

@@ -415,21 +415,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;

cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
@@ -466,12 +466,13 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
Ok(bucket_caches)
}

/// Merges the caches that must be all associated to the same bucket.
/// Merges the caches that must be all associated to the same bucket
/// but make sure to sort the different buckets before performing the merges.
///
/// # Panics
///
/// - If the bucket IDs in these frozen caches are not exactly the same.
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
where
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
{
@@ -543,12 +544,12 @@ where

// Then manage the content on the HashMap entries that weren't taken (mem::take).
while let Some(mut map) = maps.pop() {
for (key, bbbul) in map.iter_mut() {
// Make sure we don't try to work with entries already managed by the spilled
if bbbul.is_empty() {
continue;
}
// Make sure we don't try to work with entries already managed by the spilled
let mut ordered_entries: Vec<_> =
map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
ordered_entries.sort_unstable_by_key(|(key, _)| *key);

for (key, bbbul) in ordered_entries {
let mut output = DelAddRoaringBitmap::empty();
output.union_and_clear_bbbul(bbbul);
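
Note: the point of merge_caches_sorted is that surviving entries now reach the callback in key order, which sequential LMDB writes benefit from. A standalone sketch of the filter-then-sort drain, with a plain HashMap and Vec<u32> standing in for the cache and bitmap types:

    use std::collections::HashMap;

    fn main() {
        let mut map: HashMap<&[u8], Vec<u32>> = HashMap::new();
        map.insert(&b"zebra"[..], vec![3]);
        map.insert(&b"apple"[..], vec![1, 2]);
        map.insert(&b"drained"[..], vec![]); // already handled elsewhere

        // Skip empty entries, then order the rest by key so the consumer
        // sees a sorted stream instead of HashMap iteration order.
        let mut ordered: Vec<_> =
            map.iter_mut().filter(|(_, values)| !values.is_empty()).collect();
        ordered.sort_unstable_by_key(|(key, _)| *key);

        for (key, values) in ordered {
            println!("{key:?} -> {values:?}");
        }
    }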

@@ -12,13 +12,14 @@ use crate::update::new::thread_local::FullySend;
use crate::update::new::DocumentChange;
use crate::vector::EmbeddingConfigs;
use crate::Result;
pub struct DocumentsExtractor<'a> {
document_sender: &'a DocumentsSender<'a>,

pub struct DocumentsExtractor<'a, 'b> {
document_sender: DocumentsSender<'a, 'b>,
embedders: &'a EmbeddingConfigs,
}

impl<'a> DocumentsExtractor<'a> {
pub fn new(document_sender: &'a DocumentsSender<'a>, embedders: &'a EmbeddingConfigs) -> Self {
impl<'a, 'b> DocumentsExtractor<'a, 'b> {
pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs) -> Self {
Self { document_sender, embedders }
}
}
@@ -29,7 +30,7 @@ pub struct DocumentExtractorData {
pub field_distribution_delta: HashMap<String, i64>,
}

impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
type Data = FullySend<RefCell<DocumentExtractorData>>;

fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {

@@ -25,14 +25,14 @@ use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};

pub struct FacetedExtractorData<'a> {
pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a>,
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: GrenadParameters,
buckets: usize,
}

impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
type Data = RefCell<BalancedCaches<'extractor>>;

fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
@@ -97,6 +97,15 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}

extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
@@ -318,7 +327,7 @@ impl<'doc> DelAddFacetValue<'doc> {
docid: DocumentId,
sender: &FieldIdDocidFacetSender,
doc_alloc: &Bump,
) -> std::result::Result<(), crossbeam_channel::SendError<()>> {
) -> crate::Result<()> {
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
for ((fid, value), deladd) in self.strings {
if let Ok(s) = std::str::from_utf8(&value) {

@@ -1,6 +1,6 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
use std::{iter, mem, result};

use bumpalo::Bump;
@@ -97,30 +97,34 @@ pub struct FrozenGeoExtractorData<'extractor> {
impl<'extractor> FrozenGeoExtractorData<'extractor> {
pub fn iter_and_clear_removed(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.removed)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.removed)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)?))
}

pub fn iter_and_clear_inserted(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.inserted)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.inserted)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)?))
}
}

fn iterator_over_spilled_geopoints(
spilled: &mut Option<BufReader<File>>,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
let mut spilled = spilled.take();
iter::from_fn(move || match &mut spilled {
if let Some(spilled) = &mut spilled {
spilled.rewind()?;
}

Ok(iter::from_fn(move || match &mut spilled {
Some(file) => {
let geopoint_bytes = &mut [0u8; mem::size_of::<ExtractedGeoPoint>()];
match file.read_exact(geopoint_bytes) {
@@ -130,7 +134,7 @@ fn iterator_over_spilled_geopoints(
}
}
None => None,
})
}))
}

impl<'extractor> Extractor<'extractor> for GeoExtractor {
@@ -157,7 +161,9 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
let mut data_ref = context.data.borrow_mut_or_yield();

for change in changes {
if max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) {
if data_ref.spilled_removed.is_none()
&& max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm)
{
// We must spill as we allocated too much memory
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;
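
Note: the geo extractor's new guard spills to disk only once, when the arena first crosses the budget. A reduced sketch of the spill-once pattern, assuming the tempfile crate and a hypothetical 100 MiB budget:

    use std::fs::File;
    use std::io::{self, BufWriter, Write};

    fn maybe_spill(
        spill: &mut Option<BufWriter<File>>,
        allocated: usize,
        max_memory: Option<usize>,
    ) -> io::Result<()> {
        // Mirrors the new condition: only create the temp file if we have not
        // spilled yet and the allocation budget is exceeded.
        if spill.is_none() && max_memory.map_or(false, |mm| allocated >= mm) {
            *spill = Some(BufWriter::new(tempfile::tempfile()?));
        }
        if let Some(file) = spill {
            file.write_all(b"entry")?;
        }
        Ok(())
    }

    fn main() -> io::Result<()> {
        let mut spill = None;
        maybe_spill(&mut spill, 120 * 1024 * 1024, Some(100 * 1024 * 1024))?;
        assert!(spill.is_some());
        Ok(())
    }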
@@ -6,7 +6,9 @@ mod searchable;
mod vectors;

use bumpalo::Bump;
pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap};
pub use cache::{
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
pub use documents::*;
pub use faceted::*;
pub use geo::*;

@@ -351,6 +351,15 @@ impl WordDocidsExtractors {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&context.rtxn,
context.index,
context.db_fields_ids_map,
)? {
return Ok(());
}

let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter.insert_del_u32(
fid,

@@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}

let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
process_document_tokens(
document,

@@ -18,17 +18,17 @@ use crate::vector::error::{
use crate::vector::{Embedder, Embedding, EmbeddingConfigs};
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};

pub struct EmbeddingExtractor<'a> {
pub struct EmbeddingExtractor<'a, 'b> {
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
}

impl<'a> EmbeddingExtractor<'a> {
impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
pub fn new(
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
field_distribution: &'a FieldDistribution,
threads: &'a ThreadPoolNoAbort,
) -> Self {
@@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>(

unsafe impl MostlySend for EmbeddingExtractorData<'_> {}

impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
type Data = RefCell<EmbeddingExtractorData<'extractor>>;

fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
@@ -259,7 +259,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
// Currently this is the case as:
// 1. BVec are inside of the bumpalo
// 2. All other fields are either trivial (u8) or references.
struct Chunks<'a, 'extractor> {
struct Chunks<'a, 'b, 'extractor> {
texts: BVec<'a, &'a str>,
ids: BVec<'a, DocumentId>,

@@ -270,11 +270,11 @@ struct Chunks<'a, 'extractor> {
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
}

impl<'a, 'extractor> Chunks<'a, 'extractor> {
impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
#[allow(clippy::too_many_arguments)]
pub fn new(
embedder: &'a Embedder,
@@ -284,7 +284,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
doc_alloc: &'a Bump,
) -> Self {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
@@ -368,7 +368,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
unused_vectors_distribution: &UnusedVectorsDistributionBump,
threads: &ThreadPoolNoAbort,
sender: &EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
) -> Result<()> {
if let Some(external_docid) = has_manual_generation {

@@ -70,7 +70,7 @@ impl<
F: FnOnce(&'extractor Bump) -> Result<T>,
{
let doc_alloc =
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024))));
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024))));
let doc_alloc = doc_alloc.0.take();
let fields_ids_map = fields_ids_map_store
.get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into());

@@ -62,6 +62,7 @@ mod update_by_function;
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
wtxn: &mut RwTxn,
index: &'index Index,
pool: &ThreadPoolNoAbort,
grenad_parameters: GrenadParameters,
db_fields_ids_map: &'indexer FieldsIdsMap,
new_fields_ids_map: FieldsIdsMap,
@@ -76,9 +77,30 @@ where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000);
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);

// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
(grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default
|max_memory| {
// 2% of the indexing memory
let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
let new_grenad_parameters = GrenadParameters {
max_memory: Some(
max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
),
..grenad_parameters
};
(new_grenad_parameters, total_bbbuffer_capacity)
},
);

let (extractor_sender, mut writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();

let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
let new_fields_ids_map = RwLock::new(new_fields_ids_map);
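
Note: a worked example of the capacity split above, assuming 8 threads and a 2 GiB indexing budget. As written, max_memory / 100 / 2 computes 0.5% of the budget even though the inline comment says 2%, so the minimum_capacity floor dominates in most configurations:

    fn main() {
        let threads = 8; // assumed pool size
        let minimum_capacity: u64 = 50 * 1024 * 1024 * threads; // 400 MiB floor
        let max_memory: u64 = 2 * 1024 * 1024 * 1024; // assumed indexing budget

        let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
        let grenad_budget =
            max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024);

        assert_eq!(total_bbbuffer_capacity, 400 * 1024 * 1024); // the floor wins here
        assert_eq!(grenad_budget, (2048 - 400) * 1024 * 1024);
        println!("bbqueue: {total_bbbuffer_capacity} B, grenad: {grenad_budget} B");
    }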
|
||||
@ -96,6 +118,7 @@ where
|
||||
send_progress,
|
||||
};
|
||||
|
||||
let mut index_embeddings = index.embedding_configs(wtxn)?;
|
||||
let mut field_distribution = index.field_distribution(wtxn)?;
|
||||
let mut document_ids = index.documents_ids(wtxn)?;
|
||||
|
||||
@ -107,261 +130,261 @@ where
|
||||
let field_distribution = &mut field_distribution;
|
||||
let document_ids = &mut document_ids;
|
||||
let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
|
||||
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
|
||||
let _entered = span.enter();
|
||||
|
||||
            let rtxn = index.read_txn()?;

            // document but we need to create a function that collects and compresses documents.
            let document_sender = extractor_sender.documents();
            let document_extractor = DocumentsExtractor::new(document_sender, embedders);
            let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
            {
                let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
                let _entered = span.enter();
                extract(
                    document_changes,
                    &document_extractor,
                    indexing_context,
                    &mut extractor_allocs,
                    &datastore,
                    Step::ExtractingDocuments,
                )?;
            }
            {
                let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
                let _entered = span.enter();
                for document_extractor_data in datastore {
                    let document_extractor_data = document_extractor_data.0.into_inner();
                    for (field, delta) in document_extractor_data.field_distribution_delta {
                        let current = field_distribution.entry(field).or_default();
                        // adding the delta should never cause a negative result, as we are removing fields that previously existed.
                        *current = current.saturating_add_signed(delta);
                    }
                    document_extractor_data.docids_delta.apply_to(document_ids);
                }

                field_distribution.retain(|_, v| *v != 0);
            }
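Editor's note: the merge above folds per-thread signed deltas into the global field distribution; `u64::saturating_add_signed` clamps at zero so a removal delta can never underflow the counter. A minimal, self-contained sketch of that pattern (the field names are illustrative, not from the source):

use std::collections::BTreeMap;

fn main() {
    // Global distribution: field name -> number of documents containing it.
    let mut field_distribution: BTreeMap<String, u64> = BTreeMap::new();
    field_distribution.insert("title".into(), 10);

    // Per-thread deltas: 3 documents gained "title", 12 lost "overview".
    let deltas: Vec<(&str, i64)> = vec![("title", 3), ("overview", -12)];

    for (field, delta) in deltas {
        let current = field_distribution.entry(field.into()).or_default();
        // Clamps at 0 instead of panicking or wrapping on underflow.
        *current = current.saturating_add_signed(delta);
    }

    // Drop fields that no longer appear in any document.
    field_distribution.retain(|_, v| *v != 0);

    assert_eq!(field_distribution.get("title"), Some(&13));
    assert_eq!(field_distribution.get("overview"), None);
}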
            let facet_field_ids_delta;

            {
                let caches = {
                    let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
                    let _entered = span.enter();

                    FacetedDocidsExtractor::run_extraction(
                        grenad_parameters,
                        document_changes,
                        indexing_context,
                        &mut extractor_allocs,
                        &extractor_sender.field_id_docid_facet_sender(),
                        Step::ExtractingFacets,
                    )?
                };

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
                    let _entered = span.enter();

                    facet_field_ids_delta = merge_and_send_facet_docids(
                        caches,
                        FacetDatabases::new(index),
                        index,
                        extractor_sender.facet_docids(),
                    )?;
                }
            }

            {
                let WordDocidsCaches {
                    word_docids,
                    word_fid_docids,
                    exact_word_docids,
                    word_position_docids,
                    fid_word_count_docids,
                } = {
                    let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
                    let _entered = span.enter();

                    WordDocidsExtractors::run_extraction(
                        grenad_parameters,
                        document_changes,
                        indexing_context,
                        &mut extractor_allocs,
                        Step::ExtractingWords,
                    )?
                };

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
                    let _entered = span.enter();
                    merge_and_send_docids(
                        word_docids,
                        index.word_docids.remap_types(),
                        index,
                        extractor_sender.docids::<WordDocids>(),
                        &indexing_context.must_stop_processing,
                    )?;
                }

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
                    let _entered = span.enter();
                    merge_and_send_docids(
                        word_fid_docids,
                        index.word_fid_docids.remap_types(),
                        index,
                        extractor_sender.docids::<WordFidDocids>(),
                        &indexing_context.must_stop_processing,
                    )?;
                }

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
                    let _entered = span.enter();
                    merge_and_send_docids(
                        exact_word_docids,
                        index.exact_word_docids.remap_types(),
                        index,
                        extractor_sender.docids::<ExactWordDocids>(),
                        &indexing_context.must_stop_processing,
                    )?;
                }

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
                    let _entered = span.enter();
                    merge_and_send_docids(
                        word_position_docids,
                        index.word_position_docids.remap_types(),
                        index,
                        extractor_sender.docids::<WordPositionDocids>(),
                        &indexing_context.must_stop_processing,
                    )?;
                }

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
                    let _entered = span.enter();
                    merge_and_send_docids(
                        fid_word_count_docids,
                        index.field_id_word_count_docids.remap_types(),
                        index,
                        extractor_sender.docids::<FidWordCountDocids>(),
                        &indexing_context.must_stop_processing,
                    )?;
                }
            }

            // run the proximity extraction only if the precision is by word
            // this works only if the settings didn't change during this transaction.
            let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
            if proximity_precision == ProximityPrecision::ByWord {
                let caches = {
                    let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
                    let _entered = span.enter();

                    <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
                        grenad_parameters,
                        document_changes,
                        indexing_context,
                        &mut extractor_allocs,
                        Step::ExtractingWordProximity,
                    )?
                };

                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
                    let _entered = span.enter();

                    merge_and_send_docids(
                        caches,
                        index.word_pair_proximity_docids.remap_types(),
                        index,
                        extractor_sender.docids::<WordPairProximityDocids>(),
                        &indexing_context.must_stop_processing,
                    )?;
                }
            }

            'vectors: {
                if index_embeddings.is_empty() {
                    break 'vectors;
                }

                let embedding_sender = extractor_sender.embeddings();
                let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads());
                let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
                {
                    let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
                    let _entered = span.enter();

                    extract(
                        document_changes,
                        &extractor,
                        indexing_context,
                        &mut extractor_allocs,
                        &datastore,
                        Step::ExtractingEmbeddings,
                    )?;
                }
                {
                    let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
                    let _entered = span.enter();

                    for config in &mut index_embeddings {
                        'data: for data in datastore.iter_mut() {
                            let data = &mut data.get_mut().0;
                            let Some(deladd) = data.remove(&config.name) else { continue 'data; };
                            deladd.apply_to(&mut config.user_provided);
                        }
                    }
                }
            }

            'geo: {
                let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
                    break 'geo;
                };
                let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());

                {
                    let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
                    let _entered = span.enter();

                    extract(
                        document_changes,
                        &extractor,
                        indexing_context,
                        &mut extractor_allocs,
                        &datastore,
                        Step::WritingGeoPoints,
                    )?;
                }

                merge_and_send_rtree(
                    datastore,
                    &rtxn,
                    index,
                    extractor_sender.geo(),
                    &indexing_context.must_stop_processing,
                )?;
            }

            (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
            finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

            Result::Ok((facet_field_ids_delta, index_embeddings))
        }).unwrap()
    })?;
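Editor's note: the shape of the change above is that extractor threads no longer write to LMDB themselves; they publish operations into shared BBQueue ring buffers and a single thread that owns the write transaction drains them. A minimal sketch of that single-writer pattern, with `std::sync::mpsc` standing in for the BBQueue channel (all names here are illustrative, not the crate's API):

use std::sync::mpsc;
use std::thread;

// Stand-in for the operations the extractors publish.
enum WriteOp {
    Put { key: Vec<u8>, value: Vec<u8> },
    Delete { key: Vec<u8> },
}

fn main() {
    let (sender, receiver) = mpsc::channel::<WriteOp>();

    // Extractors: many producer threads, no access to the write txn.
    let producers: Vec<_> = (0..4)
        .map(|i| {
            let sender = sender.clone();
            thread::spawn(move || {
                sender
                    .send(WriteOp::Put {
                        key: format!("key-{i}").into_bytes(),
                        value: b"docids".to_vec(),
                    })
                    .unwrap();
            })
        })
        .collect();
    drop(sender); // close the channel once every producer is done

    // Writer: the only thread touching the (single-writer) database.
    let mut db = std::collections::BTreeMap::new();
    for op in receiver {
        match op {
            WriteOp::Put { key, value } => {
                db.insert(key, value);
            }
            WriteOp::Delete { key } => {
                db.remove(&key);
            }
        }
    }
    producers.into_iter().for_each(|h| h.join().unwrap());
    assert_eq!(db.len(), 4);
}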

    let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);

    let vector_arroy = index.vector_arroy;
    let mut rng = rand::rngs::StdRng::seed_from_u64(42);
    let indexer_span = tracing::Span::current();
    let arroy_writers: Result<HashMap<_, _>> = embedders
        .inner_as_ref()

@@ -384,7 +407,11 @@ where
        })
        .collect();

    // Used by the ArroySetVector to copy the embedding into an
    // aligned memory area, required by arroy to accept a new vector.
    let mut aligned_embedding = Vec::new();
    let mut arroy_writers = arroy_writers?;

    {
        let span = tracing::trace_span!(target: "indexing::write_db", "all");
        let _entered = span.enter();

@@ -392,110 +419,93 @@ where
        let span = tracing::trace_span!(target: "indexing::write_db", "post_merge");
        let mut _entered_post_merge = None;

        while let Some(action) = writer_receiver.recv_action() {
            if _entered_post_merge.is_none()
                && finished_extraction.load(std::sync::atomic::Ordering::Relaxed)
            {
                _entered_post_merge = Some(span.enter());
            }

            match action {
                ReceiverAction::WakeUp => (),
                ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => {
                    let database_name = database.database_name();
                    let database = database.database(index);
                    if let Err(error) = database.put(wtxn, &key, &value) {
                        return Err(Error::InternalError(InternalError::StorePut {
                            database_name,
                            key: bstr::BString::from(&key[..]),
                            value_length: value.len(),
                            error,
                        }));
                    }
                }
                ReceiverAction::LargeVectors(large_vectors) => {
                    let LargeVectors { docid, embedder_id, .. } = large_vectors;
                    let (_, _, writer, dimensions) =
                        arroy_writers.get(&embedder_id).expect("requested a missing embedder");
                    let mut embeddings = Embeddings::new(*dimensions);
                    for embedding in large_vectors.read_embeddings(*dimensions) {
                        embeddings.push(embedding.to_vec()).unwrap();
                    }
                    writer.del_items(wtxn, *dimensions, docid)?;
                    writer.add_items(wtxn, docid, &embeddings)?;
                }
            }

            // Every time there is a message in the channel we search
            // for new entries in the BBQueue buffers.
            write_from_bbqueue(
                &mut writer_receiver,
                index,
                wtxn,
                &arroy_writers,
                &mut aligned_embedding,
            )?;
        }

        // Once the extractor/writer channel is closed
        // we must process the remaining BBQueue messages.
        write_from_bbqueue(
            &mut writer_receiver,
            index,
            wtxn,
            &arroy_writers,
            &mut aligned_embedding,
        )?;
    }

    (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));

    let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;

    'vectors: {
        let span =
            tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
        let _entered = span.enter();

        if index_embeddings.is_empty() {
            break 'vectors;
        }

        (indexing_context.send_progress)(Progress::from_step(
            Step::WritingEmbeddingsToDatabase,
        ));

        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
        for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
            let dimensions = *dimensions;
            writer.build_and_quantize(
                wtxn,
                &mut rng,
                dimensions,
                false,
                &indexing_context.must_stop_processing,
            )?;
        }

        index.put_embedding_configs(wtxn, index_embeddings)?;
    }

    (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));

@@ -537,6 +547,72 @@ where
    Ok(())
}

/// A function dedicated to managing all the available BBQueue frames.
///
/// It reads all the available frames, does the corresponding database operations,
/// and stops when no more frames are available.
fn write_from_bbqueue(
    writer_receiver: &mut WriterBbqueueReceiver<'_>,
    index: &Index,
    wtxn: &mut RwTxn<'_>,
    arroy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, ArroyWrapper, usize)>,
    aligned_embedding: &mut Vec<f32>,
) -> crate::Result<()> {
    while let Some(frame_with_header) = writer_receiver.recv_frame() {
        match frame_with_header.header() {
            EntryHeader::DbOperation(operation) => {
                let database_name = operation.database.database_name();
                let database = operation.database.database(index);
                let frame = frame_with_header.frame();
                match operation.key_value(frame) {
                    (key, Some(value)) => {
                        if let Err(error) = database.put(wtxn, key, value) {
                            return Err(Error::InternalError(InternalError::StorePut {
                                database_name,
                                key: key.into(),
                                value_length: value.len(),
                                error,
                            }));
                        }
                    }
                    (key, None) => match database.delete(wtxn, key) {
                        Ok(false) => {
                            unreachable!("We tried to delete an unknown key: {key:?}")
                        }
                        Ok(_) => (),
                        Err(error) => {
                            return Err(Error::InternalError(InternalError::StoreDeletion {
                                database_name,
                                key: key.into(),
                                error,
                            }));
                        }
                    },
                }
            }
            EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => {
                for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers {
                    let dimensions = *dimensions;
                    writer.del_items(wtxn, dimensions, docid)?;
                }
            }
            EntryHeader::ArroySetVectors(asvs) => {
                let ArroySetVectors { docid, embedder_id, .. } = asvs;
                let frame = frame_with_header.frame();
                let (_, _, writer, dimensions) =
                    arroy_writers.get(&embedder_id).expect("requested a missing embedder");
                let mut embeddings = Embeddings::new(*dimensions);
                let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
                embeddings.append(all_embeddings.to_vec()).unwrap();
                writer.del_items(wtxn, *dimensions, docid)?;
                writer.add_items(wtxn, docid, &embeddings)?;
            }
        }
    }

    Ok(())
}
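Editor's note: Meilisearch pins a fork of the `bbqueue` crate here (see the `Cargo.lock` change above), so the exact channel API is project-specific. The grant/commit/release cycle that the receiver loop above relies on looks like this in upstream `bbqueue` 0.5; treat it as an orientation sketch, not this codebase's API:

use bbqueue::BBBuffer;

fn main() {
    // A fixed-capacity, lock-free SPSC ring buffer.
    static BB: BBBuffer<64> = BBBuffer::new();
    let (mut producer, mut consumer) = BB.try_split().unwrap();

    // Producer side: reserve a contiguous grant, fill it, commit it.
    let mut wgrant = producer.grant_exact(4).unwrap();
    wgrant.buf().copy_from_slice(&[1, 2, 3, 4]);
    wgrant.commit(4); // makes the 4 bytes visible to the consumer

    // Consumer side: read whatever is committed, then release it
    // so the space can be reused by the producer.
    let rgrant = consumer.read().unwrap();
    assert_eq!(rgrant.buf(), &[1, 2, 3, 4]);
    let len = rgrant.buf().len();
    rgrant.release(len);
}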

#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
fn compute_prefix_database(
    index: &Index,
@@ -9,8 +9,8 @@ use roaring::RoaringBitmap;

 use super::channel::*;
 use super::extract::{
-    merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
-    GeoExtractorData,
+    merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
+    FacetKind, GeoExtractorData,
 };
 use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};

@@ -19,7 +19,7 @@ pub fn merge_and_send_rtree<'extractor, MSP>(
     datastore: impl IntoIterator<Item = RefCell<GeoExtractorData<'extractor>>>,
     rtxn: &RoTxn,
     index: &Index,
-    geo_sender: GeoSender<'_>,
+    geo_sender: GeoSender<'_, '_>,
     must_stop_processing: &MSP,
 ) -> Result<()>
 where
@@ -34,7 +34,7 @@ where
     }

     let mut frozen = data.into_inner().freeze()?;
-    for result in frozen.iter_and_clear_removed() {
+    for result in frozen.iter_and_clear_removed()? {
         let extracted_geo_point = result?;
         let removed = rtree.remove(&GeoPoint::from(extracted_geo_point));
         debug_assert!(removed.is_some());
@@ -42,7 +42,7 @@ where
         debug_assert!(removed);
     }

-    for result in frozen.iter_and_clear_inserted() {
+    for result in frozen.iter_and_clear_inserted()? {
         let extracted_geo_point = result?;
         rtree.insert(GeoPoint::from(extracted_geo_point));
         let inserted = faceted.insert(extracted_geo_point.docid);
@@ -56,38 +56,37 @@ where

     let rtree_mmap = unsafe { Mmap::map(&file)? };
     geo_sender.set_rtree(rtree_mmap).unwrap();
-    geo_sender.set_geo_faceted(&faceted).unwrap();
+    geo_sender.set_geo_faceted(&faceted)?;

     Ok(())
 }

 #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
-pub fn merge_and_send_docids<'extractor, MSP>(
+pub fn merge_and_send_docids<'extractor, MSP, D>(
     mut caches: Vec<BalancedCaches<'extractor>>,
     database: Database<Bytes, Bytes>,
     index: &Index,
-    docids_sender: impl DocidsSender + Sync,
+    docids_sender: WordDocidsSender<D>,
     must_stop_processing: &MSP,
 ) -> Result<()>
 where
     MSP: Fn() -> bool + Sync,
+    D: DatabaseType + Sync,
 {
     transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
         let rtxn = index.read_txn()?;
-        let mut buffer = Vec::new();
         if must_stop_processing() {
             return Err(InternalError::AbortedIndexation.into());
         }
-        merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
+        merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
             let current = database.get(&rtxn, key)?;
             match merge_cbo_bitmaps(current, del, add)? {
                 Operation::Write(bitmap) => {
-                    let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
-                    docids_sender.write(key, value).unwrap();
+                    docids_sender.write(key, &bitmap)?;
                     Ok(())
                 }
                 Operation::Delete => {
-                    docids_sender.delete(key).unwrap();
+                    docids_sender.delete(key)?;
                     Ok(())
                 }
                 Operation::Ignore => Ok(()),
@@ -101,26 +100,24 @@ pub fn merge_and_send_facet_docids<'extractor>(
     mut caches: Vec<BalancedCaches<'extractor>>,
     database: FacetDatabases,
     index: &Index,
-    docids_sender: impl DocidsSender + Sync,
+    docids_sender: FacetDocidsSender,
 ) -> Result<FacetFieldIdsDelta> {
     transpose_and_freeze_caches(&mut caches)?
         .into_par_iter()
         .map(|frozen| {
             let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
             let rtxn = index.read_txn()?;
-            let mut buffer = Vec::new();
-            merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
+            merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
                 let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
                 match merge_cbo_bitmaps(current, del, add)? {
                     Operation::Write(bitmap) => {
                         facet_field_ids_delta.register_from_key(key);
-                        let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
-                        docids_sender.write(key, value).unwrap();
+                        docids_sender.write(key, &bitmap)?;
                         Ok(())
                     }
                     Operation::Delete => {
                         facet_field_ids_delta.register_from_key(key);
-                        docids_sender.delete(key).unwrap();
+                        docids_sender.delete(key)?;
                         Ok(())
                     }
                     Operation::Ignore => Ok(()),
@@ -252,10 +249,3 @@ fn merge_cbo_bitmaps(
         }
     }
 }
-
-/// TODO Return the slice directly from the serialize_into method
-fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec<u8>) -> &'b [u8] {
-    buffer.clear();
-    CboRoaringBitmapCodec::serialize_into(bitmap, buffer);
-    buffer.as_slice()
-}
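Editor's note: `merge_cbo_bitmaps` itself is outside this hunk, but the del/add merge that the two callers above feed it reduces to plain bitmap algebra: subtract the removed docids from the current value, union the added ones, and delete the key when nothing is left. A minimal sketch of that semantics with the `roaring` crate (the `Operation` enum mirrors the one in this file; the rest is illustrative and skips the real function's no-change detection):

use roaring::RoaringBitmap;

enum Operation {
    Write(RoaringBitmap),
    Delete,
    // The real function also detects "nothing changed" and skips the write.
    #[allow(dead_code)]
    Ignore,
}

// `current` is what the LMDB database holds; `del`/`add` come from the
// extractor caches: documents that lost and gained this key.
fn merge(
    current: Option<RoaringBitmap>,
    del: Option<RoaringBitmap>,
    add: Option<RoaringBitmap>,
) -> Operation {
    let mut bitmap = current.unwrap_or_default();
    if let Some(del) = del {
        bitmap -= del;
    }
    if let Some(add) = add {
        bitmap |= add;
    }
    if bitmap.is_empty() {
        Operation::Delete // no document left for this key
    } else {
        Operation::Write(bitmap)
    }
}

fn main() {
    let current: Option<RoaringBitmap> = Some([1u32, 2, 3].into_iter().collect());
    let del: Option<RoaringBitmap> = Some([2u32].into_iter().collect());
    let add: Option<RoaringBitmap> = Some([4u32].into_iter().collect());
    match merge(current, del, add) {
        Operation::Write(b) => assert_eq!(b.iter().collect::<Vec<_>>(), vec![1, 3, 4]),
        _ => unreachable!(),
    }
}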

@@ -5,6 +5,7 @@ pub trait RefCellExt<T: ?Sized> {
         &self,
     ) -> std::result::Result<RefMut<'_, T>, std::cell::BorrowMutError>;

+    #[track_caller]
     fn borrow_mut_or_yield(&self) -> RefMut<'_, T> {
         self.try_borrow_mut_or_yield().unwrap()
     }
@@ -11,8 +11,8 @@ pub enum Step {
     ExtractingEmbeddings,
     WritingGeoPoints,
     WritingToDatabase,
-    WritingEmbeddingsToDatabase,
     WaitingForExtractors,
+    WritingEmbeddingsToDatabase,
     PostProcessingFacets,
     PostProcessingWords,
     Finalizing,
@@ -29,8 +29,8 @@ impl Step {
         Step::ExtractingEmbeddings => "extracting embeddings",
         Step::WritingGeoPoints => "writing geo points",
         Step::WritingToDatabase => "writing to database",
-        Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
         Step::WaitingForExtractors => "waiting for extractors",
+        Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
         Step::PostProcessingFacets => "post-processing facets",
         Step::PostProcessingWords => "post-processing words",
         Step::Finalizing => "finalizing",
@@ -1,4 +1,4 @@
-use std::collections::HashSet;
+use std::collections::BTreeSet;
 use std::io::BufWriter;

 use fst::{Set, SetBuilder, Streamer};
@@ -75,8 +75,8 @@ pub struct PrefixData {

 #[derive(Debug)]
 pub struct PrefixDelta {
-    pub modified: HashSet<Prefix>,
-    pub deleted: HashSet<Prefix>,
+    pub modified: BTreeSet<Prefix>,
+    pub deleted: BTreeSet<Prefix>,
 }

 struct PrefixFstBuilder {
@@ -86,7 +86,7 @@ struct PrefixFstBuilder {
     prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
     current_prefix: Vec<Prefix>,
     current_prefix_count: Vec<usize>,
-    modified_prefixes: HashSet<Prefix>,
+    modified_prefixes: BTreeSet<Prefix>,
     current_prefix_is_modified: Vec<bool>,
 }

@@ -110,7 +110,7 @@ impl PrefixFstBuilder {
             prefix_fst_builders,
             current_prefix: vec![Prefix::new(); max_prefix_length],
             current_prefix_count: vec![0; max_prefix_length],
-            modified_prefixes: HashSet::new(),
+            modified_prefixes: BTreeSet::new(),
             current_prefix_is_modified: vec![false; max_prefix_length],
         })
     }
@@ -180,7 +180,7 @@ impl PrefixFstBuilder {
         let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
         let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
         let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
-        let mut deleted_prefixes = HashSet::new();
+        let mut deleted_prefixes = BTreeSet::new();
         {
             let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
             while let Some(prefix) = deleted_prefixes_stream.next() {

@@ -1,5 +1,5 @@
 use std::cell::RefCell;
-use std::collections::HashSet;
+use std::collections::BTreeSet;
 use std::io::{BufReader, BufWriter, Read, Seek, Write};

 use hashbrown::HashMap;
@@ -37,8 +37,8 @@ impl WordPrefixDocids {
     fn execute(
         self,
         wtxn: &mut heed::RwTxn,
-        prefix_to_compute: &HashSet<Prefix>,
-        prefix_to_delete: &HashSet<Prefix>,
+        prefix_to_compute: &BTreeSet<Prefix>,
+        prefix_to_delete: &BTreeSet<Prefix>,
     ) -> Result<()> {
         delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
         self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@@ -48,7 +48,7 @@ impl WordPrefixDocids {
     fn recompute_modified_prefixes(
         &self,
         wtxn: &mut RwTxn,
-        prefixes: &HashSet<Prefix>,
+        prefixes: &BTreeSet<Prefix>,
     ) -> Result<()> {
         // We fetch the docids associated to the newly added word prefix fst only.
         // And collect the CboRoaringBitmaps pointers in a HashMap.
@@ -76,7 +76,7 @@ impl WordPrefixDocids {
             .union()?;

         buffer.clear();
-        CboRoaringBitmapCodec::serialize_into(&output, buffer);
+        CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
         index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
         file.write_all(buffer)
     })?;
@@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
     pub fn from_prefixes(
         database: Database<Bytes, CboRoaringBitmapCodec>,
         rtxn: &'rtxn RoTxn,
-        prefixes: &'a HashSet<Prefix>,
+        prefixes: &'a BTreeSet<Prefix>,
     ) -> heed::Result<Self> {
         let database = database.remap_data_type::<Bytes>();

@@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
     fn execute(
         self,
         wtxn: &mut heed::RwTxn,
-        prefix_to_compute: &HashSet<Prefix>,
-        prefix_to_delete: &HashSet<Prefix>,
+        prefix_to_compute: &BTreeSet<Prefix>,
+        prefix_to_delete: &BTreeSet<Prefix>,
     ) -> Result<()> {
         delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
         self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
     fn recompute_modified_prefixes(
         &self,
         wtxn: &mut RwTxn,
-        prefixes: &HashSet<Prefix>,
+        prefixes: &BTreeSet<Prefix>,
     ) -> Result<()> {
         // We fetch the docids associated to the newly added word prefix fst only.
         // And collect the CboRoaringBitmaps pointers in a HashMap.
@@ -211,7 +211,7 @@ impl WordPrefixIntegerDocids {
             .union()?;

         buffer.clear();
-        CboRoaringBitmapCodec::serialize_into(&output, buffer);
+        CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
         index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() });
         file.write_all(buffer)?;
     }
@@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
     pub fn from_prefixes(
         database: Database<Bytes, CboRoaringBitmapCodec>,
         rtxn: &'rtxn RoTxn,
-        prefixes: &'a HashSet<Prefix>,
+        prefixes: &'a BTreeSet<Prefix>,
     ) -> heed::Result<Self> {
         let database = database.remap_data_type::<Bytes>();

@@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
 fn delete_prefixes(
     wtxn: &mut RwTxn,
     prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
-    prefixes: &HashSet<Prefix>,
+    prefixes: &BTreeSet<Prefix>,
 ) -> Result<()> {
     // We remove all the entries that are no longer required in this word prefix docids database.
     for prefix in prefixes {
@@ -309,8 +309,8 @@ fn delete_prefixes(
 pub fn compute_word_prefix_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixDocids::new(
@@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
 pub fn compute_exact_word_prefix_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixDocids::new(
@@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
 pub fn compute_word_prefix_fid_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixIntegerDocids::new(
@@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
 pub fn compute_word_prefix_position_docids(
     wtxn: &mut RwTxn,
     index: &Index,
-    prefix_to_compute: &HashSet<Prefix>,
-    prefix_to_delete: &HashSet<Prefix>,
+    prefix_to_compute: &BTreeSet<Prefix>,
+    prefix_to_delete: &BTreeSet<Prefix>,
     grenad_parameters: GrenadParameters,
 ) -> Result<()> {
     WordPrefixIntegerDocids::new(
@@ -475,7 +475,7 @@ impl<F> Embeddings<F> {
         Ok(())
     }

-    /// Append a flat vector of embeddings a the end of the embeddings.
+    /// Append a flat vector of embeddings at the end of the embeddings.
     ///
     /// If `embeddings.len() % self.dimension != 0`, then the append operation fails.
     pub fn append(&mut self, mut embeddings: Vec<F>) -> Result<(), Vec<F>> {
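Editor's note: a minimal sketch of the dimension invariant that `append` documents above; the struct is a stand-in, and only the `len() % dimension` check mirrors the real method:

struct Embeddings<F> {
    data: Vec<F>,
    dimension: usize,
}

impl<F> Embeddings<F> {
    /// Refuses ragged input: the flat vector must hold a whole number of
    /// `dimension`-sized embeddings, and it is handed back to the caller
    /// on failure instead of being dropped.
    fn append(&mut self, mut embeddings: Vec<F>) -> Result<(), Vec<F>> {
        if self.dimension == 0 || embeddings.len() % self.dimension != 0 {
            return Err(embeddings);
        }
        self.data.append(&mut embeddings);
        Ok(())
    }
}

fn main() {
    let mut store = Embeddings { data: Vec::new(), dimension: 3 };
    assert!(store.append(vec![0.0f32; 6]).is_ok()); // two 3-d embeddings
    assert!(store.append(vec![0.0f32; 4]).is_err()); // ragged input: rejected
}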

@@ -64,6 +64,7 @@ fn test_facet_distribution_with_no_facet_values() {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,

@@ -101,6 +101,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,

@@ -333,6 +333,7 @@ fn criteria_ascdesc() {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,

@@ -142,6 +142,7 @@ fn test_typo_disabled_on_word() {
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,

@@ -82,6 +82,10 @@ pub struct BenchDeriveArgs {
     /// Reason for the benchmark invocation
     #[arg(short, long)]
     reason: Option<String>,
+
+    /// The maximum time in seconds we allow for fetching the task queue before timing out.
+    #[arg(long, default_value_t = 60)]
+    tasks_queue_timeout_secs: u64,
 }

 pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
@@ -127,7 +131,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
     let meili_client = Client::new(
         Some("http://127.0.0.1:7700".into()),
         args.master_key.as_deref(),
-        Some(std::time::Duration::from_secs(60)),
+        Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)),
     )?;

     // enter runtime

@@ -16,6 +16,7 @@ struct ListFeaturesDeriveArgs {
 #[command(author, version, about, long_about)]
 #[command(name = "cargo xtask")]
 #[command(bin_name = "cargo xtask")]
+#[allow(clippy::large_enum_variant)] // please, that's enough...
 enum Command {
     ListFeatures(ListFeaturesDeriveArgs),
     Bench(BenchDeriveArgs),
105 workloads/hackernews-add-new-documents.json Normal file
@@ -0,0 +1,105 @@
{
  "name": "hackernews.add_new_documents",
  "run_count": 3,
  "extra_cli_args": [],
  "assets": {
    "hackernews-01.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
      "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
    },
    "hackernews-02.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
      "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
    },
    "hackernews-03.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
      "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
    },
    "hackernews-04.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
      "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
    },
    "hackernews-05.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
      "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
    }
  },
  "precommands": [
    {
      "route": "indexes/movies/settings",
      "method": "PATCH",
      "body": {
        "inline": {
          "displayedAttributes": ["title", "by", "score", "time", "text"],
          "searchableAttributes": ["title", "text"],
          "filterableAttributes": ["by", "kids", "parent"],
          "sortableAttributes": ["score", "time"]
        }
      },
      "synchronous": "WaitForTask"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-01.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-02.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-03.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-04.ndjson" },
      "synchronous": "WaitForTask"
    }
  ],
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-05.ndjson" },
      "synchronous": "WaitForTask"
    }
  ]
}
111 workloads/hackernews-modify-facet-numbers.json Normal file
@@ -0,0 +1,111 @@
{
  "name": "hackernews.modify_facet_numbers",
  "run_count": 3,
  "extra_cli_args": [],
  "assets": {
    "hackernews-01.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
      "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
    },
    "hackernews-02.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
      "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
    },
    "hackernews-03.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
      "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
    },
    "hackernews-04.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
      "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
    },
    "hackernews-05.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
      "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
    },
    "hackernews-02-modified-filters.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
      "sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
    }
  },
  "precommands": [
    {
      "route": "indexes/movies/settings",
      "method": "PATCH",
      "body": {
        "inline": {
          "displayedAttributes": ["title", "by", "score", "time", "text"],
          "searchableAttributes": ["title", "text"],
          "filterableAttributes": ["by", "kids", "parent"],
          "sortableAttributes": ["score", "time"]
        }
      },
      "synchronous": "WaitForTask"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-01.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-02.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-03.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-04.ndjson" },
      "synchronous": "WaitForTask"
    }
  ],
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-02-modified-filters.ndjson" },
      "synchronous": "WaitForTask"
    }
  ]
}
111 workloads/hackernews-modify-facet-strings.json Normal file
@@ -0,0 +1,111 @@
{
  "name": "hackernews.modify_facet_strings",
  "run_count": 3,
  "extra_cli_args": [],
  "assets": {
    "hackernews-01.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
      "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
    },
    "hackernews-02.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
      "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
    },
    "hackernews-03.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
      "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
    },
    "hackernews-04.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
      "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
    },
    "hackernews-05.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
      "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
    },
    "hackernews-01-modified-filters.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
      "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
    }
  },
  "precommands": [
    {
      "route": "indexes/movies/settings",
      "method": "PATCH",
      "body": {
        "inline": {
          "displayedAttributes": ["title", "by", "score", "time", "text"],
          "searchableAttributes": ["title", "text"],
          "filterableAttributes": ["by", "kids", "parent"],
          "sortableAttributes": ["score", "time"]
        }
      },
      "synchronous": "WaitForTask"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-01.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-02.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-03.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-04.ndjson" },
      "synchronous": "WaitForTask"
    }
  ],
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-01-modified-filters.ndjson" },
      "synchronous": "WaitForTask"
    }
  ]
}
123 workloads/hackernews-modify-searchables.json Normal file
@@ -0,0 +1,123 @@
{
  "name": "hackernews.modify_searchables",
  "run_count": 3,
  "extra_cli_args": [],
  "assets": {
    "hackernews-01.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
      "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
    },
    "hackernews-02.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
      "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
    },
    "hackernews-03.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
      "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
    },
    "hackernews-04.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
      "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
    },
    "hackernews-05.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
      "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
    },
    "hackernews-01-modified-searchables.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson",
      "sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c"
    },
    "hackernews-02-modified-searchables.ndjson": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson",
      "sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e"
    }
  },
  "precommands": [
    {
      "route": "indexes/movies/settings",
      "method": "PATCH",
      "body": {
        "inline": {
          "displayedAttributes": ["title", "by", "score", "time", "text"],
          "searchableAttributes": ["title", "text"],
          "filterableAttributes": ["by", "kids", "parent"],
          "sortableAttributes": ["score", "time"]
        }
      },
      "synchronous": "WaitForTask"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-01.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-02.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-03.ndjson" },
      "synchronous": "WaitForResponse"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-04.ndjson" },
      "synchronous": "WaitForTask"
    }
  ],
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-01-modified-searchables.ndjson" },
      "synchronous": "WaitForTask"
    },
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": { "asset": "hackernews-02-modified-searchables.ndjson" },
      "synchronous": "WaitForTask"
    }
  ]
}