Compare commits

...

93 Commits

Author SHA1 Message Date
1cf14e765f Change implementation of MergedDocuments::iter_top_level_fields 2024-12-09 09:38:21 +01:00
4a082683df Merge #5131
5131: Ignore documents whose selected fields didn't change r=dureuill a=dureuill

Attempts to improve the performance of the new indexer by ignoring documents whose selected fields didn't change (a sketch of the idea follows the list below):

- Add `Update::has_changed_for_fields` function
- Ignore documents whose searchable attributes didn't change for word docids and word pair proximity extraction
- Ignore documents whose faceted attributes didn't change for facet extraction
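
A minimal sketch of the idea behind `Update::has_changed_for_fields`, using hypothetical stand-in types (flat `BTreeMap` field maps and a made-up signature instead of the real milli document types), not the actual implementation:

```rust
use std::collections::BTreeMap;

/// Hypothetical update holding the old and new versions of a document
/// as flat field-name -> JSON value maps.
struct DocumentUpdate {
    old: BTreeMap<String, serde_json::Value>,
    new: BTreeMap<String, serde_json::Value>,
}

impl DocumentUpdate {
    /// Returns true if at least one of the selected fields differs
    /// between the old and the new version of the document.
    fn has_changed_for_fields(&self, fields: &[&str]) -> bool {
        fields.iter().any(|f| self.old.get(*f) != self.new.get(*f))
    }
}

/// Word docids extraction can then bail out early for unchanged documents.
fn extract_word_docids(update: &DocumentUpdate, searchable_fields: &[&str]) {
    if !update.has_changed_for_fields(searchable_fields) {
        return; // no searchable attribute changed: nothing to re-extract
    }
    // ... tokenize the changed fields and emit word -> docid entries ...
}
```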

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-05 16:04:16 +00:00
26be5e0733 Merge #5123
5123: Fix batch details r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5079
Fixes https://github.com/meilisearch/meilisearch/issues/5112

## What does this PR do?
- Report the tasks currently being processed as processing in the batch stats instead of enqueued
- Stop counting one extra task for all non-prioritized batches in the stats
- Add a test

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-05 15:21:55 +00:00
bd5110a2fe Fix clippy warnings 2024-12-05 16:13:07 +01:00
fa8b9acdf6 Ignore documents that didn't change in facets 2024-12-05 16:12:52 +01:00
2b74d1824b Ignore documents that didn't change any field in word pair proximity 2024-12-05 15:56:22 +01:00
c77b00d3ac Don't extract word docids when no searchable changed 2024-12-05 15:51:58 +01:00
c77073efcc Update::has_changed_for_fields 2024-12-05 15:50:12 +01:00
1537323eb9 Merge #5119
5119: Settings opt out error msg r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
PRD: https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-fff4b06b651f8108ade3f858aeb16b14?pvs=4
## What does this PR do?

Add a new error code and message when the user tries a facet search on an index where the facet search is disabled:
```json
{
  "message": "The facet search is disabled for this index",
  "code": "facet_search_disabled",
  "type": "invalid_request",
  "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled"
}
```


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-05 13:51:11 +00:00
a0a3b55700 Change error code 2024-12-05 14:48:29 +01:00
214b51de87 try to fix the snapshot on demand flaky test 2024-12-05 14:45:54 +01:00
95975944d7 fix the dumps missing the empty swap index tasks 2024-12-05 14:23:38 +01:00
9a9383643f Merge #5125
5125: Change the default max memory usage to 5% of the total memory r=ManyTheFish a=Kerollmops

After thorough testing, we found that dedicating 5% of the total available memory to resident allocations (caches and channels) is the best approach.

The main reason is that the new indexer is highly memory-map oriented: it relies on LMDB and reads the database while performing the indexing. So, by leaving as much memory as possible to LMDB and the OS, the key-value store reads and all other indexing operations run faster because more pages stay hot in the cache. In #5124, we also sorted the entries to merge to improve the read speed of LMDB.

This is common in database management systems: reading from disk is much faster when done in lexicographic order (the default sort order of the keys). The entries have a good chance of already being in the OS page cache, as they were loaded by a previous read, and reading from disk is very slow compared to reading from memory.
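
A minimal sketch of how such a default can be derived, with the total memory passed in by the caller and a 100 MiB floor (the floor comes from the later "Force max_memory to a min of 100MiB" commit; the exact detection logic and constants in Meilisearch may differ):

```rust
const MIN_MAX_MEMORY: usize = 100 * 1024 * 1024; // 100 MiB floor

/// Hypothetical helper: derive the default indexing memory budget as 5% of
/// the total memory, never going below the floor.
fn default_max_memory(total_memory: usize) -> usize {
    (total_memory / 20).max(MIN_MAX_MEMORY) // 5% == 1/20
}

fn main() {
    // e.g. on a machine with 16 GiB of RAM the budget is about 819 MiB,
    // leaving the remaining ~95% to LMDB and the OS page cache.
    let total = 16usize * 1024 * 1024 * 1024;
    println!("max memory budget: {} bytes", default_max_memory(total));
}
```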

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-05 10:11:25 +00:00
cac355bfa7 Merge #5124
5124: Optimize Prefixes and Merges r=ManyTheFish a=Kerollmops

In this PR, we optimize the LMDB reads by reading the entries in lexicographic order, to make better use of the memory-mapped OS cache:

 - Optimize the prefix generation for word position docids (`@manythefish`)
 - Optimize the parallel merging of the caches by sorting the entries before merging them (`@kerollmops`); a sketch of the sorting step follows the benchmark numbers below
 
## Benchmarks on 1 CPU, 2 GB, gp3 (5k IOPS)
 
Before, on the tag meilisearch-v1.12.0-rc.3:

```
word_position_docids:merge_and_send_docids: 988s
compute_word_fst: 23.3s
word_pair_proximity_docids:merge_and_send_docids: 428s
compute_word_prefix_fid_docids:recompute_modified_prefixes: 76.3s
compute_word_prefix_position_docids:recompute_modified_prefixes:from_prefixes: 429s
```

After sorting the whole `HashMap`s into a `Vec` on this branch:

```
word_position_docids:merge_and_send_docids: 202s
compute_word_fst: 20.4s
word_pair_proximity_docids:merge_and_send_docids: 427s
compute_word_prefix_fid_docids:recompute_modified_prefixes: 65.5s
compute_word_prefix_position_docids:recompute_modified_prefixes:from_prefixes: 62.5s
```
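
A minimal sketch of the sorting step, assuming a simplified cache shaped as a `HashMap<Vec<u8>, RoaringBitmap>` rather than the real milli cache types: collecting the entries into a `Vec` and sorting them by key means the subsequent LMDB reads and writes happen in lexicographic order, which keeps the memory-mapped pages hot.

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

/// Simplified stand-in for one extractor cache: key bytes -> docids.
type Cache = HashMap<Vec<u8>, RoaringBitmap>;

/// Drain the caches into a single Vec sorted lexicographically by key,
/// so the merge/write phase touches the LMDB pages in order.
fn sorted_entries(caches: Vec<Cache>) -> Vec<(Vec<u8>, RoaringBitmap)> {
    let mut entries: Vec<_> = caches.into_iter().flatten().collect();
    entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
    entries
}

/// Merge consecutive entries sharing the same key by unioning their bitmaps.
fn merge_sorted(entries: Vec<(Vec<u8>, RoaringBitmap)>) -> Vec<(Vec<u8>, RoaringBitmap)> {
    let mut merged: Vec<(Vec<u8>, RoaringBitmap)> = Vec::new();
    for (key, bitmap) in entries {
        match merged.last_mut() {
            Some((last, acc)) if *last == key => *acc |= bitmap,
            _ => merged.push((key, bitmap)),
        }
    }
    merged
}
```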

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-05 09:35:52 +00:00
9020a50df8 Change the default max memory usage to 5% of the total memory 2024-12-05 10:14:46 +01:00
52843123d4 Clean up and remove the non-sorted merge_caches function 2024-12-05 10:03:05 +01:00
6298db5bea Merge #5113
5113: Fix the Minimum BBQueue channel threshold r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-05 09:01:02 +00:00
a003a0934a Merge #5121
5121: Make the tasks pulling timeout configurable r=dureuill a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-04 17:04:14 +00:00
3a11e39c01 Force max_memory to a min of 100MiB 2024-12-04 17:53:30 +01:00
5f896b1050 Fix geo when spilling 2024-12-04 17:51:12 +01:00
d0c4e6da6b Make clippy happy 2024-12-04 17:39:10 +01:00
2da5584bb5 Make the tasks pulling timeout configurable 2024-12-04 17:39:07 +01:00
b7eb802ae6 Merge #5120
5120: Add cross tasks r=Kerollmops a=ManyTheFish

Add 4 xtask bench workloads:
- `hackernews-add-new-documents`: adds new documents to a DB that already contains documents
- `hackernews-modify-facet-numbers`: modifies number-valued filterable fields of documents in a DB that already contains documents
- `hackernews-modify-facet-strings`: modifies string-valued filterable fields of documents in a DB that already contains documents
- `hackernews-modify-searchables`: modifies searchable fields of documents in a DB that already contains documents

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-04 16:16:57 +00:00
2e32d0474c Lexicographically sort all the map to merge 2024-12-04 17:05:11 +01:00
cb99ac6f7e Consume vec instead of draining 2024-12-04 17:00:22 +01:00
be411435f5 Use the merge_caches_alt function in the docids merging 2024-12-04 16:37:29 +01:00
29ef164530 Introduce a new semi ordered merge function 2024-12-04 16:33:35 +01:00
739c52a3cd Replace HashSets by BTreeSets for the prefixes 2024-12-04 16:16:48 +01:00
7a2af06b1e update the impacted snapshots 2024-12-04 15:52:24 +01:00
cb0c3a5aad stop adding one enqueued tasks to all unprioritized batches 2024-12-04 15:48:28 +01:00
8388698993 Fix dat hash 2024-12-04 15:09:10 +01:00
cbcf6c9ba3 make the processing tasks as processing in a batch 2024-12-04 14:48:48 +01:00
bf742d81cf add a test 2024-12-04 14:47:02 +01:00
7458f0386c fix asset name 2024-12-04 14:44:57 +01:00
fc1df5793c fix tests 2024-12-04 14:35:20 +01:00
3ded069042 Merge #5122
5122: Yield the BBQueue writing loop r=ManyTheFish a=Kerollmops

We prefer yielding to let the writing thread do its job instead of spin looping.
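
A minimal sketch of the pattern, on a hypothetical receive loop rather than the actual BBQueue writing loop:

```rust
use std::thread;

/// Drain frames from a hypothetical receiver, yielding instead of
/// spin-looping whenever no frame is ready yet.
fn writing_loop(
    mut try_recv_frame: impl FnMut() -> Option<Vec<u8>>,
    mut write_frame: impl FnMut(Vec<u8>),
) {
    loop {
        match try_recv_frame() {
            // Hypothetical end-of-stream marker.
            Some(frame) if frame.is_empty() => break,
            Some(frame) => write_frame(frame),
            // Give the time slice back to the OS scheduler so the other
            // threads can make progress, instead of burning a core spinning.
            None => thread::yield_now(),
        }
    }
}
```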

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-04 13:33:51 +00:00
261d2ceb06 Yield the BBQueue writer instead of spin looping 2024-12-04 14:16:40 +01:00
1a17e2e572 fix formating 2024-12-04 13:57:06 +01:00
5b8cd68abe Merge #5110
5110: Increase margin on deletion of task r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5077

## What does this PR do?
- Increase the margin we keep to enqueue task deletion

The issue was that we did not have enough space in the reserved memory to write both the batch and the deletion task we had just enqueued.
We could have fixed it only for this test, as it's not an issue in production where we have 10GiB of margin, but I thought it wasn't a bad idea to increase our margin a bit since we're effectively writing more to LMDB.


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-04 12:54:48 +00:00
5ce9acb0b9 Add workloads 2024-12-04 12:19:19 +01:00
953a82ca04 Add new error message 2024-12-04 11:15:29 +01:00
54341c2e80 Merge #5118
5118: Change the reserve and grant function to accept a closure r=ManyTheFish a=Kerollmops

This simplifies the usage of the grant and commits it at the right time, just after having written to it.
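
A minimal sketch of what such a closure-based API can look like, using a hypothetical `Producer` backed by a plain `Vec<u8>` instead of the real ring-buffer channel: the closure fills the reserved bytes and the grant is "committed" right after it returns.

```rust
/// Hypothetical single-producer handle over a byte buffer.
struct Producer {
    buffer: Vec<u8>,
}

impl Producer {
    /// Reserve `len` bytes, hand them to `write`, and commit them
    /// immediately after the closure has filled them in.
    fn reserve_and_write<F>(&mut self, len: usize, write: F)
    where
        F: FnOnce(&mut [u8]),
    {
        let start = self.buffer.len();
        self.buffer.resize(start + len, 0);
        write(&mut self.buffer[start..]);
        // In the real BBQueue-based channel, this is where the grant is
        // committed and a WakeUp message is sent if necessary.
    }
}

fn main() {
    let mut producer = Producer { buffer: Vec::new() };
    producer.reserve_and_write(4, |buf| buf.copy_from_slice(&42u32.to_ne_bytes()));
    assert_eq!(producer.buffer.len(), 4);
}
```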

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-04 10:12:39 +00:00
96831ed9bb Send the WakeUp message if necessary in the reserve function 2024-12-04 11:03:01 +01:00
0459b1a242 Change the reserve and grant function to accept a closure 2024-12-04 10:32:25 +01:00
8ecb726683 Fix the minimun BBQueue channel threshold 2024-12-03 15:49:11 +01:00
297e72e262 Merge #5111
5111: Update BBQueue repo to point to the Meilisearch org r=curquiza a=Kerollmops

This PR updates the milli dependencies to make BBQueue point to the Meilisearch org repo.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-03 14:27:04 +00:00
0ad2f57a92 Update bbqueue repo to point to the meilisearch org 2024-12-03 12:00:04 +01:00
71d53f413f increase the margin allowed to delete task 2024-12-03 11:07:03 +01:00
054622bd16 Merge #5094
5094: Implement a bbqueue channel between the extractors and the writer r=dureuill a=Kerollmops

This PR switches the communication between the extractors and the writer from a bounded crossbeam channel that only carried allocated entries to a [BBQueue](https://github.com/jamesmunns/bbqueue)-based system using a Single Producer Single Consumer (SPSC) circular/ring buffer channel (a sketch of the pattern follows the checklist below).

 - [x] Implement the BBQueue channel system...
 - [x] with a crossbeam channel to wake up the receiver.
 - [x] Manage the BBQueue allocated memory dynamically.
 - [x] Support content that doesn't fit in the bbqueues.
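
A minimal illustration of the SPSC-ring-buffer-plus-wake-up pattern, using the `bbqueue` and `crossbeam-channel` crates directly rather than the actual milli channel code (in the real indexer the buffers are sized dynamically and payloads that don't fit go through crossbeam instead):

```rust
use bbqueue::BBBuffer;
use crossbeam_channel::unbounded;

fn main() {
    // One static ring buffer shared between a single producer and consumer.
    static BB: BBBuffer<1024> = BBBuffer::new();
    let (mut producer, mut consumer) = BB.try_split().unwrap();

    // Side channel only used to wake the receiver up when bytes are ready.
    let (wake_tx, wake_rx) = unbounded::<()>();

    // Extractor side: reserve a grant, write the payload, commit, wake up.
    let payload = b"hello";
    let mut grant = producer.grant_exact(payload.len()).unwrap();
    grant.buf().copy_from_slice(payload);
    grant.commit(payload.len());
    wake_tx.send(()).unwrap();

    // Writer side: wait for a wake-up, then read and release the frame.
    wake_rx.recv().unwrap();
    let frame = consumer.read().unwrap();
    let len = frame.buf().len();
    println!("received {len} bytes");
    frame.release(len);
}
```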

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-03 08:00:55 +00:00
e905a72d73 remove mimalloc on Windows 2024-12-02 18:13:56 +01:00
2e879c1df8 Merge #5109
5109: Fix autobatch r=dureuill a=dureuill

Fixes most SDK tests and flaky failures

Changes:

- Make sure that the settings are not autobatched with document operations, as the new indexer no longer supports this operating mode

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-02 16:30:51 +00:00
d040aff101 Stop allocating 1GiB for documents 2024-12-02 16:30:14 +01:00
5e30731cad Merge #5107
5107: While spamming the batches route, we could see a processing batch become missing and then finished; this commit ensures batches go from processing to finished directly r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes the failed tests from this PR: https://github.com/meilisearch/meilisearch-js/pull/1775
See [this message](https://meilisearch.slack.com/archives/CD7Q2UKGB/p1732784680450749) [private link] for more context

## What does this PR do?
- Ensure we never enter a state where a processing batch (only existing in RAM) becomes « Not found », by removing the processing batches AFTER writing them to disk (see the sketch below)
- This should also theoretically avoid an issue where a task could go from processing to enqueued and then finished
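
A minimal sketch of that ordering, with hypothetical stand-in types (`BatchStore` for the LMDB side, `ProcessingBatches` for the in-RAM set), not the actual index-scheduler code:

```rust
use std::collections::BTreeSet;

/// Hypothetical in-memory view of the batches currently being processed.
struct ProcessingBatches {
    ids: BTreeSet<u32>,
}

/// Hypothetical persistent store (LMDB in the real scheduler).
trait BatchStore {
    fn write_finished_batch(&mut self, batch_id: u32) -> Result<(), String>;
}

/// Persist the finished batch BEFORE removing it from the in-memory
/// processing set: a client spamming the batches route either still sees
/// it as processing or already finds it finished on disk, never "Not found".
fn finish_batch(
    store: &mut impl BatchStore,
    processing: &mut ProcessingBatches,
    batch_id: u32,
) -> Result<(), String> {
    store.write_finished_batch(batch_id)?;
    processing.ids.remove(&batch_id);
    Ok(())
}
```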


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-02 14:36:29 +00:00
beeb31ce41 Update crates/index-scheduler/src/lib.rs 2024-12-02 15:32:16 +01:00
057143214d Fix warnings 2024-12-02 14:42:31 +01:00
6a1d26a60c Update autobatching tests 2024-12-02 14:15:15 +01:00
d78f4666a0 Fix autobatching of documents and settings 2024-12-02 12:25:01 +01:00
a439fa3e1a While spamming the batches route we could see a processing batch becoming missing and then finished, this commit ensures the batches goes from processing to finished directly 2024-12-02 12:02:16 +01:00
767259be7e Prefer returning a abort indexation rather than throwing a panic 2024-12-02 11:53:42 +01:00
e9f34fb4b1 Make the frame consumer pulling fair 2024-12-02 11:49:01 +01:00
d5c07ef7b3 Manage key length conversion error correctly 2024-12-02 11:03:00 +01:00
5e218f3f4d Remove a sync_all (mark my words) 2024-12-02 11:03:00 +01:00
bcab61ab1d Do spurious wake ups on the receiver side 2024-12-02 11:03:00 +01:00
263c5a348e Move the spin looping for BBQueue frames into a dedicated function 2024-12-02 10:33:49 +01:00
be7d2fbe63 Move the EntryHeader up in the file and document the safety related to the size 2024-12-02 10:19:11 +01:00
f7f9a131e4 Improve copying bytes into aligned memory area 2024-12-02 10:15:58 +01:00
5df5eb2db2 Clarify a method name 2024-12-02 10:10:48 +01:00
30eb0e5b5b Rename recv and read methods to recv_action and recv_frame 2024-12-02 10:08:01 +01:00
5b860cb989 Fix english in the doc 2024-12-02 10:06:35 +01:00
76d0623b11 Reduce the number of unwraps 2024-12-02 10:05:06 +01:00
db4eaf4d2d Rename serialize_into into serialize_into_writer 2024-12-02 10:03:27 +01:00
13f21206a6 Call the serialize_into_writer method from the serialize_into one 2024-12-02 10:03:01 +01:00
14ee7aa84c Make sure the BBQueue is at least 50 MiB 2024-11-28 18:02:48 +01:00
8a35cd1743 Adjust the BBQueue buffers to use 2% instead of 10% 2024-11-28 16:00:15 +01:00
3c7ac093d3 Take the BBQueue capacity into account in the max memory 2024-11-28 15:43:14 +01:00
b57dd5c58e Remove the Vector variant and use the Vectors 2024-11-28 15:20:43 +01:00
096a28656e Fix a bug around deleting all the vectors of a doc 2024-11-28 15:15:06 +01:00
cc4bd54669 Correctly construct the Embeddings struct 2024-11-28 13:53:25 +01:00
58eab9a018 Send large payload through crossbeam 2024-11-28 12:01:06 +01:00
5c488e20cc Send the geo rtree through crossbeam channel 2024-11-27 18:03:45 +01:00
da650f834e Plug the NoPanicThreadPool in the tests and benchmarks 2024-11-27 17:04:49 +01:00
e83534a430 Fix the indexer::index to correctly use the rayon::ThreadPool 2024-11-27 16:27:43 +01:00
98d4a2909e Fix the way we spawn the rayon threadpool 2024-11-27 16:05:44 +01:00
a514ce472a Make clippy happy 2024-11-27 14:59:04 +01:00
cc63802115 Modify and return the IndexEmbeddings to write them later 2024-11-27 14:58:03 +01:00
acec45ad7c Send a WakeUp when writing data in the BBQueue buffers 2024-11-27 14:33:23 +01:00
08d6413365 Fix result types 2024-11-27 14:32:42 +01:00
70802eb7c7 Fix most issues with the lifetimes 2024-11-27 14:32:42 +01:00
6ac5b3b136 Finish most of the channels types 2024-11-27 14:32:26 +01:00
e1e76f39d0 Clean up dependencies 2024-11-27 14:30:34 +01:00
2094ce8a9a Move the arroy building after the writing loop 2024-11-27 14:30:33 +01:00
8442db8101 Implement mostly all senders 2024-11-27 14:16:35 +01:00
79671c9faa Implement a first version of the bbqueue channels 2024-11-27 14:15:00 +01:00
79 changed files with 2066 additions and 1144 deletions

39
Cargo.lock generated
View File

@ -489,6 +489,11 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bbqueue"
version = "0.5.1"
source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4"
[[package]]
name = "benchmarks"
version = "1.12.0"
@ -1246,19 +1251,6 @@ dependencies = [
"itertools 0.10.5",
]
[[package]]
name = "crossbeam"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.13"
@ -1918,6 +1910,15 @@ dependencies = [
"serde_json",
]
[[package]]
name = "flume"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
dependencies = [
"spin",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -2616,7 +2617,7 @@ dependencies = [
"big_s",
"bincode",
"bumpalo",
"crossbeam",
"crossbeam-channel",
"csv",
"derive_builder 0.20.0",
"dump",
@ -3611,6 +3612,7 @@ version = "1.12.0"
dependencies = [
"allocator-api2",
"arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bbqueue",
"big_s",
"bimap",
"bincode",
@ -3630,6 +3632,7 @@ dependencies = [
"enum-iterator",
"filter-parser",
"flatten-serde-json",
"flume",
"fst",
"fxhash",
"geoutils",
@ -4743,8 +4746,9 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.6"
source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#8ff028e484fb6192a0acf5a669eaf18c30cada6e"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88"
dependencies = [
"bytemuck",
"byteorder",
@ -5186,6 +5190,9 @@ name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]]
name = "spm_precompiled"

View File

@ -43,6 +43,3 @@ opt-level = 3
opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[patch.crates-io]
roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }

View File

@ -24,7 +24,7 @@ tempfile = "3.14.0"
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.6"
roaring = "0.10.7"
[build-dependencies]
anyhow = "1.0.86"

View File

@ -16,6 +16,7 @@ use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
use roaring::RoaringBitmap;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@ -157,6 +158,7 @@ fn indexing_songs_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -223,6 +225,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -267,6 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -335,6 +339,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -411,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -455,6 +461,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -495,6 +502,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -562,6 +570,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -628,6 +637,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -694,6 +704,7 @@ fn indexing_wiki(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -759,6 +770,7 @@ fn reindexing_wiki(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -803,6 +815,7 @@ fn reindexing_wiki(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -870,6 +883,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -946,6 +960,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -991,6 +1006,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1032,6 +1048,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1098,6 +1115,7 @@ fn indexing_movies_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1163,6 +1181,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1207,6 +1226,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1274,6 +1294,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1321,6 +1342,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1385,6 +1407,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1429,6 +1452,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1469,6 +1493,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1558,6 +1583,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1648,6 +1674,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1730,6 +1757,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1796,6 +1824,7 @@ fn indexing_geo(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1861,6 +1890,7 @@ fn reindexing_geo(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1905,6 +1935,7 @@ fn reindexing_geo(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1972,6 +2003,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

View File

@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

View File

@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

View File

@ -117,6 +117,7 @@ pub fn base_setup(conf: &Conf) -> Index {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -17,7 +17,7 @@ http = "1.1.0"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.5"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tar = "0.4.41"

View File

@ -135,6 +135,7 @@ fn main() {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -24,7 +24,7 @@ meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.6.0"
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
@ -45,7 +45,7 @@ bumpalo = "3.16.0"
[dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2"
crossbeam = "0.8.4"
crossbeam-channel = "0.5.13"
insta = { version = "1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

View File

@ -115,13 +115,6 @@ pub enum BatchKind {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
SettingsAndDocumentOperation {
settings_ids: Vec<TaskId>,
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
@ -146,7 +139,6 @@ impl BatchKind {
match self {
BatchKind::DocumentOperation { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
@ -154,10 +146,7 @@ impl BatchKind {
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentOperation { primary_key, .. }
| BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
Some(primary_key.as_deref())
}
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
_ => None,
}
}
@ -275,8 +264,7 @@ impl BatchKind {
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
| BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
K::IndexDeletion,
) => {
ids.push(id);
@ -356,15 +344,9 @@ impl BatchKind {
) => Break(this),
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
this @ BatchKind::DocumentOperation { .. },
K::Settings { .. },
) => Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids: vec![id],
method,
allow_index_creation,
primary_key,
operation_ids,
}),
) => Break(this),
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
deletion_ids.push(id);
@ -477,63 +459,7 @@ impl BatchKind {
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
K::DocumentClear,
) => {
operation_ids.push(id);
Continue(BatchKind::ClearAndSettings {
settings_ids,
other: operation_ids,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method: ReplaceDocuments,
allow_index_creation,
primary_key: pk2,
operation_ids,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method: UpdateDocuments,
allow_index_creation,
primary_key: pk2,
operation_ids,
})
}
// But we can't batch a settings and a doc op with another doc op
// this MUST be AFTER the two previous branch
(
this @ BatchKind::SettingsAndDocumentOperation { .. },
K::DocumentDeletion { .. } | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
})
}
(
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
@ -808,30 +734,30 @@ mod tests {
}
#[test]
fn document_addition_batch_with_settings() {
fn document_addition_doesnt_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// We ensure this kind of batch doesn't batch with forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// Doesn't batch with other forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
@ -859,8 +785,8 @@ mod tests {
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
@ -907,50 +833,6 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
// Then the mixed cases.
// The index already exists, whatever is the right of the tasks it shouldn't change the result.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// When the index doesn't exists yet it's more complicated.
// Either the first task we encounter create it, in which case we can create a big batch with everything.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// The right of the tasks following isn't really important.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// Or, the second case; the first task doesn't create the index and thus we wants to batch it with only tasks that can't create an index.
// that can be a second task that don't have the right to create an index. Or anything that can't create an index like an index deletion, document deletion, document clear, etc.
// All theses tasks are going to throw an error `Index doesn't exist` once the batch is processed.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whit what
// follows because we first need to process the erronous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
}
#[test]
@ -959,13 +841,13 @@ mod tests {
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition

View File

@ -104,7 +104,6 @@ pub(crate) enum IndexOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
@ -130,19 +129,6 @@ pub(crate) enum IndexOperation {
index_uid: String,
cleared_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
SettingsAndDocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
document_import_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
@ -174,12 +160,7 @@ impl Batch {
IndexOperation::DocumentEdition { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
..
}
| IndexOperation::DocumentClearAndSetting {
IndexOperation::DocumentClearAndSetting {
cleared_tasks: tasks,
settings_tasks: other,
..
@ -239,8 +220,7 @@ impl IndexOperation {
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
| IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
}
}
}
@ -262,9 +242,6 @@ impl fmt::Display for IndexOperation {
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
IndexOperation::SettingsAndDocumentOperation { .. } => {
f.write_str("IndexOperation::SettingsAndDocumentOperation")
}
}
}
}
@ -330,21 +307,14 @@ impl IndexScheduler {
})
.flatten();
let mut documents_counts = Vec::new();
let mut operations = Vec::new();
for task in tasks.iter() {
match task.kind {
KindWithContent::DocumentAdditionOrUpdate {
content_file,
documents_count,
..
} => {
documents_counts.push(documents_count);
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => {
operations.push(DocumentOperation::Add(content_file));
}
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents_counts.push(documents_ids.len() as u64);
operations.push(DocumentOperation::Delete(documents_ids.clone()));
}
_ => unreachable!(),
@ -356,7 +326,6 @@ impl IndexScheduler {
index_uid,
primary_key,
method,
documents_counts,
operations,
tasks,
},
@ -441,67 +410,6 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::SettingsAndDocumentOperation {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
} => {
let settings = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::Settings { settings_ids, allow_index_creation },
current_batch,
must_create_index,
)?;
let document_import = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids,
},
current_batch,
must_create_index,
)?;
match (document_import, settings) {
(
Some(Batch::IndexOperation {
op:
IndexOperation::DocumentOperation {
primary_key,
documents_counts,
operations,
tasks: document_import_tasks,
..
},
..
}),
Some(Batch::IndexOperation {
op: IndexOperation::Settings { settings, tasks: settings_tasks, .. },
..
}),
) => Ok(Some(Batch::IndexOperation {
op: IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
document_import_tasks,
settings,
settings_tasks,
},
must_create_index,
})),
_ => unreachable!(),
}
}
BatchKind::IndexCreation { id } => {
let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
@ -589,7 +497,6 @@ impl IndexScheduler {
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
@ -599,6 +506,7 @@ impl IndexScheduler {
index_name
} else {
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
current_batch.processing(Some(&mut task));
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
};
@ -1304,7 +1212,6 @@ impl IndexScheduler {
index_uid: _,
primary_key,
method,
documents_counts: _,
operations,
mut tasks,
} => {
@ -1351,7 +1258,10 @@ impl IndexScheduler {
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
@ -1399,21 +1309,19 @@ impl IndexScheduler {
}
if tasks.iter().any(|res| res.error.is_none()) {
pool.install(|| {
indexer::index(
index_wtxn,
index,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
)
})
.unwrap()?;
indexer::index(
index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
)?;
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
@ -1489,34 +1397,34 @@ impl IndexScheduler {
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
pool.install(|| {
let indexer =
UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes = indexer.into_changes(&primary_key)?;
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes =
pool.install(|| indexer.into_changes(&primary_key)).unwrap()?;
indexer::index(
index_wtxn,
index,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
None, // cannot change primary key in DocumentEdition
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
)?;
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;
Result::Ok(())
})
.unwrap()?;
indexer::index(
index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
None, // cannot change primary key in DocumentEdition
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
@ -1641,7 +1549,10 @@ impl IndexScheduler {
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
@ -1652,21 +1563,19 @@ impl IndexScheduler {
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;
pool.install(|| {
indexer::index(
index_wtxn,
index,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
)
})
.unwrap()?;
indexer::index(
index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
@ -1694,43 +1603,6 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
document_import_tasks,
settings,
settings_tasks,
} => {
let settings_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::Settings {
index_uid: index_uid.clone(),
settings,
tasks: settings_tasks,
},
)?;
let mut import_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
tasks: document_import_tasks,
},
)?;
let mut tasks = settings_tasks;
tasks.append(&mut import_tasks);
Ok(tasks)
}
IndexOperation::DocumentClearAndSetting {
index_uid,
cleared_tasks,


@ -407,7 +407,7 @@ pub struct IndexScheduler {
///
/// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation.
#[cfg(test)]
test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,
/// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler.
///
@ -476,7 +476,7 @@ impl IndexScheduler {
/// Create an index scheduler and start its run loop.
pub fn new(
options: IndexSchedulerOptions,
#[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>,
) -> Result<Self> {
std::fs::create_dir_all(&options.tasks_path)?;
@ -1440,7 +1440,7 @@ impl IndexScheduler {
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
{
return Err(Error::NoSpaceLeftInTaskQueue);
}
@ -1738,11 +1738,8 @@ impl IndexScheduler {
}
}
self.processing_tasks.write().unwrap().stop_processing();
// We must re-add the canceled task so they're part of the same batch.
// processed.processing |= canceled;
ids |= canceled;
self.write_batch(&mut wtxn, processing_batch, &ids)?;
#[cfg(test)]
@ -1750,6 +1747,10 @@ impl IndexScheduler {
wtxn.commit().map_err(Error::HeedTransaction)?;
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time, until everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");
@ -2237,7 +2238,7 @@ mod tests {
use std::time::Instant;
use big_s::S;
use crossbeam::channel::RecvTimeoutError;
use crossbeam_channel::RecvTimeoutError;
use file_store::File;
use insta::assert_json_snapshot;
use maplit::btreeset;
@ -2289,7 +2290,7 @@ mod tests {
configuration: impl Fn(&mut IndexSchedulerOptions),
) -> (Self, IndexSchedulerHandle) {
let tempdir = TempDir::new().unwrap();
let (sender, receiver) = crossbeam::channel::bounded(0);
let (sender, receiver) = crossbeam_channel::bounded(0);
let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
@ -2421,7 +2422,7 @@ mod tests {
pub struct IndexSchedulerHandle {
_tempdir: TempDir,
index_scheduler: IndexScheduler,
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>,
last_breakpoint: Breakpoint,
}
@ -4318,10 +4319,35 @@ mod tests {
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default())
let (mut batches, _) = index_scheduler
.get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // only the processing batch in the first tick
assert_eq!(batches.len(), 1);
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
snapshot!(batch, @r#"
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null
}
"#);
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (batches, _) = index_scheduler


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, }
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}


@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[3,]
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, }
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}


@ -67,7 +67,7 @@ impl ProcessingBatch {
task.batch_uid = Some(self.uid);
// We don't store the statuses in the map since they're all enqueued but we must
// still store them in the stats since that can be displayed.
*self.stats.status.entry(task.status).or_default() += 1;
*self.stats.status.entry(Status::Processing).or_default() += 1;
self.kinds.insert(task.kind.as_kind());
*self.stats.types.entry(task.kind.as_kind()).or_default() += 1;
@ -106,7 +106,7 @@ impl ProcessingBatch {
self.stats.total_nb_tasks = 0;
}
/// Update the timestamp of the tasks and the inner structure of this sturcture.
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state


@ -17,7 +17,7 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"


@ -25,7 +25,7 @@ fst = "0.4.7"
memmap2 = "0.9.4"
milli = { path = "../milli" }
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"


@ -214,7 +214,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB
let mut doc_alloc = Bump::with_capacity(1024 * 1024); // 1MiB
let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);


@ -279,6 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;


@ -103,7 +103,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.2"
roaring = "0.10.7"
mopa-maintained = "0.2.3"
[dev-dependencies]


@ -20,14 +20,14 @@ use meilisearch::{
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))


@ -654,8 +654,9 @@ impl Opt {
#[derive(Debug, Default, Clone, Parser, Deserialize)]
pub struct IndexerOpts {
/// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch
/// uses no more than two thirds of available memory.
/// Specifies the maximum resident memory that Meilisearch can use for indexing.
/// By default, Meilisearch limits the RAM usage to 5% of the total available memory.
/// Note that the underlying store utilizes memory-mapping and makes use of the rest.
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
#[serde(default)]
pub max_indexing_memory: MaxMemory,
@ -714,7 +715,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
}
}
/// A type used to detect the max memory available and use 2/3 of it.
/// A type used to detect the max resident memory available and use 5% of it.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxMemory(Option<Byte>);
@ -728,7 +729,7 @@ impl FromStr for MaxMemory {
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 5 / 100).map(Byte::from_u64))
}
}
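
A worked example of the new default above; a minimal sketch where a plain parameter stands in for the `total_memory_bytes()` helper and the 32 GiB machine size is hypothetical:

```rust
// Sketch only: the diff moves the default indexing budget from two thirds of
// the detected memory down to 5% of it.
fn default_max_indexing_memory(total_memory_bytes: u64) -> u64 {
    total_memory_bytes * 5 / 100
}

fn main() {
    let total = 32u64 * 1024 * 1024 * 1024; // assume a 32 GiB machine
    // Roughly 1.6 GiB is budgeted for indexing by default; the memory-mapped
    // store is expected to make use of the remaining RAM.
    assert_eq!(default_max_indexing_memory(total), 1_717_986_918);
}
```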


@ -1407,6 +1407,13 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};
if !index.facet_search(&rtxn)? {
return Err(ResponseError::from_msg(
"The facet search is disabled for this index".to_string(),
Code::FacetSearchDisabled,
));
}
// In the faceted search context, we want to use the intersection between the locales provided by the user
// and the locales of the facet string.
// If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.


@ -224,7 +224,7 @@ async fn list_batches_status_and_type_filtered() {
}
#[actix_rt::test]
async fn get_batch_filter_error() {
async fn list_batch_filter_error() {
let server = Server::new().await;
let (response, code) = server.batches_filter("lol=pied").await;


@ -52,6 +52,25 @@ impl Value {
}
self
}
/// Return `true` if the `status` field is set to `failed`.
/// Panic if the `status` field doesn't exist.
#[track_caller]
pub fn is_fail(&self) -> bool {
if !self["status"].is_string() {
panic!("Called `is_fail` on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self["status"] == serde_json::Value::String(String::from("failed"))
}
// Panic if the json doesn't contain the `status` field set to "failed"
#[track_caller]
pub fn failed(&self) -> &Self {
if !self.is_fail() {
panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self
}
}
impl From<serde_json::Value> for Value {


@ -221,8 +221,15 @@ async fn add_documents_and_deactivate_facet_search() {
let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
assert_eq!(code, 400, "{}", response);
snapshot!(response, @r###"
{
"message": "The facet search is disabled for this index",
"code": "facet_search_disabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
}
"###);
}
#[actix_rt::test]
@ -245,8 +252,15 @@ async fn deactivate_facet_search_and_add_documents() {
let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
assert_eq!(code, 400, "{}", response);
snapshot!(response, @r###"
{
"message": "The facet search is disabled for this index",
"code": "facet_search_disabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
}
"###);
}
#[actix_rt::test]


@ -129,11 +129,11 @@ async fn perform_on_demand_snapshot() {
index.load_test_set().await;
server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, _) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.succeeded();
server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, _) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.failed();
let (task, code) = server.create_snapshot().await;
snapshot!(code, @"202 Accepted");


@ -42,7 +42,7 @@ obkv = "0.3.0"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
@ -98,6 +98,8 @@ allocator-api2 = "0.2.18"
rustc-hash = "2.0.0"
uell = "0.1.0"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }


@ -3,6 +3,7 @@ use std::convert::Infallible;
use std::fmt::Write;
use std::{io, str};
use bstr::BString;
use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
@ -62,9 +63,9 @@ pub enum InternalError {
#[error(transparent)]
Store(#[from] MdbError),
#[error("Cannot delete {key:?} from database {database_name}: {error}")]
StoreDeletion { database_name: &'static str, key: Vec<u8>, error: heed::Error },
StoreDeletion { database_name: &'static str, key: BString, error: heed::Error },
#[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")]
StorePut { database_name: &'static str, key: Vec<u8>, value_length: usize, error: heed::Error },
StorePut { database_name: &'static str, key: BString, value_length: usize, error: heed::Error },
#[error(transparent)]
Utf8(#[from] str::Utf8Error),
#[error("An indexation process was explicitly aborted")]


@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
Ok(Cow::Owned(v))
}
}


@ -27,18 +27,27 @@ impl CboRoaringBitmapCodec {
}
}
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
pub fn serialize_into_vec(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
Self::serialize_into_writer(roaring, vec).unwrap()
}
pub fn serialize_into_writer<W: io::Write>(
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
vec.write_u32::<NativeEndian>(integer).unwrap();
writer.write_u32::<NativeEndian>(integer)?;
}
} else {
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
roaring.serialize_into(vec).unwrap();
roaring.serialize_into(writer)?;
}
Ok(())
}
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
@ -143,7 +152,7 @@ impl CboRoaringBitmapCodec {
return Ok(None);
}
Self::serialize_into(&previous, buffer);
Self::serialize_into_vec(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
@ -169,7 +178,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Self::serialize_into_vec(item, &mut vec);
Ok(Cow::Owned(vec))
}
}
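
A hedged usage sketch of the writer-generic form introduced above, written against the public `roaring` and `byteorder` crates rather than milli's codec; the threshold constant below is picked for the example:

```rust
// Sketch only: the same trade-off as the codec above, either bare native-endian
// u32s for tiny sets or the regular RoaringBitmap serialization with a header.
use std::io::{self, Write};

use byteorder::{NativeEndian, WriteBytesExt};
use roaring::RoaringBitmap;

const THRESHOLD: u64 = 7; // example value, not necessarily milli's

fn serialize_into_writer<W: Write>(bitmap: &RoaringBitmap, mut writer: W) -> io::Result<()> {
    if bitmap.len() <= THRESHOLD {
        // Few enough integers that raw u32s weigh the same or less than the
        // serialized bitmap with its header.
        for integer in bitmap {
            writer.write_u32::<NativeEndian>(integer)?;
        }
        Ok(())
    } else {
        // Otherwise fall back to the classic RoaringBitmap serialization.
        bitmap.serialize_into(writer)
    }
}

fn main() -> io::Result<()> {
    let bitmap: RoaringBitmap = (0..5).collect();
    let mut buffer = Vec::new();
    serialize_into_writer(&bitmap, &mut buffer)?;
    assert_eq!(buffer.len(), 5 * 4); // five bare u32s, no header
    Ok(())
}
```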


@ -1821,6 +1821,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1911,6 +1912,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1991,6 +1993,7 @@ pub(crate) mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,


@ -1,6 +1,7 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]
#[cfg(not(windows))]
#[cfg(test)]
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;


@ -83,6 +83,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,


@ -2155,6 +2155,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2216,6 +2217,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2268,6 +2270,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2319,6 +2322,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2372,6 +2376,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2430,6 +2435,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2481,6 +2487,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2532,6 +2539,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2725,6 +2733,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2783,6 +2792,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2838,6 +2848,7 @@ mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

File diff suppressed because it is too large


@ -1,5 +1,6 @@
use std::collections::{BTreeMap, BTreeSet};
use either::Either;
use heed::RoTxn;
use raw_collections::RawMap;
use serde_json::value::RawValue;
@ -209,29 +210,34 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
for MergedDocument<'d, 'doc, 't, Mapper>
{
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'d str, &'d RawValue)>> {
let mut new_doc_it = self.new_doc.iter_top_level_fields();
let mut db_it = self.db.iter().flat_map(|db| db.iter_top_level_fields());
let mut seen_fields = BTreeSet::new();
match &self.db {
Some(db) => {
let mut new_doc_it = self.new_doc.iter_top_level_fields();
let mut db_it = db.iter_top_level_fields();
let mut seen_fields = BTreeSet::new();
std::iter::from_fn(move || {
if let Some(next) = new_doc_it.next() {
if let Ok((name, _)) = next {
seen_fields.insert(name);
}
return Some(next);
}
loop {
match db_it.next()? {
Ok((name, value)) => {
if seen_fields.contains(name) {
continue;
Either::Left(std::iter::from_fn(move || {
if let Some(next) = new_doc_it.next() {
if let Ok((name, _)) = next {
seen_fields.insert(name);
}
return Some(Ok((name, value)));
return Some(next);
}
Err(err) => return Some(Err(err)),
}
loop {
match db_it.next()? {
Ok((name, value)) => {
if seen_fields.contains(name) {
continue;
}
return Some(Ok((name, value)));
}
Err(err) => return Some(Err(err)),
}
}
}))
}
})
None => Either::Right(self.new_doc.iter_top_level_fields()),
}
}
fn vectors_field(&self) -> Result<Option<&'d RawValue>> {
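
A minimal sketch of the same merge-iterator shape, assuming plain string pairs in place of milli's document types; only the `either::Either` trick and the seen-field set mirror the change above:

```rust
// Sketch only: prefer the fields of the new version, fall back to the database
// version for fields it didn't touch, and let `Either` unify the two branches
// into one iterator type.
use std::collections::BTreeSet;

use either::Either;

fn iter_merged<'a>(
    new_doc: &'a [(&'a str, &'a str)],
    db_doc: Option<&'a [(&'a str, &'a str)]>,
) -> impl Iterator<Item = (&'a str, &'a str)> {
    match db_doc {
        Some(db_doc) => {
            let mut new_it = new_doc.iter().copied();
            let mut db_it = db_doc.iter().copied();
            let mut seen = BTreeSet::new();
            Either::Left(std::iter::from_fn(move || {
                if let Some((name, value)) = new_it.next() {
                    seen.insert(name);
                    return Some((name, value));
                }
                // Only yield database fields the new version didn't override.
                db_it.find(|(name, _)| !seen.contains(name))
            }))
        }
        // No database version: the new document is the whole story.
        None => Either::Right(new_doc.iter().copied()),
    }
}

fn main() {
    let new_doc = [("title", "new"), ("tag", "rust")];
    let db_doc = [("title", "old"), ("id", "1")];
    let merged: Vec<_> = iter_merged(&new_doc, Some(db_doc.as_slice())).collect();
    assert_eq!(merged, vec![("title", "new"), ("tag", "rust"), ("id", "1")]);
}
```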


@ -1,7 +1,10 @@
use bumpalo::Bump;
use heed::RoTxn;
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
@ -164,6 +167,80 @@ impl<'doc> Update<'doc> {
}
}
/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
///
/// `true` if at least one top-level field that is exactly a member of `fields`, or a parent of a member of `fields`, changed.
/// Otherwise `false`.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let mut changed = false;
let mut cached_current = None;
let mut updated_selected_field_count = 0;
for entry in self.updated().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
updated_selected_field_count += 1;
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let current_value = current.top_level_field(key)?;
let Some(current_value) = current_value else {
changed = true;
break;
};
if current_value.get() != updated_value.get() {
changed = true;
break;
}
cached_current = Some(current);
}
if !self.has_deletion {
// no field deletion, so fields that don't appear in `updated` cannot have changed
return Ok(changed);
}
if changed {
return Ok(true);
}
// we saw all updated fields, and set `changed` if any field wasn't in `current`.
// so if there are as many fields in `current` as in `updated`, then nothing changed.
// If there is any more fields in `current`, then they are missing in `updated`.
let has_deleted_fields = {
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let mut current_selected_field_count = 0;
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
current_selected_field_count += 1;
}
current_selected_field_count != updated_selected_field_count
};
Ok(has_deleted_fields)
}
pub fn updated_vectors(
&self,
doc_alloc: &'doc Bump,
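
A toy re-derivation of the documented semantics, using plain string maps instead of milli's documents; the `selected` helper below is a simplified stand-in for `perm_json_p::select_field`, and everything here is illustrative:

```rust
// Sketch only: `fields = None` means every field is selected, mirroring the
// doc comment of `has_changed_for_fields` above.
use std::collections::BTreeMap;

fn selected(key: &str, fields: Option<&[&str]>) -> bool {
    fields.map_or(true, |fields| {
        fields.iter().any(|f| {
            key == *f
                || f.starts_with(&format!("{key}.")) // key is a parent of a selected field
                || key.starts_with(&format!("{f}.")) // key is nested under a selected field
        })
    })
}

fn has_changed_for_fields(
    current: &BTreeMap<&str, &str>,
    updated: &BTreeMap<&str, &str>,
    has_deletion: bool,
    fields: Option<&[&str]>,
) -> bool {
    // A selected field that is new or carries a different value: changed.
    for (key, updated_value) in updated {
        if !selected(key, fields) {
            continue;
        }
        if current.get(key) != Some(updated_value) {
            return true;
        }
    }
    // Without deletions, fields absent from `updated` cannot have changed.
    if !has_deletion {
        return false;
    }
    // With deletions, a selected field present in `current` but missing from
    // `updated` also counts as a change.
    current
        .iter()
        .any(|(key, _)| selected(key, fields) && !updated.contains_key(key))
}

fn main() {
    let current = BTreeMap::from([("title", "old"), ("genre", "rock")]);
    let updated = BTreeMap::from([("title", "old"), ("genre", "jazz")]);
    let searchable: &[&str] = &["title"];
    let faceted: &[&str] = &["genre"];
    // Only `genre` changed, so the searchable subset is untouched...
    assert!(!has_changed_for_fields(&current, &updated, false, Some(searchable)));
    // ...while the faceted subset did change.
    assert!(has_changed_for_fields(&current, &updated, false, Some(faceted)));
}
```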


@ -415,21 +415,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
@ -466,12 +466,13 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
Ok(bucket_caches)
}
/// Merges the caches that must be all associated to the same bucket.
/// Merges the caches, which must all be associated to the same bucket,
/// making sure to sort the different buckets before performing the merges.
///
/// # Panics
///
/// - If the bucket IDs in these frozen caches are not exactly the same.
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
where
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
{
@ -543,12 +544,12 @@ where
// Then manage the content on the HashMap entries that weren't taken (mem::take).
while let Some(mut map) = maps.pop() {
for (key, bbbul) in map.iter_mut() {
// Make sure we don't try to work with entries already managed by the spilled
if bbbul.is_empty() {
continue;
}
// Make sure we don't try to work with entries already managed by the spilled
let mut ordered_entries: Vec<_> =
map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
ordered_entries.sort_unstable_by_key(|(key, _)| *key);
for (key, bbbul) in ordered_entries {
let mut output = DelAddRoaringBitmap::empty();
output.union_and_clear_bbbul(bbbul);
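
The ordering fix above in miniature, assuming a plain `HashMap<Vec<u8>, Vec<u32>>` in place of the bbbul caches; names and data are invented for the example:

```rust
// Sketch only: skip entries that were already drained (spilled), then sort the
// survivors by key so the merge happens in a deterministic order.
use std::collections::HashMap;

fn main() {
    let mut map: HashMap<Vec<u8>, Vec<u32>> = HashMap::from([
        (b"word/2".to_vec(), vec![7, 9]),
        (b"word/1".to_vec(), vec![3]),
        (b"word/0".to_vec(), vec![]), // already managed by the spilled entries
    ]);

    let mut ordered_entries: Vec<_> =
        map.iter_mut().filter(|(_, bitmap)| !bitmap.is_empty()).collect();
    ordered_entries.sort_unstable_by_key(|(key, _)| *key);

    let keys: Vec<_> = ordered_entries.iter().map(|(key, _)| key.as_slice()).collect();
    assert_eq!(keys, vec![&b"word/1"[..], &b"word/2"[..]]);
}
```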


@ -12,13 +12,14 @@ use crate::update::new::thread_local::FullySend;
use crate::update::new::DocumentChange;
use crate::vector::EmbeddingConfigs;
use crate::Result;
pub struct DocumentsExtractor<'a> {
document_sender: &'a DocumentsSender<'a>,
pub struct DocumentsExtractor<'a, 'b> {
document_sender: DocumentsSender<'a, 'b>,
embedders: &'a EmbeddingConfigs,
}
impl<'a> DocumentsExtractor<'a> {
pub fn new(document_sender: &'a DocumentsSender<'a>, embedders: &'a EmbeddingConfigs) -> Self {
impl<'a, 'b> DocumentsExtractor<'a, 'b> {
pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs) -> Self {
Self { document_sender, embedders }
}
}
@ -29,7 +30,7 @@ pub struct DocumentExtractorData {
pub field_distribution_delta: HashMap<String, i64>,
}
impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
type Data = FullySend<RefCell<DocumentExtractorData>>;
fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {


@ -25,14 +25,14 @@ use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
pub struct FacetedExtractorData<'a> {
pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a>,
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: GrenadParameters,
buckets: usize,
}
impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
@ -97,6 +97,15 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
@ -318,7 +327,7 @@ impl<'doc> DelAddFacetValue<'doc> {
docid: DocumentId,
sender: &FieldIdDocidFacetSender,
doc_alloc: &Bump,
) -> std::result::Result<(), crossbeam_channel::SendError<()>> {
) -> crate::Result<()> {
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
for ((fid, value), deladd) in self.strings {
if let Ok(s) = std::str::from_utf8(&value) {

View File

@ -1,6 +1,6 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
use std::{iter, mem, result};
use bumpalo::Bump;
@ -97,30 +97,34 @@ pub struct FrozenGeoExtractorData<'extractor> {
impl<'extractor> FrozenGeoExtractorData<'extractor> {
pub fn iter_and_clear_removed(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.removed)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.removed)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)?))
}
pub fn iter_and_clear_inserted(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.inserted)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.inserted)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)?))
}
}
fn iterator_over_spilled_geopoints(
spilled: &mut Option<BufReader<File>>,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
let mut spilled = spilled.take();
iter::from_fn(move || match &mut spilled {
if let Some(spilled) = &mut spilled {
spilled.rewind()?;
}
Ok(iter::from_fn(move || match &mut spilled {
Some(file) => {
let geopoint_bytes = &mut [0u8; mem::size_of::<ExtractedGeoPoint>()];
match file.read_exact(geopoint_bytes) {
@ -130,7 +134,7 @@ fn iterator_over_spilled_geopoints(
}
}
None => None,
})
}))
}
impl<'extractor> Extractor<'extractor> for GeoExtractor {
@ -157,7 +161,9 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
let mut data_ref = context.data.borrow_mut_or_yield();
for change in changes {
if max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) {
if data_ref.spilled_removed.is_none()
&& max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm)
{
// We must spill as we allocated too much memory
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;


@ -6,7 +6,9 @@ mod searchable;
mod vectors;
use bumpalo::Bump;
pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap};
pub use cache::{
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
pub use documents::*;
pub use faceted::*;
pub use geo::*;


@ -351,6 +351,15 @@ impl WordDocidsExtractors {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&context.rtxn,
context.index,
context.db_fields_ids_map,
)? {
return Ok(());
}
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter.insert_del_u32(
fid,


@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}
let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
process_document_tokens(
document,


@ -18,17 +18,17 @@ use crate::vector::error::{
use crate::vector::{Embedder, Embedding, EmbeddingConfigs};
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
pub struct EmbeddingExtractor<'a> {
pub struct EmbeddingExtractor<'a, 'b> {
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
}
impl<'a> EmbeddingExtractor<'a> {
impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
pub fn new(
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
field_distribution: &'a FieldDistribution,
threads: &'a ThreadPoolNoAbort,
) -> Self {
@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>(
unsafe impl MostlySend for EmbeddingExtractorData<'_> {}
impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
type Data = RefCell<EmbeddingExtractorData<'extractor>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
@ -259,7 +259,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
// Currently this is the case as:
// 1. BVecs are inside of the bumpalo
// 2. All other fields are either trivial (u8) or references.
struct Chunks<'a, 'extractor> {
struct Chunks<'a, 'b, 'extractor> {
texts: BVec<'a, &'a str>,
ids: BVec<'a, DocumentId>,
@ -270,11 +270,11 @@ struct Chunks<'a, 'extractor> {
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
}
impl<'a, 'extractor> Chunks<'a, 'extractor> {
impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
#[allow(clippy::too_many_arguments)]
pub fn new(
embedder: &'a Embedder,
@ -284,7 +284,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
doc_alloc: &'a Bump,
) -> Self {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
@ -368,7 +368,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
unused_vectors_distribution: &UnusedVectorsDistributionBump,
threads: &ThreadPoolNoAbort,
sender: &EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
) -> Result<()> {
if let Some(external_docid) = has_manual_generation {


@ -70,7 +70,7 @@ impl<
F: FnOnce(&'extractor Bump) -> Result<T>,
{
let doc_alloc =
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024))));
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024))));
let doc_alloc = doc_alloc.0.take();
let fields_ids_map = fields_ids_map_store
.get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into());


@ -62,6 +62,7 @@ mod update_by_function;
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
wtxn: &mut RwTxn,
index: &'index Index,
pool: &ThreadPoolNoAbort,
grenad_parameters: GrenadParameters,
db_fields_ids_map: &'indexer FieldsIdsMap,
new_fields_ids_map: FieldsIdsMap,
@ -76,9 +77,30 @@ where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000);
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
(grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default
|max_memory| {
// 2% of the indexing memory
let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
let new_grenad_parameters = GrenadParameters {
max_memory: Some(
max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
),
..grenad_parameters
};
(new_grenad_parameters, total_bbbuffer_capacity)
},
);
let (extractor_sender, mut writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();
let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
let new_fields_ids_map = RwLock::new(new_fields_ids_map);
@ -96,6 +118,7 @@ where
send_progress,
};
let mut index_embeddings = index.embedding_configs(wtxn)?;
let mut field_distribution = index.field_distribution(wtxn)?;
let mut document_ids = index.documents_ids(wtxn)?;
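
A worked example of the buffer-budget arithmetic introduced above, under the assumption of 4 indexing threads and a 2 GiB `max_memory`; the helper name is hypothetical and only the integer arithmetic mirrors the diff:

```rust
// Sketch only: reserve a BBQueue budget out of the indexing memory, with a
// 50 MiB-per-thread floor and a 100 MiB floor on what remains for grenad.
fn bbbuffer_budget(num_threads: u64, max_memory: Option<u64>) -> (Option<u64>, u64) {
    let minimum_capacity = 50 * 1024 * 1024 * num_threads; // 50 MiB per thread
    match max_memory {
        // No indexing memory limit: keep it unlimited, reserve 100 MiB per thread.
        None => (None, 2 * minimum_capacity),
        Some(max_memory) => {
            // Same integer arithmetic as the hunk: `max_memory / 100 / 2`,
            // floored at the per-thread minimum.
            let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
            let remaining = max_memory
                .saturating_sub(total_bbbuffer_capacity)
                .max(100 * 1024 * 1024);
            (Some(remaining), total_bbbuffer_capacity)
        }
    }
}

fn main() {
    let (remaining, total) = bbbuffer_budget(4, Some(2 * 1024 * 1024 * 1024));
    assert_eq!(total, 200 * 1024 * 1024); // the 50 MiB x 4 threads floor wins
    assert_eq!(remaining, Some(1_937_768_448)); // 2 GiB minus the reserved 200 MiB
}
```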
@ -107,261 +130,261 @@ where
let field_distribution = &mut field_distribution;
let document_ids = &mut document_ids;
let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
let _entered = span.enter();
let rtxn = index.read_txn()?;
// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
pool.install(move || {
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
let _entered = span.enter();
extract(document_changes,
&document_extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
let _entered = span.enter();
for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta {
let current = field_distribution.entry(field).or_default();
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
let rtxn = index.read_txn()?;
// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
let _entered = span.enter();
extract(
document_changes,
&document_extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
let _entered = span.enter();
for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta {
let current = field_distribution.entry(field).or_default();
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
}
document_extractor_data.docids_delta.apply_to(document_ids);
}
document_extractor_data.docids_delta.apply_to(document_ids);
field_distribution.retain(|_, v| *v != 0);
}
field_distribution.retain(|_, v| *v != 0);
}
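A minimal std-only sketch of the delta application above, with hypothetical field names: each extractor's signed per-field delta is folded into the absolute distribution with `saturating_add_signed`, and fields whose count reaches zero are dropped:

```rust
use std::collections::BTreeMap;

fn main() {
    // Absolute field distribution as stored in the index.
    let mut field_distribution: BTreeMap<String, u64> =
        BTreeMap::from([("title".into(), 10), ("overview".into(), 4)]);

    // Signed deltas produced by one extractor: +2 titles, -4 overviews.
    let deltas: Vec<(String, i64)> = vec![("title".into(), 2), ("overview".into(), -4)];

    for (field, delta) in deltas {
        let current = field_distribution.entry(field).or_default();
        // Saturates instead of underflowing if a delta is larger than the count.
        *current = current.saturating_add_signed(delta);
    }

    // Fields that no longer appear in any document are removed entirely.
    field_distribution.retain(|_, v| *v != 0);

    assert_eq!(field_distribution.get("title"), Some(&12));
    assert_eq!(field_distribution.get("overview"), None);
}
```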
let facet_field_ids_delta;
{
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
let _entered = span.enter();
{
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
let _entered = span.enter();
FacetedDocidsExtractor::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets
)?
};
FacetedDocidsExtractor::run_extraction(
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
FacetDatabases::new(index),
index,
extractor_sender.facet_docids(),
)?;
}
}
{
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets
Step::ExtractingWords
)?
};
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
FacetDatabases::new(index),
index,
extractor_sender.facet_docids(),
)?;
}
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWords
)?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?;
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
// run the proximity extraction only if the precision is by word
// this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord {
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWordProximity,
)?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
'vectors: {
if index_embeddings.is_empty() {
break 'vectors;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingEmbeddings,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();
// run the proximity extraction only if the precision is by word
// this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord {
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWordProximity,
)?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
'vectors: {
let mut index_embeddings = index.embedding_configs(&rtxn)?;
if index_embeddings.is_empty() {
break 'vectors;
}
let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();
for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() {
let data = &mut data.get_mut().0;
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
deladd.apply_to(&mut config.user_provided);
for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() {
let data = &mut data.get_mut().0;
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
deladd.apply_to(&mut config.user_provided);
}
}
}
}
embedding_sender.finish(index_embeddings).unwrap();
}
'geo: {
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
'geo: {
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::WritingGeoPoints
)?;
}
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::WritingGeoPoints
merge_and_send_rtree(
datastore,
&rtxn,
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
)?;
}
merge_and_send_rtree(
datastore,
&rtxn,
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
Result::Ok(facet_field_ids_delta)
Result::Ok((facet_field_ids_delta, index_embeddings))
}).unwrap()
})?;
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let vector_arroy = index.vector_arroy;
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
let indexer_span = tracing::Span::current();
let arroy_writers: Result<HashMap<_, _>> = embedders
.inner_as_ref()
@ -384,7 +407,11 @@ where
})
.collect();
// Used by the ArroySetVectors to copy the embedding into an
// aligned memory area, required by arroy to accept a new vector.
let mut aligned_embedding = Vec::new();
let mut arroy_writers = arroy_writers?;
{
let span = tracing::trace_span!(target: "indexing::write_db", "all");
let _entered = span.enter();
@ -392,110 +419,93 @@ where
let span = tracing::trace_span!(target: "indexing::write_db", "post_merge");
let mut _entered_post_merge = None;
for operation in writer_receiver {
while let Some(action) = writer_receiver.recv_action() {
if _entered_post_merge.is_none()
&& finished_extraction.load(std::sync::atomic::Ordering::Relaxed)
{
_entered_post_merge = Some(span.enter());
}
match operation {
WriterOperation::DbOperation(db_operation) => {
let database = db_operation.database(index);
let database_name = db_operation.database_name();
match db_operation.entry() {
EntryOperation::Delete(e) => match database.delete(wtxn, e.entry()) {
Ok(false) => unreachable!("We tried to delete an unknown key"),
Ok(_) => (),
Err(error) => {
return Err(Error::InternalError(
InternalError::StoreDeletion {
database_name,
key: e.entry().to_owned(),
error,
},
));
}
},
EntryOperation::Write(e) => {
if let Err(error) = database.put(wtxn, e.key(), e.value()) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: e.key().to_owned(),
value_length: e.value().len(),
error,
}));
}
}
match action {
ReceiverAction::WakeUp => (),
ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => {
let database_name = database.database_name();
let database = database.database(index);
if let Err(error) = database.put(wtxn, &key, &value) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: bstr::BString::from(&key[..]),
value_length: value.len(),
error,
}));
}
}
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
ArroyOperation::DeleteVectors { docid } => {
for (
_embedder_index,
(_embedder_name, _embedder, writer, dimensions),
) in &mut arroy_writers
{
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
}
ReceiverAction::LargeVectors(large_vectors) => {
let LargeVectors { docid, embedder_id, .. } = large_vectors;
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
for embedding in large_vectors.read_embeddings(*dimensions) {
embeddings.push(embedding.to_vec()).unwrap();
}
ArroyOperation::SetVectors {
docid,
embedder_id,
embeddings: raw_embeddings,
} => {
let (_, _, writer, dimensions) = arroy_writers
.get(&embedder_id)
.expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
for embedding in raw_embeddings {
embeddings.append(embedding).unwrap();
}
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
ArroyOperation::SetVector { docid, embedder_id, embedding } => {
let (_, _, writer, dimensions) = arroy_writers
.get(&embedder_id)
.expect("requested a missing embedder");
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_item(wtxn, docid, &embedding)?;
}
ArroyOperation::Finish { configs } => {
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter();
(indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase,
));
for (
_embedder_index,
(_embedder_name, _embedder, writer, dimensions),
) in &mut arroy_writers
{
let dimensions = *dimensions;
writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
&indexing_context.must_stop_processing,
)?;
}
index.put_embedding_configs(wtxn, configs)?;
}
},
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
}
// Every time there is a message in the channel we search
// for new entries in the BBQueue buffers.
write_from_bbqueue(
&mut writer_receiver,
index,
wtxn,
&arroy_writers,
&mut aligned_embedding,
)?;
}
// Once the extractor/writer channel is closed
// we must process the remaining BBQueue messages.
write_from_bbqueue(
&mut writer_receiver,
index,
wtxn,
&arroy_writers,
&mut aligned_embedding,
)?;
}
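Note on the write loop above: the BBQueue is drained in two places. Inside the `while let Some(action) = writer_receiver.recv_action()` loop, `write_from_bbqueue` consumes whatever frames are already available each time the writer is woken up or has just handled a large entry; the second call, after the loop, flushes any frames that were still queued when the extractor side closed the channel, so nothing is lost before post-processing starts.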
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
let facet_field_ids_delta = extractor_handle.join().unwrap()?;
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
'vectors: {
let span =
tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter();
if index_embeddings.is_empty() {
break 'vectors;
}
(indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase,
));
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
let dimensions = *dimensions;
writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
&indexing_context.must_stop_processing,
)?;
}
index.put_embedding_configs(wtxn, index_embeddings)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
@ -537,6 +547,72 @@ where
Ok(())
}
/// A function dedicated to managing all the available BBQueue frames.
///
/// It reads all the available frames, does the corresponding database operations,
/// and stops when no frames are available.
fn write_from_bbqueue(
writer_receiver: &mut WriterBbqueueReceiver<'_>,
index: &Index,
wtxn: &mut RwTxn<'_>,
arroy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, ArroyWrapper, usize)>,
aligned_embedding: &mut Vec<f32>,
) -> crate::Result<()> {
while let Some(frame_with_header) = writer_receiver.recv_frame() {
match frame_with_header.header() {
EntryHeader::DbOperation(operation) => {
let database_name = operation.database.database_name();
let database = operation.database.database(index);
let frame = frame_with_header.frame();
match operation.key_value(frame) {
(key, Some(value)) => {
if let Err(error) = database.put(wtxn, key, value) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: key.into(),
value_length: value.len(),
error,
}));
}
}
(key, None) => match database.delete(wtxn, key) {
Ok(false) => {
unreachable!("We tried to delete an unknown key: {key:?}")
}
Ok(_) => (),
Err(error) => {
return Err(Error::InternalError(InternalError::StoreDeletion {
database_name,
key: key.into(),
error,
}));
}
},
}
}
EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers {
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
}
}
EntryHeader::ArroySetVectors(asvs) => {
let ArroySetVectors { docid, embedder_id, .. } = asvs;
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
embeddings.append(all_embeddings.to_vec()).unwrap();
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
}
}
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
fn compute_prefix_database(
index: &Index,

View File

@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
use super::channel::*;
use super::extract::{
merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
GeoExtractorData,
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
FacetKind, GeoExtractorData,
};
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
@ -19,7 +19,7 @@ pub fn merge_and_send_rtree<'extractor, MSP>(
datastore: impl IntoIterator<Item = RefCell<GeoExtractorData<'extractor>>>,
rtxn: &RoTxn,
index: &Index,
geo_sender: GeoSender<'_>,
geo_sender: GeoSender<'_, '_>,
must_stop_processing: &MSP,
) -> Result<()>
where
@ -34,7 +34,7 @@ where
}
let mut frozen = data.into_inner().freeze()?;
for result in frozen.iter_and_clear_removed() {
for result in frozen.iter_and_clear_removed()? {
let extracted_geo_point = result?;
let removed = rtree.remove(&GeoPoint::from(extracted_geo_point));
debug_assert!(removed.is_some());
@ -42,7 +42,7 @@ where
debug_assert!(removed);
}
for result in frozen.iter_and_clear_inserted() {
for result in frozen.iter_and_clear_inserted()? {
let extracted_geo_point = result?;
rtree.insert(GeoPoint::from(extracted_geo_point));
let inserted = faceted.insert(extracted_geo_point.docid);
@ -56,38 +56,37 @@ where
let rtree_mmap = unsafe { Mmap::map(&file)? };
geo_sender.set_rtree(rtree_mmap).unwrap();
geo_sender.set_geo_faceted(&faceted).unwrap();
geo_sender.set_geo_faceted(&faceted)?;
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_docids<'extractor, MSP>(
pub fn merge_and_send_docids<'extractor, MSP, D>(
mut caches: Vec<BalancedCaches<'extractor>>,
database: Database<Bytes, Bytes>,
index: &Index,
docids_sender: impl DocidsSender + Sync,
docids_sender: WordDocidsSender<D>,
must_stop_processing: &MSP,
) -> Result<()>
where
MSP: Fn() -> bool + Sync,
D: DatabaseType + Sync,
{
transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
let rtxn = index.read_txn()?;
let mut buffer = Vec::new();
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
docids_sender.write(key, value).unwrap();
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
docids_sender.delete(key).unwrap();
docids_sender.delete(key)?;
Ok(())
}
Operation::Ignore => Ok(()),
@ -101,26 +100,24 @@ pub fn merge_and_send_facet_docids<'extractor>(
mut caches: Vec<BalancedCaches<'extractor>>,
database: FacetDatabases,
index: &Index,
docids_sender: impl DocidsSender + Sync,
docids_sender: FacetDocidsSender,
) -> Result<FacetFieldIdsDelta> {
transpose_and_freeze_caches(&mut caches)?
.into_par_iter()
.map(|frozen| {
let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
let rtxn = index.read_txn()?;
let mut buffer = Vec::new();
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
facet_field_ids_delta.register_from_key(key);
let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
docids_sender.write(key, value).unwrap();
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
facet_field_ids_delta.register_from_key(key);
docids_sender.delete(key).unwrap();
docids_sender.delete(key)?;
Ok(())
}
Operation::Ignore => Ok(()),
@ -252,10 +249,3 @@ fn merge_cbo_bitmaps(
}
}
}
/// TODO Return the slice directly from the serialize_into method
fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec<u8>) -> &'b [u8] {
buffer.clear();
CboRoaringBitmapCodec::serialize_into(bitmap, buffer);
buffer.as_slice()
}
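The local `cbo_bitmap_serialize_into_vec` helper (and its TODO) could go away because the typed senders now receive the `RoaringBitmap` itself (`docids_sender.write(key, &bitmap)?`) and handle the CBO serialization on the channel side. As a rough, std-only sketch of the typed-sender pattern these hunks move to, with a zero-sized database marker instead of an `impl DocidsSender` trait object (all names hypothetical, not milli's actual API):

```rust
use std::marker::PhantomData;

// Marker types standing in for the target database of a sender.
struct WordDocids;
struct WordFidDocids;

trait DatabaseType {
    const NAME: &'static str;
}
impl DatabaseType for WordDocids {
    const NAME: &'static str = "word_docids";
}
impl DatabaseType for WordFidDocids {
    const NAME: &'static str = "word_fid_docids";
}

/// A typed sender: the marker says which database the entry is destined
/// for, so callers cannot accidentally mix databases up.
struct TypedDocidsSender<D> {
    _marker: PhantomData<D>,
}

impl<D: DatabaseType> TypedDocidsSender<D> {
    fn new() -> Self {
        Self { _marker: PhantomData }
    }

    fn write(&self, key: &[u8], value: &[u8]) -> Result<(), String> {
        // A real implementation would push a frame onto the shared queue here.
        println!("{}: put {:?} ({} bytes)", D::NAME, key, value.len());
        Ok(())
    }

    fn delete(&self, key: &[u8]) -> Result<(), String> {
        println!("{}: delete {:?}", D::NAME, key);
        Ok(())
    }
}

fn main() -> Result<(), String> {
    let word_docids = TypedDocidsSender::<WordDocids>::new();
    word_docids.write(b"hello", &[1, 2, 3])?;
    word_docids.delete(b"world")?;
    Ok(())
}
```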

View File

@ -5,6 +5,7 @@ pub trait RefCellExt<T: ?Sized> {
&self,
) -> std::result::Result<RefMut<'_, T>, std::cell::BorrowMutError>;
#[track_caller]
fn borrow_mut_or_yield(&self) -> RefMut<'_, T> {
self.try_borrow_mut_or_yield().unwrap()
}

View File

@ -11,8 +11,8 @@ pub enum Step {
ExtractingEmbeddings,
WritingGeoPoints,
WritingToDatabase,
WritingEmbeddingsToDatabase,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
Finalizing,
@ -29,8 +29,8 @@ impl Step {
Step::ExtractingEmbeddings => "extracting embeddings",
Step::WritingGeoPoints => "writing geo points",
Step::WritingToDatabase => "writing to database",
Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
Step::WaitingForExtractors => "waiting for extractors",
Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
Step::PostProcessingFacets => "post-processing facets",
Step::PostProcessingWords => "post-processing words",
Step::Finalizing => "finalizing",
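These two hunks reorder the progress steps so that `WaitingForExtractors` is reported before `WritingEmbeddingsToDatabase`, matching the new flow in `indexer::index` where the arroy indexes are built only after the extractor handle has been joined.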

View File

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::io::BufWriter;
use fst::{Set, SetBuilder, Streamer};
@ -75,8 +75,8 @@ pub struct PrefixData {
#[derive(Debug)]
pub struct PrefixDelta {
pub modified: HashSet<Prefix>,
pub deleted: HashSet<Prefix>,
pub modified: BTreeSet<Prefix>,
pub deleted: BTreeSet<Prefix>,
}
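Switching the modified and deleted prefix sets from `HashSet` to `BTreeSet` means the prefix databases are recomputed in lexicographic prefix order rather than an arbitrary hash order. A minimal std-only illustration of the ordering difference:

```rust
use std::collections::{BTreeSet, HashSet};

fn main() {
    let prefixes = ["care", "cat", "ca", "car"];

    // HashSet iteration order is unspecified and may differ between runs.
    let unordered: HashSet<&str> = prefixes.into_iter().collect();
    let _any_order: Vec<&str> = unordered.into_iter().collect();

    // BTreeSet always yields the prefixes in lexicographic order,
    // which makes the prefix recomputation deterministic.
    let ordered: BTreeSet<&str> = prefixes.into_iter().collect();
    let sorted: Vec<&str> = ordered.into_iter().collect();
    assert_eq!(sorted, vec!["ca", "car", "care", "cat"]);
}
```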
struct PrefixFstBuilder {
@ -86,7 +86,7 @@ struct PrefixFstBuilder {
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
current_prefix: Vec<Prefix>,
current_prefix_count: Vec<usize>,
modified_prefixes: HashSet<Prefix>,
modified_prefixes: BTreeSet<Prefix>,
current_prefix_is_modified: Vec<bool>,
}
@ -110,7 +110,7 @@ impl PrefixFstBuilder {
prefix_fst_builders,
current_prefix: vec![Prefix::new(); max_prefix_length],
current_prefix_count: vec![0; max_prefix_length],
modified_prefixes: HashSet::new(),
modified_prefixes: BTreeSet::new(),
current_prefix_is_modified: vec![false; max_prefix_length],
})
}
@ -180,7 +180,7 @@ impl PrefixFstBuilder {
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
let mut deleted_prefixes = HashSet::new();
let mut deleted_prefixes = BTreeSet::new();
{
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
while let Some(prefix) = deleted_prefixes_stream.next() {

View File

@ -1,5 +1,5 @@
use std::cell::RefCell;
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write};
use hashbrown::HashMap;
@ -37,8 +37,8 @@ impl WordPrefixDocids {
fn execute(
self,
wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -48,7 +48,7 @@ impl WordPrefixDocids {
fn recompute_modified_prefixes(
&self,
wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only
// and collect the CboRoaringBitmaps pointers in a HashMap.
@ -76,7 +76,7 @@ impl WordPrefixDocids {
.union()?;
buffer.clear();
CboRoaringBitmapCodec::serialize_into(&output, buffer);
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
file.write_all(buffer)
})?;
@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>();
@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
fn execute(
self,
wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
fn recompute_modified_prefixes(
&self,
wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only
// and collect the CboRoaringBitmaps pointers in a HashMap.
@ -211,7 +211,7 @@ impl WordPrefixIntegerDocids {
.union()?;
buffer.clear();
CboRoaringBitmapCodec::serialize_into(&output, buffer);
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() });
file.write_all(buffer)?;
}
@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>();
@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
fn delete_prefixes(
wtxn: &mut RwTxn,
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We remove all the entries that are no longer required in this word prefix docids database.
for prefix in prefixes {
@ -309,8 +309,8 @@ fn delete_prefixes(
pub fn compute_word_prefix_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixDocids::new(
@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
pub fn compute_exact_word_prefix_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixDocids::new(
@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
pub fn compute_word_prefix_fid_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(
@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
pub fn compute_word_prefix_position_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(

View File

@ -475,7 +475,7 @@ impl<F> Embeddings<F> {
Ok(())
}
/// Append a flat vector of embeddings a the end of the embeddings.
/// Append a flat vector of embeddings at the end of the embeddings.
///
/// If `embeddings.len() % self.dimension != 0`, then the append operation fails.
pub fn append(&mut self, mut embeddings: Vec<F>) -> Result<(), Vec<F>> {
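For context, a hedged sketch of the documented behaviour of `append`, assuming the `Embeddings` API exactly as it appears elsewhere in this diff (`Embeddings::new(dimensions)`, `append(Vec<F>) -> Result<(), Vec<F>>`); the import path is an assumption:

```rust
use milli::vector::Embeddings; // path assumed

fn main() {
    let mut embeddings = Embeddings::new(3); // dimension = 3

    // Two flat embeddings of dimension 3: 6 floats, 6 % 3 == 0, accepted.
    assert!(embeddings.append(vec![0.0f32, 0.1, 0.2, 1.0, 1.1, 1.2]).is_ok());

    // 4 % 3 != 0: the append fails and hands the vector back to the caller.
    assert!(embeddings.append(vec![0.0f32, 0.1, 0.2, 0.3]).is_err());
}
```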

View File

@ -64,6 +64,7 @@ fn test_facet_distribution_with_no_facet_values() {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -101,6 +101,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -333,6 +333,7 @@ fn criteria_ascdesc() {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -142,6 +142,7 @@ fn test_typo_disabled_on_word() {
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@ -82,6 +82,10 @@ pub struct BenchDeriveArgs {
/// Reason for the benchmark invocation
#[arg(short, long)]
reason: Option<String>,
/// The maximum time in seconds we allow for fetching the task queue before timing out.
#[arg(long, default_value_t = 60)]
tasks_queue_timeout_secs: u64,
}
pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
@ -127,7 +131,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
let meili_client = Client::new(
Some("http://127.0.0.1:7700".into()),
args.master_key.as_deref(),
Some(std::time::Duration::from_secs(60)),
Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)),
)?;
// enter runtime

View File

@ -16,6 +16,7 @@ struct ListFeaturesDeriveArgs {
#[command(author, version, about, long_about)]
#[command(name = "cargo xtask")]
#[command(bin_name = "cargo xtask")]
#[allow(clippy::large_enum_variant)] // please, that's enough...
enum Command {
ListFeatures(ListFeaturesDeriveArgs),
Bench(BenchDeriveArgs),

View File

@ -0,0 +1,105 @@
{
"name": "hackernews.add_new_documents",
"run_count": 3,
"extra_cli_args": [],
"assets": {
"hackernews-01.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
},
"hackernews-02.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
},
"hackernews-03.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
},
"hackernews-04.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
},
"hackernews-05.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"title",
"by",
"score",
"time",
"text"
],
"searchableAttributes": [
"title",
"text"
],
"filterableAttributes": [
"by",
"kids",
"parent"
],
"sortableAttributes": [
"score",
"time"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-01.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-02.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-03.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-04.ndjson"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-05.ndjson"
},
"synchronous": "WaitForTask"
}
]
}

View File

@ -0,0 +1,111 @@
{
"name": "hackernews.modify_facet_numbers",
"run_count": 3,
"extra_cli_args": [],
"assets": {
"hackernews-01.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
},
"hackernews-02.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
},
"hackernews-03.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
},
"hackernews-04.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
},
"hackernews-05.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
},
"hackernews-02-modified-filters.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"title",
"by",
"score",
"time",
"text"
],
"searchableAttributes": [
"title",
"text"
],
"filterableAttributes": [
"by",
"kids",
"parent"
],
"sortableAttributes": [
"score",
"time"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-01.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-02.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-03.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-04.ndjson"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-02-modified-filters.ndjson"
},
"synchronous": "WaitForTask"
}
]
}

View File

@ -0,0 +1,111 @@
{
"name": "hackernews.modify_facet_strings",
"run_count": 3,
"extra_cli_args": [],
"assets": {
"hackernews-01.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
},
"hackernews-02.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
},
"hackernews-03.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
},
"hackernews-04.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
},
"hackernews-05.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
},
"hackernews-01-modified-filters.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"title",
"by",
"score",
"time",
"text"
],
"searchableAttributes": [
"title",
"text"
],
"filterableAttributes": [
"by",
"kids",
"parent"
],
"sortableAttributes": [
"score",
"time"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-01.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-02.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-03.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-04.ndjson"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-01-modified-filters.ndjson"
},
"synchronous": "WaitForTask"
}
]
}

View File

@ -0,0 +1,123 @@
{
"name": "hackernews.modify_searchables",
"run_count": 3,
"extra_cli_args": [],
"assets": {
"hackernews-01.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
},
"hackernews-02.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
},
"hackernews-03.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
},
"hackernews-04.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
},
"hackernews-05.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
},
"hackernews-01-modified-searchables.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson",
"sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c"
},
"hackernews-02-modified-searchables.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson",
"sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"title",
"by",
"score",
"time",
"text"
],
"searchableAttributes": [
"title",
"text"
],
"filterableAttributes": [
"by",
"kids",
"parent"
],
"sortableAttributes": [
"score",
"time"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-01.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-02.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-03.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-04.ndjson"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-01-modified-searchables.ndjson"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-02-modified-searchables.ndjson"
},
"synchronous": "WaitForTask"
}
]
}