Compare commits


61 Commits

Author SHA1 Message Date
Louis Dureuil
50268b930c integrate thread_pool 2025-03-01 23:49:16 +01:00
Louis Dureuil
93ba4b924a Use thread pool in process index op 2025-03-01 23:46:37 +01:00
Louis Dureuil
b7d5576347 benchmarks and fuzzers 2025-03-01 23:46:04 +01:00
Louis Dureuil
f67b246108 Change cargo toml 2025-03-01 14:50:55 +01:00
Louis Dureuil
a1f60c61e8 Reasonable changes 2025-02-26 22:16:31 +01:00
Louis Dureuil
de2fedaa9d Use thread_pool broadcast 2025-02-26 22:12:19 +01:00
Louis Dureuil
89717ba0f1 error support 2025-02-26 22:11:34 +01:00
Louis Dureuil
8d93de28b8 Add thread pool to cargo toml 2025-02-26 22:11:16 +01:00
meili-bors[bot]
5e7803632d Merge #5342
5342: Fix workload sha r=dureuill a=ManyTheFish

The dataset shasum was wrong for some workloads, making the `/bench workloads/*.json` command crash
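
For context, workloads pin each dataset by its sha256; a hypothetical sketch of the check a bench runner performs (function names and error handling are illustrative):

```rust
use std::fs;
use std::path::Path;

use sha2::{Digest, Sha256};

/// Hash the downloaded dataset and compare it to the sha pinned in the
/// workload file. A wrong pinned sha makes this check fail for everyone.
fn verify_dataset(path: &Path, expected_sha: &str) -> anyhow::Result<()> {
    let bytes = fs::read(path)?;
    let actual: String = Sha256::digest(&bytes).iter().map(|b| format!("{b:02x}")).collect();
    anyhow::ensure!(actual == expected_sha, "dataset sha mismatch: {actual} != {expected_sha}");
    Ok(())
}
```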

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 16:27:09 +00:00
meili-bors[bot]
885710a07b Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- force dumpless upgrade to recompute stats
- add tests


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 15:46:37 +00:00
ManyTheFish
c55fdad2c3 Fix dumpless upgrade target version 2025-02-12 16:35:05 +01:00
ManyTheFish
1caad4c4b0 Add multiple embeddings for the same embedder in tests 2025-02-12 16:13:34 +01:00
ManyTheFish
8419ed52a1 fix clippy 2025-02-12 14:38:51 +01:00
ManyTheFish
a65c52cc97 Convert dump test into snapshots 2025-02-12 14:14:10 +01:00
ManyTheFish
49e9655c24 Update snapshots 2025-02-12 14:05:32 +01:00
meili-bors[bot]
fa763ca5dc Merge #5339
5339: Add back timeout from v1.11.3 r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5337

## What does this PR do?
- Fix regression compared with v1.11 by reintroducing the 30s timeout on all REST API calls.

Thanks to `@migueltarga` for reporting the issue
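
For illustration, the fix amounts to configuring the HTTP client with a request timeout; a minimal sketch assuming a `reqwest`-based blocking client (the embedder's actual HTTP client may differ):

```rust
use std::time::Duration;

use reqwest::blocking::Client;

// Every request made through this client errors out after 30 seconds
// instead of hanging indefinitely on an unresponsive embedding server.
fn build_rest_client() -> reqwest::Result<Client> {
    Client::builder().timeout(Duration::from_secs(30)).build()
}
```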


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-12 12:50:27 +00:00
ManyTheFish
c7aeb554b2 Add tests 2025-02-12 13:37:41 +01:00
ManyTheFish
88d9d47928 Fix benchmark sha 2025-02-12 13:27:15 +01:00
Louis Dureuil
8e0d8d31f9 Add back timeout from v1.11.3 2025-02-12 11:53:00 +01:00
meili-bors[bot]
81a38099ec Merge #5336
5336: Meilitool Hair Dryer r=dureuill a=Kerollmops

This pull request introduces a new subcommand to hair dry a specific part of specific indexes. It is useful when [the memory-mapped pages are not hot in the cache](https://arc.net/l/quote/ixhcdwcq) and must be. Hair drying those interesting pages makes the search requests using the vector store much faster.

The previous technique used the "cat method," which consists of reading the whole LMDB data file and piping it into the null file descriptor. By doing that, the whole LMDB data file becomes hot in the cache. However, when the database is large, at least 30% of it consists of free and unused pages, and many other pages don't need to be hot, e.g., raw JSON documents or uninteresting parts of the inverted index.

This new subcommand reads all the Arroy pages of a given index to make them hot, and only those. More coming...

The current algorithm is single-threaded and takes a lot of time. I am in the process of multithreading it. This is the time it takes to hair dry a 305GiB database with a single thread.

```
real    21m51.054s
user    0m3.155s
sys     0m19.393s
```
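
A minimal sketch of the page-warming idea, assuming a plain memory map over the whole data file (the actual subcommand instead walks only the arroy pages of a given index):

```rust
use std::fs::File;

use memmap2::Mmap;

/// Touch one byte per OS page so the kernel faults the whole mapping
/// into the page cache.
fn warm_pages(file: &File) -> std::io::Result<u64> {
    let map = unsafe { Mmap::map(file)? };
    let page_size = page_size::get();
    let mut checksum = 0u64;
    for offset in (0..map.len()).step_by(page_size) {
        // The read forces a page fault when the page is cold.
        checksum = checksum.wrapping_add(map[offset] as u64);
    }
    Ok(checksum) // returned so the loop isn't optimized away
}
```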

## To Do
- [ ] (optional) Do the reads in parallel.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-12 10:45:16 +00:00
ManyTheFish
bd27fe7d02 force dumpless upgrade to recompute stats 2025-02-12 11:45:02 +01:00
ManyTheFish
41203f0931 Add embedders stats 2025-02-12 11:37:47 +01:00
Kerollmops
803a699b15 Remove unsafes 2025-02-12 10:46:45 +01:00
Kerollmops
246ad3b06e Display a progress percentage 2025-02-12 09:56:05 +01:00
meili-bors[bot]
70305b9f71 Merge #5332
5332: Fix geo update r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5331

## What does this PR do?
- use the merged version that contains all fields instead of the updated version that contains only updated fields
- add test that detects the problem
- As it is the second time that `changes.updated` is causing a bug, I'm changing its name to `only_changed_fields`, hopefully better communicating that old fields are not there
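
The difference is easy to reproduce in isolation; a hypothetical sketch with `serde_json` (names and the merge helper are illustrative, not the internal milli API):

```rust
use serde_json::{json, Value};

fn main() {
    // Stored document, and a partial update that doesn't touch `_geo`.
    let current = json!({ "id": 1, "_geo": { "lat": 48.8, "lng": 2.3 }, "name": "old" });
    let update = json!({ "id": 1, "name": "new" });

    // Reading `_geo` from the update alone loses the coordinates...
    assert!(update.get("_geo").is_none());

    // ...while the merged version still carries every field.
    let mut merged = current.clone();
    if let (Value::Object(doc), Value::Object(patch)) = (&mut merged, &update) {
        for (key, value) in patch {
            doc.insert(key.clone(), value.clone());
        }
    }
    assert_eq!(merged.get("_geo"), current.get("_geo"));
}
```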


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-11 18:51:33 +00:00
Kerollmops
5dab435d13 Add more logs about read txns 2025-02-11 18:14:48 +01:00
Kerollmops
c83c1a3c51 Introduce the Hair Dryer meilitool subcommand 2025-02-11 18:01:53 +01:00
Louis Dureuil
b83275c9c5 Change the updated* functions to only_new functions, hopefully better communicating what they do 2025-02-11 15:27:10 +01:00
Louis Dureuil
d7f35ee3ba Use merged document instead of updated 2025-02-11 15:27:10 +01:00
Louis Dureuil
1dce341bfb Add test 2025-02-11 15:27:10 +01:00
meili-bors[bot]
4876c1c8eb Merge #5310
5310: Fix batch export/import dump r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5304
Fixes https://github.com/meilisearch/meilisearch/issues/5247

## What does this PR do?
- Add the batches to the dump
- Update the tests
- Create a new dump test containing batches and an enqueued task with a document addition


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-11 10:21:34 +00:00
Tamo
43c8d54501 fix test after rebase 2025-02-11 11:19:13 +01:00
Tamo
84e2a1f836 rename the atomic to something more meaningful 2025-02-11 11:14:49 +01:00
Tamo
00eb47d42e use serde_json::to_writer instead of serializing + writing 2025-02-11 11:14:49 +01:00
Tamo
9293e7f2c1 fix tests after rebase 2025-02-11 11:14:49 +01:00
Tamo
80198aa855 add a dump test with batches and enqueued tasks 2025-02-11 11:14:49 +01:00
Tamo
fa00b42c93 fix the missing batch in the dumps in meilisearch and meilitools 2025-02-11 11:14:49 +01:00
meili-bors[bot]
6c9409edf8 Merge #5326
5326: Expose a route to get the file content associated with a task r=Kerollmops a=Kerollmops

This PR exposes a new `/tasks/{taskUid}/documents` route that returns the update file associated with a task.
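
A usage sketch against a local instance (host, key, and the camelCase flag name `getTaskDocumentsRoute` are assumptions derived from the snake_case flag in this PR):

```rust
use reqwest::blocking::Client;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // The route is experimental, so enable it first.
    client
        .patch("http://localhost:7700/experimental-features")
        .header("Authorization", "Bearer MASTER_KEY")
        .json(&serde_json::json!({ "getTaskDocumentsRoute": true }))
        .send()?
        .error_for_status()?;

    // Fetch the update file (ndjson documents) associated with task 0.
    let documents = client
        .get("http://localhost:7700/tasks/0/documents")
        .header("Authorization", "Bearer MASTER_KEY")
        .send()?
        .error_for_status()?
        .text()?;
    println!("{documents}");
    Ok(())
}
```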

## To Do
- [x] (optional) Change the route to `/tasks/{taskUid}/documents` `@dureuill`.
- [x] Update Open API example.
- [x] Create [an Experimental Feature Discussion](https://github.com/orgs/meilisearch/discussions/808).
- [x] Make this route experimental and enable it via the experimental route.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-10 16:50:13 +00:00
Clément Renault
acb06cb3e6 Improve the error message when missing documents
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 16:53:50 +01:00
Kerollmops
7d0d8f4445 Make the feature experimental 2025-02-10 16:11:32 +01:00
Kerollmops
491d115c3c Change the route to get the task documents 2025-02-10 14:55:07 +01:00
Kerollmops
55fa2dda00 Update the Open API example 2025-02-10 14:52:48 +01:00
Kerollmops
c71eea8023 Improve error message when update file has been processed 2025-02-10 14:33:01 +01:00
Kerollmops
df40533741 Expose a route to get the update file content of a task 2025-02-10 14:05:32 +01:00
meili-bors[bot]
0c3e7fe963 Merge #5316
5316: Fix the dumpless upgrade corruption r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5280

## What does this PR do?
- Add a test that ensures we write the version in the index-scheduler even if we have a bug while writing the VERSION file
- Do what was described in the issue


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 09:53:57 +00:00
Tamo
45f843ccb9 fmt 2025-02-10 10:46:42 +01:00
Tamo
35b6bca598 remove the failing test 2025-02-10 10:20:14 +01:00
Tamo
7f82d33597 update the version file atomically 2025-02-06 18:23:28 +01:00
Tamo
8c5856007c flush+sync the version file just in case 2025-02-06 18:04:43 +01:00
Tamo
ae1d7f4d9b Improve the test and disable it on Windows and Linux since it doesn't work on the CI 2025-02-06 17:54:12 +01:00
meili-bors[bot]
792be63567 Merge #5323
5323: exclude network time from processingMs r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-06 16:35:44 +00:00
Louis Dureuil
70aac71c63 exclude network time from processingMs 2025-02-06 17:18:36 +01:00
meili-bors[bot]
a562d6abc1 Merge #5322
5322: Make sure arroy is using the rayon thread-pool r=dureuill a=Kerollmops

This PR fixes #5249 by ensuring arroy uses the rayon thread pool.
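
The mechanism is rayon's pool scoping: anything reached from a `ThreadPool::install` closure runs on that pool, so a library like arroy that parallelizes internally with rayon inherits it. A minimal sketch, with a parallel iterator standing in for the arroy build:

```rust
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;

fn main() {
    let pool = ThreadPoolBuilder::new()
        .num_threads(4)
        .thread_name(|i| format!("indexing-thread-{i}"))
        .build()
        .unwrap();

    // Everything reached from the closure uses `pool`, not the global pool.
    let sum: i64 = pool.install(|| (0..1_000i64).into_par_iter().sum());
    assert_eq!(sum, 499_500);
}
```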

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 15:28:47 +00:00
meili-bors[bot]
b7fdd9516c Merge #4970
4970: Create a new export documents meilitool subcommand r=dureuill a=Kerollmops

This subcommand can be useful for extracting documents from an existing database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 14:48:27 +00:00
Kerollmops
5f2a1a4fd1 Skip the documents before fetching them 2025-02-06 15:40:22 +01:00
Kerollmops
2b0e17ede0 Make sure arroy is using the rayon thread-pool 2025-02-06 15:28:10 +01:00
Kerollmops
37092adc71 Show a bit of progress 2025-02-06 10:37:05 +01:00
Kerollmops
86fcad788e Introduce a parameter to skip the first documents 2025-02-06 10:32:50 +01:00
Kerollmops
2ea5c57871 Create a new export documents meilitool subcommand based on v1.12 2025-02-06 10:32:39 +01:00
Tamo
b63c64395d add a test ensuring the index-scheduler version is set when we cannot write the version file 2025-02-05 18:08:50 +01:00
Tamo
628119e31e fix the dumpless upgrade potential corruption when upgrading from the v1.12 2025-02-05 18:08:50 +01:00
93 changed files with 3194 additions and 1559 deletions

Cargo.lock generated
View File

@@ -519,6 +519,7 @@ dependencies = [
"rand_chacha",
"reqwest",
"roaring",
"scoped_thread_pool",
"serde_json",
"tempfile",
]
@@ -2062,6 +2063,7 @@ dependencies = [
"either",
"fastrand",
"milli",
"scoped_thread_pool",
"serde",
"serde_json",
"tempfile",
@@ -2768,6 +2770,7 @@ dependencies = [
"page_size",
"rayon",
"roaring",
"scoped_thread_pool",
"serde",
"serde_json",
"synchronoise",
@@ -3636,6 +3639,7 @@ dependencies = [
"rustls",
"rustls-pemfile",
"rustls-pki-types",
"scoped_thread_pool",
"segment",
"serde",
"serde_json",
@@ -3814,6 +3818,7 @@ dependencies = [
"roaring",
"rstar",
"rustc-hash 2.1.0",
"scoped_thread_pool",
"serde",
"serde_json",
"slice-group-by",
@@ -5088,6 +5093,13 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scoped_thread_pool"
version = "0.1.0"
dependencies = [
"crossbeam-channel",
]
[[package]]
name = "scopeguard"
version = "1.2.0"

View File

@@ -17,6 +17,7 @@ csv = "1.3.1"
memmap2 = "0.9.5"
milli = { path = "../milli" }
mimalloc = { version = "0.1.43", default-features = false }
scoped_thread_pool = { version = "0.1.0", path = "../../../../../../../dev/scoped_thread_pool" }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"

File diff suppressed because it is too large

View File

@@ -2,6 +2,7 @@
use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{self, BufReader, BufWriter, Read};
use std::num::NonZeroUsize;
use std::path::Path;
use std::str::FromStr as _;
@@ -9,9 +10,11 @@ use anyhow::Context;
use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::heed::EnvOpenOptions;
use milli::documents::PrimaryKey;
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::new::indexer::document_changes::CHUNK_SIZE;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
@@ -96,28 +99,59 @@ pub fn base_setup(conf: &Conf) -> Index {
let mut wtxn = index.write_txn().unwrap();
let rtxn = index.read_txn().unwrap();
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let new_fields_ids_map = db_fields_ids_map.clone();
let documents = documents_from(conf.dataset, conf.dataset_format);
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
indexer.add_documents(&documents).unwrap();
index_documents(
indexer,
&index,
&rtxn,
new_fields_ids_map,
&mut wtxn,
config,
db_fields_ids_map,
);
wtxn.commit().unwrap();
drop(rtxn);
index
}
pub fn index_documents(
indexer: indexer::DocumentOperation,
index: &Index,
rtxn: &milli::heed::RoTxn,
mut new_fields_ids_map: milli::FieldsIdsMap,
wtxn: &mut RwTxn,
config: IndexerConfig,
db_fields_ids_map: milli::FieldsIdsMap,
) {
let indexer_alloc = Bump::new();
let thread_count =
std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(1).unwrap());
let thread_pool = scoped_thread_pool::ThreadPool::new(thread_count, "index".into());
let (document_changes, _operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
&index,
&rtxn,
index,
rtxn,
None,
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index,
wtxn,
index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
@@ -129,11 +163,38 @@ pub fn base_setup(conf: &Conf) -> Index {
&Progress::default(),
)
.unwrap();
}
wtxn.commit().unwrap();
drop(rtxn);
index
pub fn index_delete_documents(
indexer: indexer::DocumentDeletion,
primary_key: PrimaryKey,
wtxn: &mut RwTxn,
index: &Index,
config: &IndexerConfig,
db_fields_ids_map: milli::FieldsIdsMap,
new_fields_ids_map: milli::FieldsIdsMap,
) {
let indexer_alloc = Bump::new();
let thread_count =
std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(1).unwrap());
let thread_pool = scoped_thread_pool::ThreadPool::new(thread_count, "index".into());
let document_changes =
indexer.into_changes(&indexer_alloc, primary_key, &thread_pool, CHUNK_SIZE);
indexer::index(
wtxn,
index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
Some(primary_key),
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&Progress::default(),
)
.unwrap();
}
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {

View File

@@ -10,8 +10,10 @@ dump
 ├── instance-uid.uuid
 ├── keys.jsonl
 ├── metadata.json
-└── tasks
-    ├── update_files
-    │   └── [task_id].jsonl
-    └── queue.jsonl
+├── tasks
+│   ├── update_files
+│   │   └── [task_id].jsonl
+│   └── queue.jsonl
+└── batches
+    └── queue.jsonl
 ```

View File

@@ -228,6 +228,7 @@ pub(crate) mod test {
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
@@ -235,7 +236,8 @@ pub(crate) mod test {
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::tasks::{Details, Status};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{Details, Kind, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
use uuid::Uuid;
@@ -305,6 +307,30 @@ pub(crate) mod test {
settings.check()
}
pub fn create_test_batches() -> Vec<Batch> {
vec![Batch {
uid: 0,
details: DetailsView {
received_documents: Some(12),
indexed_documents: Some(Some(10)),
..DetailsView::default()
},
progress: None,
stats: BatchStats {
total_nb_tasks: 1,
status: maplit::btreemap! { Status::Succeeded => 1 },
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
},
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),
}),
started_at: datetime!(2022-11-20 0:00 UTC),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
}]
}
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
vec![
(
@@ -427,6 +453,15 @@ pub(crate) mod test {
index.flush().unwrap();
index.settings(&settings).unwrap();
// ========== pushing the batch queue
let batches = create_test_batches();
let mut batch_queue = dump.create_batches_queue().unwrap();
for batch in &batches {
batch_queue.push_batch(batch).unwrap();
}
batch_queue.flush().unwrap();
// ========== pushing the task queue
let tasks = create_test_tasks();

View File

@@ -102,6 +102,13 @@ impl DumpReader {
}
}
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.batches()),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.keys()),
@@ -227,6 +234,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -348,6 +359,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -412,6 +427,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -492,6 +511,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -569,6 +592,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -662,6 +689,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -755,6 +786,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -831,6 +866,10 @@ pub(crate) mod test {
assert_eq!(dump.date(), None);
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();

View File

@@ -18,6 +18,7 @@ pub type Checked = meilisearch_types::settings::Checked;
pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
@@ -49,6 +50,7 @@ pub struct V6Reader {
instance_uid: Option<Uuid>,
metadata: Metadata,
tasks: BufReader<File>,
batches: Option<BufReader<File>>,
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
network: Option<Network>,
@@ -79,6 +81,12 @@ impl V6Reader {
} else {
None
};
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
Ok(file) => Some(BufReader::new(file)),
// The batch file was only introduced in v1.13; anything prior to that won't have batches
Err(err) if err.kind() == ErrorKind::NotFound => None,
Err(e) => return Err(e.into()),
};
let network_file = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(network_file),
@@ -101,6 +109,7 @@ impl V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
batches,
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
features,
network,
@@ -144,7 +153,7 @@ impl V6Reader {
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?).unwrap();
let task: Task = serde_json::from_str(&line?)?;
let update_file_path = self
.dump
@@ -156,8 +165,7 @@ impl V6Reader {
if update_file_path.exists() {
Ok((
task,
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
as Box<super::UpdateFile>),
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
))
} else {
Ok((task, None))
@@ -165,6 +173,16 @@ impl V6Reader {
}))
}
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
match self.batches.as_mut() {
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
let batch = serde_json::from_str(&line?)?;
Ok(batch)
})),
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
}
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),

View File

@@ -4,6 +4,7 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
@@ -54,6 +55,10 @@ impl DumpWriter {
TaskWriter::new(self.dir.path().join("tasks"))
}
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
BatchWriter::new(self.dir.path().join("batches"))
}
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("experimental-features.json"),
@@ -88,7 +93,7 @@ impl KeyWriter {
}
pub fn push_key(&mut self, key: &Key) -> Result<()> {
self.keys.write_all(&serde_json::to_vec(key)?)?;
serde_json::to_writer(&mut self.keys, &key)?;
self.keys.write_all(b"\n")?;
Ok(())
}
@@ -118,7 +123,7 @@ impl TaskWriter {
/// Pushes tasks in the dump.
/// If the task has an associated `update_file`, it'll use the `task_id` as its name.
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
self.queue.write_all(&serde_json::to_vec(task)?)?;
serde_json::to_writer(&mut self.queue, &task)?;
self.queue.write_all(b"\n")?;
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
@@ -130,6 +135,30 @@ impl TaskWriter {
}
}
pub struct BatchWriter {
queue: BufWriter<File>,
}
impl BatchWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
let queue = File::create(path.join("queue.jsonl"))?;
Ok(BatchWriter { queue: BufWriter::new(queue) })
}
/// Pushes batches in the dump.
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
serde_json::to_writer(&mut self.queue, &batch)?;
self.queue.write_all(b"\n")?;
Ok(())
}
pub fn flush(mut self) -> Result<()> {
self.queue.flush()?;
Ok(())
}
}
pub struct UpdateFile {
path: PathBuf,
writer: Option<BufWriter<File>>,
@@ -141,8 +170,8 @@ impl UpdateFile {
}
pub fn push_document(&mut self, document: &Document) -> Result<()> {
if let Some(writer) = self.writer.as_mut() {
writer.write_all(&serde_json::to_vec(document)?)?;
if let Some(mut writer) = self.writer.as_mut() {
serde_json::to_writer(&mut writer, &document)?;
writer.write_all(b"\n")?;
} else {
let file = File::create(&self.path).unwrap();
@@ -209,8 +238,8 @@ pub(crate) mod test {
use super::*;
use crate::reader::Document;
use crate::test::{
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
create_test_settings, create_test_tasks,
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
create_test_instance_uid, create_test_settings, create_test_tasks,
};
fn create_directory_hierarchy(dir: &Path) -> String {
@@ -285,8 +314,10 @@ pub(crate) mod test {
let dump_path = dump.path();
// ==== checking global file hierarchy (we want to be sure there aren't too many files or too few)
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
.
├---- batches/
│ └---- queue.jsonl
├---- indexes/
│ └---- doggos/
│ │ ├---- documents.jsonl
@@ -301,7 +332,7 @@ pub(crate) mod test {
├---- keys.jsonl
├---- metadata.json
└---- network.json
"###);
");
// ==== checking the top level infos
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
@@ -354,6 +385,16 @@ pub(crate) mod test {
}
}
// ==== checking the batch queue
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
batch.details.settings = None;
}
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
}
// ==== checking the keys
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
for (key, expected) in keys.lines().zip(create_test_api_keys()) {

View File

@@ -17,6 +17,7 @@ clap = { version = "4.5.24", features = ["derive"] }
either = "1.13.0"
fastrand = "2.3.0"
milli = { path = "../milli" }
scoped_thread_pool = { version = "0.1.0", path = "../../../../../../../dev/scoped_thread_pool" }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"

View File

@@ -12,6 +12,7 @@ use milli::documents::mmap_from_objects;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::new::indexer::document_changes::CHUNK_SIZE;
use milli::update::{IndexDocumentsMethod, IndexerConfig};
use milli::vector::EmbeddingConfigs;
use milli::Index;
@@ -121,6 +122,11 @@ fn main() {
}
}
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism(
"index".into(),
);
let (document_changes, _operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
@@ -130,12 +136,15 @@ fn main() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -28,6 +28,7 @@ memmap2 = "0.9.5"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.10", features = ["serde"] }
scoped_thread_pool = { version = "0.1.0", path = "../../../../../../../dev/scoped_thread_pool" }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
synchronoise = "1.0.1"

View File

@@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::io;
use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
@@ -14,9 +15,15 @@ pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>,
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>,
kinds: HashMap<Kind, RoaringBitmap>,
batch_indexes: HashMap<String, RoaringBitmap>,
batch_statuses: HashMap<Status, RoaringBitmap>,
batch_kinds: HashMap<Kind, RoaringBitmap>,
}
impl<'a> Dump<'a> {
@@ -27,12 +34,72 @@ impl<'a> Dump<'a> {
Ok(Dump {
index_scheduler,
wtxn,
batch_to_task_mapping: HashMap::new(),
indexes: HashMap::new(),
statuses: HashMap::new(),
kinds: HashMap::new(),
batch_indexes: HashMap::new(),
batch_statuses: HashMap::new(),
batch_kinds: HashMap::new(),
})
}
/// Register a new batch coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
if let Some(enqueued_at) = batch.enqueued_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.earliest,
batch.uid,
)?;
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.oldest,
batch.uid,
)?;
}
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.started_at,
batch.started_at,
batch.uid,
)?;
if let Some(finished_at) = batch.finished_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.finished_at,
finished_at,
batch.uid,
)?;
}
for index in batch.stats.index_uids.keys() {
match self.batch_indexes.get_mut(index) {
Some(bitmap) => {
bitmap.insert(batch.uid);
}
None => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(batch.uid);
self.batch_indexes.insert(index.to_string(), bitmap);
}
};
}
for status in batch.stats.status.keys() {
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
}
for kind in batch.stats.types.keys() {
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
}
Ok(())
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
@@ -149,6 +216,9 @@ impl<'a> Dump<'a> {
};
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
if let Some(batch_id) = task.batch_uid {
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
}
for index in task.indexes() {
match self.indexes.get_mut(index) {
@@ -198,6 +268,14 @@ impl<'a> Dump<'a> {
/// Commit all the changes and exit the importing dump state
pub fn finish(mut self) -> Result<()> {
for (batch_id, task_ids) in self.batch_to_task_mapping {
self.index_scheduler.queue.batch_to_tasks_mapping.put(
&mut self.wtxn,
&batch_id,
&task_ids,
)?;
}
for (index, bitmap) in self.indexes {
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
@@ -208,6 +286,16 @@ impl<'a> Dump<'a> {
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
for (index, bitmap) in self.batch_indexes {
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
for (status, bitmap) in self.batch_statuses {
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
}
for (kind, bitmap) in self.batch_kinds {
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
self.wtxn.commit()?;
self.index_scheduler.scheduler.wake_up.signal();

View File

@@ -109,6 +109,8 @@ pub enum Error {
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]
TaskNotFound(TaskId),
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
TaskFileNotFound(TaskId),
#[error("Batch `{0}` not found.")]
BatchNotFound(BatchId),
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
@@ -189,6 +191,7 @@ impl Error {
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskFileNotFound(_)
| Error::BatchNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
@@ -250,6 +253,7 @@ impl ErrorCode for Error {
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,

View File

@@ -105,6 +105,19 @@ impl RoFeatures {
.into())
}
}
pub fn check_get_task_documents_route(&self) -> Result<()> {
if self.runtime.get_task_documents_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting the documents of an enqueued task",
feature: "get task documents route",
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
}
.into())
}
}
}
impl FeatureData {

View File

@@ -106,6 +106,12 @@ pub struct IndexStats {
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Number of embeddings in the index.
/// `Option` for backward compatibility with the stats of pre-v1.13.0 versions of Meilisearch
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index.
/// `Option` for backward compatibility with the stats of pre-v1.13.0 versions of Meilisearch
pub number_of_embedded_documents: Option<u64>,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
@@ -130,8 +136,11 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),

View File

@@ -33,7 +33,7 @@ mod test_utils;
pub mod upgrade;
mod utils;
pub mod uuid_codec;
mod versioning;
pub mod versioning;
pub type Result<T, E = Error> = std::result::Result<T, E>;
pub type TaskId = u32;

View File

@@ -96,6 +96,7 @@ make_enum_progress! {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheTasks,
DumpTheBatches,
DumpTheIndexes,
DumpTheExperimentalFeatures,
CompressTheDump,

View File

@@ -8,6 +8,7 @@ mod tasks_test;
mod test;
use std::collections::BTreeMap;
use std::fs::File as StdFile;
use std::time::Duration;
use file_store::FileStore;
@@ -216,6 +217,11 @@ impl Queue {
}
}
/// Open and returns the task's content File.
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
self.file_store.get_update(uuid)
}
/// Delete a file from the index scheduler.
///
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.

View File

@@ -1,3 +1,4 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::BufWriter;
use std::sync::atomic::Ordering;
@@ -11,7 +12,9 @@ use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use time::macros::format_description;
use time::OffsetDateTime;
use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress};
use crate::processing::{
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
};
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
@@ -102,7 +105,40 @@ impl IndexScheduler {
}
dump_tasks.flush()?;
// 3. Dump the indexes
// 3. dump the batches
progress.update_progress(DumpCreationProgress::DumpTheBatches);
let mut dump_batches = dump.create_batches_queue()?;
let (atomic_batch_progress, update_batch_progress) =
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
progress.update_progress(update_batch_progress);
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut b) = ret?;
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if b.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fails, everything will be set appropriately in the end.
let mut statuses = BTreeMap::new();
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
b.stats.status = statuses;
b.finished_at = Some(finished_at);
}
dump_batches.push_batch(&b)?;
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
}
dump_batches.flush()?;
// 4. Dump the indexes
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
@@ -142,7 +178,7 @@ impl IndexScheduler {
let documents = index
.all_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// 3.1. Dump the documents
// 4.1. Dump the documents
for ret in documents {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
@@ -204,7 +240,7 @@ impl IndexScheduler {
atomic.fetch_add(1, Ordering::Relaxed);
}
// 3.2. Dump the settings
// 4.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
@@ -215,7 +251,7 @@ impl IndexScheduler {
Ok(())
})?;
// 4. Dump experimental feature settings
// 5. Dump experimental feature settings
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;

View File

@@ -3,6 +3,7 @@ use bumpalo::Bump;
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::update::new::indexer::document_changes::CHUNK_SIZE;
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
@@ -112,17 +113,24 @@ impl IndexScheduler {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
let pool = match &indexer_config.rayon_thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.thread_name(|i| format!("rayon-{i}"))
.build()
.unwrap();
&local_pool
}
};
let thread_pool = match &indexer_config.thread_pool {
Some(thread_pool) => thread_pool,
None => {
&scoped_thread_pool::ThreadPool::with_available_parallelism("index".into())
}
};
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
let (document_changes, operation_stats, primary_key) = indexer
.into_changes(
@@ -133,6 +141,8 @@ impl IndexScheduler {
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
progress.clone(),
thread_pool,
CHUNK_SIZE,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
@@ -173,6 +183,7 @@ impl IndexScheduler {
indexer::index(
index_wtxn,
index,
thread_pool,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -261,7 +272,7 @@ impl IndexScheduler {
if task.error.is_none() {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
let pool = match &indexer_config.rayon_thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
@@ -272,16 +283,19 @@ impl IndexScheduler {
}
};
let thread_pool = match &indexer_config.thread_pool {
Some(thread_pool) => thread_pool,
None => &scoped_thread_pool::ThreadPool::with_available_parallelism(
"index".into(),
),
};
let candidates_count = candidates.len();
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes = pool
.install(|| {
indexer
.into_changes(&primary_key)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))
})
.unwrap()?;
let document_changes = indexer
.into_changes(&primary_key, &indexer_alloc, thread_pool, CHUNK_SIZE)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
@@ -291,6 +305,7 @@ impl IndexScheduler {
indexer::index(
index_wtxn,
index,
thread_pool,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -421,7 +436,7 @@ impl IndexScheduler {
if !tasks.iter().all(|res| res.error.is_some()) {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
let pool = match &indexer_config.rayon_thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
@@ -432,11 +447,19 @@ impl IndexScheduler {
}
};
let thread_pool = match &indexer_config.thread_pool {
Some(thread_pool) => thread_pool,
None => &scoped_thread_pool::ThreadPool::with_available_parallelism(
"index".into(),
),
};
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
let mut indexer = indexer::DocumentDeletion::new();
let candidates_count = to_delete.len();
indexer.delete_documents_by_docids(to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let document_changes =
indexer.into_changes(&indexer_alloc, primary_key, thread_pool, CHUNK_SIZE);
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
@@ -446,6 +469,7 @@ impl IndexScheduler {
indexer::index(
index_wtxn,
index,
thread_pool,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -903,7 +903,7 @@ fn create_and_list_index() {
index_scheduler.index("kefir").unwrap();
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
[
1,
[
@@ -912,6 +912,8 @@ fn create_and_list_index() {
{
"number_of_documents": 0,
"database_size": "[bytes]",
"number_of_embeddings": 0,
"number_of_embedded_documents": 0,
"used_database_size": "[bytes]",
"primary_key": null,
"field_distribution": {},
@@ -921,5 +923,5 @@ fn create_and_list_index() {
]
]
]
"#);
"###);
}

View File

@@ -6,8 +6,7 @@ use meili_snap::snapshot;
use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::tasks::Kind;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::tasks::{Kind, KindWithContent};
use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::Breakpoint::*;

View File

@@ -1,9 +1,10 @@
use crate::{upgrade::upgrade_index_scheduler, Result};
use meilisearch_types::{
heed::{types::Str, Database, Env, RoTxn, RwTxn},
milli::heed_codec::version::VersionCodec,
versioning,
};
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::heed_codec::version::VersionCodec;
use meilisearch_types::versioning;
use crate::upgrade::upgrade_index_scheduler;
use crate::Result;
/// The number of database used by queue itself
const NUMBER_OF_DATABASES: u32 = 1;
@@ -21,30 +22,38 @@ pub struct Versioning {
}
impl Versioning {
pub(crate) const fn nb_db() -> u32 {
pub const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>> {
Ok(self.version.get(rtxn, entry_name::MAIN)?)
pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>, heed::Error> {
self.version.get(rtxn, entry_name::MAIN)
}
pub fn set_version(&self, wtxn: &mut RwTxn, version: (u32, u32, u32)) -> Result<()> {
Ok(self.version.put(wtxn, entry_name::MAIN, &version)?)
pub fn set_version(
&self,
wtxn: &mut RwTxn,
version: (u32, u32, u32),
) -> Result<(), heed::Error> {
self.version.put(wtxn, entry_name::MAIN, &version)
}
pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<()> {
pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<(), heed::Error> {
let major = versioning::VERSION_MAJOR.parse().unwrap();
let minor = versioning::VERSION_MINOR.parse().unwrap();
let patch = versioning::VERSION_PATCH.parse().unwrap();
self.set_version(wtxn, (major, minor, patch))
}
/// Create an index scheduler and start its run loop.
/// Return `Self` without checking anything about the version
pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
let version = env.create_database(wtxn, Some(db_name::VERSION))?;
Ok(Self { version })
}
pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let version = env.create_database(&mut wtxn, Some(db_name::VERSION))?;
let this = Self { version };
let this = Self::raw_new(env, &mut wtxn)?;
let from = match this.get_version(&wtxn)? {
Some(version) => version,
// fresh DB: use the db version

View File

@@ -30,7 +30,21 @@ pub struct Batch {
pub enqueued_at: Option<BatchEnqueuedAt>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
impl PartialEq for Batch {
fn eq(&self, other: &Self) -> bool {
let Self { uid, progress, details, stats, started_at, finished_at, enqueued_at } = self;
*uid == other.uid
&& progress.is_none() == other.progress.is_none()
&& details == &other.details
&& stats == &other.stats
&& started_at == &other.started_at
&& finished_at == &other.finished_at
&& enqueued_at == &other.enqueued_at
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BatchEnqueuedAt {
#[serde(with = "time::serde::rfc3339")]
pub earliest: OffsetDateTime,
@@ -38,7 +52,7 @@ pub struct BatchEnqueuedAt {
pub oldest: OffsetDateTime,
}
#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct BatchStats {

View File

@@ -372,6 +372,7 @@ RemoteRemoteError , System , BAD_GATEWAY ;
RemoteTimeout , System , BAD_GATEWAY ;
TooManySearchRequests , System , SERVICE_UNAVAILABLE ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TaskFileNotFound , InvalidRequest , NOT_FOUND ;
BatchNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;

View File

@@ -10,6 +10,7 @@ pub struct RuntimeTogglableFeatures {
pub edit_documents_by_function: bool,
pub contains_filter: bool,
pub network: bool,
pub get_task_documents_route: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@@ -1,7 +1,10 @@
use std::fs;
use std::io::{self, ErrorKind};
use std::io::{ErrorKind, Write};
use std::path::Path;
use milli::heed;
use tempfile::NamedTempFile;
/// The name of the file that contains the version of the database.
pub const VERSION_FILE_NAME: &str = "VERSION";
@@ -10,37 +13,7 @@ pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
/// Persists the version of the current Meilisearch binary to a VERSION file
pub fn update_version_file_for_dumpless_upgrade(
db_path: &Path,
from: (u32, u32, u32),
to: (u32, u32, u32),
) -> Result<(), VersionFileError> {
let (from_major, from_minor, from_patch) = from;
let (to_major, to_minor, to_patch) = to;
if from_major > to_major
|| (from_major == to_major && from_minor > to_minor)
|| (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
{
Err(VersionFileError::DowngradeNotSupported {
major: from_major,
minor: from_minor,
patch: from_patch,
})
} else if from_major < 1 || (from_major == to_major && from_minor < 12) {
Err(VersionFileError::TooOldForAutomaticUpgrade {
major: from_major,
minor: from_minor,
patch: from_patch,
})
} else {
create_current_version_file(db_path)?;
Ok(())
}
}
/// Persists the version of the current Meilisearch binary to a VERSION file
pub fn create_current_version_file(db_path: &Path) -> io::Result<()> {
pub fn create_current_version_file(db_path: &Path) -> anyhow::Result<()> {
create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
@@ -49,9 +22,14 @@ pub fn create_version_file(
major: &str,
minor: &str,
patch: &str,
) -> io::Result<()> {
) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
fs::write(version_path, format!("{}.{}.{}", major, minor, patch))
// In order to persist the file later, we must create it in `data.ms` and not in `/tmp`
let mut file = NamedTempFile::new_in(db_path)?;
file.write_all(format!("{}.{}.{}", major, minor, patch).as_bytes())?;
file.flush()?;
file.persist(version_path)?;
Ok(())
}
pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> {
@@ -61,7 +39,7 @@ pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError>
Ok(version) => parse_version(&version),
Err(error) => match error.kind() {
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile),
_ => Err(error.into()),
_ => Err(anyhow::Error::from(error).into()),
},
}
}
@@ -112,7 +90,9 @@ pub enum VersionFileError {
DowngradeNotSupported { major: u32, minor: u32, patch: u32 },
#[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")]
TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 },
#[error("Error while modifying the database: {0}")]
ErrorWhileModifyingTheDatabase(#[from] heed::Error),
#[error(transparent)]
IoError(#[from] std::io::Error),
AnyhowError(#[from] anyhow::Error),
}

View File

@@ -115,6 +115,7 @@ utoipa = { version = "5.3.1", features = [
"openapi_extensions",
] }
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
scoped_thread_pool = { version = "0.1.0", path = "../../../../../../../dev/scoped_thread_pool" }
[dev-dependencies]
actix-rt = "2.10.0"

View File

@@ -197,6 +197,7 @@ struct Infos {
experimental_max_number_of_batched_tasks: usize,
experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool,
experimental_get_task_documents_route: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@@ -288,6 +289,7 @@ impl Infos {
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
} = features;
// We're going to override every sensible information.
@@ -306,6 +308,7 @@ impl Infos {
experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
experimental_reduce_indexing_memory_usage,
experimental_network: network,
experimental_get_task_documents_route: get_task_documents_route,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@@ -32,6 +32,7 @@ use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController;
use meilisearch_types::milli::constants::VERSION_MAJOR;
@@ -40,10 +41,9 @@ use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMetho
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{
create_current_version_file, get_version, update_version_file_for_dumpless_upgrade,
VersionFileError, VERSION_MINOR, VERSION_PATCH,
create_current_version_file, get_version, VersionFileError, VERSION_MINOR, VERSION_PATCH,
};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
use meilisearch_types::{compression, heed, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use search_queue::SearchQueue;
@@ -356,14 +356,19 @@ fn open_or_create_database_unchecked(
/// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch.
/// Returns the version that was contained in the version file
fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(u32, u32, u32)> {
fn check_version(
opt: &Opt,
index_scheduler_opt: &IndexSchedulerOptions,
binary_version: (u32, u32, u32),
) -> anyhow::Result<(u32, u32, u32)> {
let (bin_major, bin_minor, bin_patch) = binary_version;
let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?;
if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch {
if opt.experimental_dumpless_upgrade {
update_version_file_for_dumpless_upgrade(
&opt.db_path,
opt,
index_scheduler_opt,
(db_major, db_minor, db_patch),
(bin_major, bin_minor, bin_patch),
)?;
@@ -380,6 +385,57 @@ fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(
Ok((db_major, db_minor, db_patch))
}
/// Updates the VERSION file to the current binary's version as part of a dumpless upgrade, refusing downgrades and versions too old for an automatic upgrade
pub fn update_version_file_for_dumpless_upgrade(
opt: &Opt,
index_scheduler_opt: &IndexSchedulerOptions,
from: (u32, u32, u32),
to: (u32, u32, u32),
) -> Result<(), VersionFileError> {
let (from_major, from_minor, from_patch) = from;
let (to_major, to_minor, to_patch) = to;
// Early exit in case of error
if from_major > to_major
|| (from_major == to_major && from_minor > to_minor)
|| (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
{
return Err(VersionFileError::DowngradeNotSupported {
major: from_major,
minor: from_minor,
patch: from_patch,
});
} else if from_major < 1 || (from_major == to_major && from_minor < 12) {
return Err(VersionFileError::TooOldForAutomaticUpgrade {
major: from_major,
minor: from_minor,
patch: from_patch,
});
}
// In the case of v1.12, the index-scheduler didn't store its internal version at the time.
// => We must write it immediately **in the index-scheduler**; otherwise, if we only updated the
// version file, there would be a risk of DB corruption if a restart happened after writing the
// version file but before writing the version in the index-scheduler. See <https://github.com/meilisearch/meilisearch/issues/5280>
if from_major == 1 && from_minor == 12 {
let env = unsafe {
heed::EnvOpenOptions::new()
.max_dbs(Versioning::nb_db())
.map_size(index_scheduler_opt.task_db_size)
.open(&index_scheduler_opt.tasks_path)
}?;
let mut wtxn = env.write_txn()?;
let versioning = Versioning::raw_new(&env, &mut wtxn)?;
versioning.set_version(&mut wtxn, (from_major, from_minor, from_patch))?;
wtxn.commit()?;
// Should be instant since we're the only one using the env
env.prepare_for_closing().wait();
}
create_current_version_file(&opt.db_path)?;
Ok(())
}
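A side note on the comparison chain above: Rust tuples of `Ord` types compare lexicographically, so the component-by-component downgrade check is equivalent to a single tuple comparison. A minimal sketch:

// Minimal sketch: (major, minor, patch) tuples compare lexicographically,
// which is exactly the nested check in the function above.
fn is_downgrade(from: (u32, u32, u32), to: (u32, u32, u32)) -> bool {
    from > to
}

fn main() {
    assert!(is_downgrade((2, 0, 0), (1, 13, 0)));   // major went down
    assert!(is_downgrade((1, 13, 1), (1, 13, 0)));  // patch went down
    assert!(!is_downgrade((1, 12, 9), (1, 13, 0))); // regular upgrade
}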
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
fn open_or_create_database(
opt: &Opt,
@@ -387,7 +443,11 @@ fn open_or_create_database(
empty_db: bool,
binary_version: (u32, u32, u32),
) -> anyhow::Result<(IndexScheduler, AuthController)> {
let version = if !empty_db { check_version(opt, binary_version)? } else { binary_version };
let version = if !empty_db {
check_version(opt, &index_scheduler_opt, binary_version)?
} else {
binary_version
};
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
}
@@ -511,9 +571,15 @@ fn import_dump(
index_scheduler.refresh_index_stats(&uid)?;
}
// 5. Import the queue
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
// 5.1. Import the batches
for ret in dump_reader.batches()? {
let batch = ret?;
index_scheduler_dump.register_dumped_batch(batch)?;
}
// 5. Import the tasks.
// 5.2. Import the tasks
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler_dump.register_dumped_task(task, file)?;

View File

@@ -743,15 +743,21 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|index| format!("indexing-thread:{index}"))
let rayon_thread_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|index| format!("rayon-{index}"))
.num_threads(*other.max_indexing_threads)
.build()?;
let thread_pool = Some(scoped_thread_pool::ThreadPool::new(
NonZeroUsize::new(*other.max_indexing_threads).unwrap_or(NonZeroUsize::new(1).unwrap()),
"index".to_string(),
));
Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
thread_pool: Some(thread_pool),
rayon_thread_pool: Some(rayon_thread_pool),
thread_pool,
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
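One detail worth calling out in the hunk above: `NonZeroUsize::new(0)` returns `None`, so a configured thread count of zero silently degrades to a single-threaded pool rather than an invalid pool size. A quick sketch of that fallback:

use std::num::NonZeroUsize;

// Sketch of the fallback used when building the scoped thread pool.
fn pool_size(configured: usize) -> NonZeroUsize {
    NonZeroUsize::new(configured).unwrap_or(NonZeroUsize::new(1).unwrap())
}

fn main() {
    assert_eq!(pool_size(0).get(), 1); // zero threads falls back to one
    assert_eq!(pool_size(8).get(), 8);
}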

View File

@@ -51,6 +51,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
edit_documents_by_function: Some(false),
contains_filter: Some(false),
network: Some(false),
get_task_documents_route: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@@ -91,6 +92,8 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: Option<bool>,
#[deserr(default)]
pub network: Option<bool>,
#[deserr(default)]
pub get_task_documents_route: Option<bool>,
}
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@@ -101,6 +104,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
} = value;
Self {
@@ -109,6 +113,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
edit_documents_by_function: Some(edit_documents_by_function),
contains_filter: Some(contains_filter),
network: Some(network),
get_task_documents_route: Some(get_task_documents_route),
}
}
}
@@ -120,6 +125,7 @@ pub struct PatchExperimentalFeatureAnalytics {
edit_documents_by_function: bool,
contains_filter: bool,
network: bool,
get_task_documents_route: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
@@ -134,6 +140,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
edit_documents_by_function: new.edit_documents_by_function,
contains_filter: new.contains_filter,
network: new.network,
get_task_documents_route: new.get_task_documents_route,
})
}
@@ -157,6 +164,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
edit_documents_by_function: Some(false),
contains_filter: Some(false),
network: Some(false),
get_task_documents_route: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@@ -190,6 +198,10 @@ async fn patch_features(
.unwrap_or(old_features.edit_documents_by_function),
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
network: new_features.0.network.unwrap_or(old_features.network),
get_task_documents_route: new_features
.0
.get_task_documents_route
.unwrap_or(old_features.get_task_documents_route),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@@ -201,6 +213,7 @@ async fn patch_features(
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
} = new_features;
analytics.publish(
@@ -210,6 +223,7 @@ async fn patch_features(
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
},
&req,
);
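The merge above follows the same PATCH semantics for every flag: a field present in the payload overrides the stored value, an absent field keeps it. A reduced sketch with hypothetical struct names:

// Reduced sketch of the PATCH merge: Option<bool> models "field present or
// absent in the JSON payload"; unwrap_or falls back to the stored value.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Stored {
    network: bool,
    get_task_documents_route: bool,
}

struct Patch {
    network: Option<bool>,
    get_task_documents_route: Option<bool>,
}

fn merge(old: Stored, new: Patch) -> Stored {
    Stored {
        network: new.network.unwrap_or(old.network),
        get_task_documents_route: new
            .get_task_documents_route
            .unwrap_or(old.get_task_documents_route),
    }
}

fn main() {
    let old = Stored { network: true, get_task_documents_route: false };
    let patched = merge(old, Patch { network: None, get_task_documents_route: Some(true) });
    assert_eq!(patched, Stored { network: true, get_task_documents_route: true });
}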

View File

@@ -496,6 +496,12 @@ pub struct IndexStats {
pub number_of_documents: u64,
/// Whether or not the index is currently ingesting documents
pub is_indexing: bool,
/// Number of embeddings in the index
#[serde(skip_serializing_if = "Option::is_none")]
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index
#[serde(skip_serializing_if = "Option::is_none")]
pub number_of_embedded_documents: Option<u64>,
/// Association of every field name with the number of times it occurs in the documents.
#[schema(value_type = HashMap<String, u64>)]
pub field_distribution: FieldDistribution,
@@ -506,6 +512,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
IndexStats {
number_of_documents: stats.inner_stats.number_of_documents,
is_indexing: stats.is_indexing,
number_of_embeddings: stats.inner_stats.number_of_embeddings,
number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
field_distribution: stats.inner_stats.field_distribution,
}
}
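Because the two new fields are `Option`s guarded by `skip_serializing_if`, stats produced by a database that predates embedding stats omit them entirely instead of serializing `null`. A small sketch of that serde behavior, with a hypothetical struct name:

use serde::Serialize;

// Sketch: when the Option is None the field disappears from the JSON,
// so older databases keep their previous payload shape.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct StatsSketch {
    number_of_documents: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    number_of_embeddings: Option<u64>,
}

fn main() {
    let with = StatsSketch { number_of_documents: 10, number_of_embeddings: Some(5) };
    let without = StatsSketch { number_of_documents: 10, number_of_embeddings: None };
    assert_eq!(
        serde_json::to_string(&with).unwrap(),
        r#"{"numberOfDocuments":10,"numberOfEmbeddings":5}"#
    );
    assert_eq!(serde_json::to_string(&without).unwrap(), r#"{"numberOfDocuments":10}"#);
}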
@@ -524,6 +532,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
(status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
{
"numberOfDocuments": 10,
"numberOfEmbeddings": 10,
"numberOfEmbeddedDocuments": 10,
"isIndexing": true,
"fieldDistribution": {
"genre": 10,

View File

@@ -1,3 +1,5 @@
use std::io::ErrorKind;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebQueryParameter;
@@ -16,6 +18,7 @@ use serde::Serialize;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::io::AsyncReadExt;
use tokio::task;
use utoipa::{IntoParams, OpenApi, ToSchema};
@@ -44,7 +47,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.route(web::delete().to(SeqHandler(delete_tasks))),
)
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))))
.service(
web::resource("/{task_id}/documents")
.route(web::get().to(SeqHandler(get_task_documents_file))),
);
}
#[derive(Debug, Deserr, IntoParams)]
@@ -639,6 +646,76 @@ async fn get_task(
}
}
/// Get a task's documents.
///
/// Get a [task's documents file](https://www.meilisearch.com/docs/learn/async/asynchronous_operations).
#[utoipa::path(
get,
path = "/{taskUid}/documents",
tag = "Tasks",
security(("Bearer" = ["tasks.get", "tasks.*", "*"])),
params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)),
responses(
(status = 200, description = "The content of the task update", body = serde_json::Value, content_type = "application/x-ndjson"),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "Task :taskUid not found.",
"code": "task_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors/#task_not_found"
}
))
)
)]
async fn get_task_documents_file(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
task_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_get_task_documents_route()?;
let task_uid_string = task_uid.into_inner();
let task_uid: TaskId = match task_uid_string.parse() {
Ok(id) => id,
Err(_e) => {
return Err(index_scheduler::Error::InvalidTaskUid { task_uid: task_uid_string }.into())
}
};
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
let filters = index_scheduler.filters();
let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?;
if let Some(task) = tasks.first() {
match task.content_uuid() {
Some(uuid) => {
let mut tfile = match index_scheduler.queue.update_file(uuid) {
Ok(file) => tokio::fs::File::from_std(file),
Err(file_store::Error::IoError(e)) if e.kind() == ErrorKind::NotFound => {
return Err(index_scheduler::Error::TaskFileNotFound(task_uid).into())
}
Err(e) => return Err(e.into()),
};
// Yes, that's awful to put everything in memory when we could have streamed it from
// disk but it's really (really) complex to do with the current state of async Rust.
let mut content = String::new();
tfile.read_to_string(&mut content).await?;
Ok(HttpResponse::Ok().content_type("application/x-ndjson").body(content))
}
None => Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()),
}
} else {
Err(index_scheduler::Error::TaskNotFound(task_uid).into())
}
}
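For reference, a hedged client-side sketch of calling the new route. This is hypothetical usage, not part of the PR: it assumes a local instance with the `getTaskDocumentsRoute` experimental feature enabled, a valid API key, and the `reqwest` and `tokio` crates.

// Hypothetical client usage: fetch the ndjson content file of task 0
// through the new experimental route.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let body = reqwest::Client::new()
        .get("http://localhost:7700/tasks/0/documents")
        .bearer_auth("MASTER_KEY")
        .send()
        .await?
        .text()
        .await?;
    // The server currently buffers the whole file in memory before answering,
    // as the handler comment above acknowledges.
    println!("{body}");
    Ok(())
}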
pub enum DeserializeDateOption {
Before,
After,

View File

@@ -95,12 +95,16 @@ pub async fn perform_federated_search(
facet_order,
} = search_by_index;
let before_waiting_remote_results = std::time::Instant::now();
// 2.3. Wait for proxy search requests to complete
let (mut remote_results, remote_errors) = remote_search.finish().await;
let after_waiting_remote_results = std::time::Instant::now();
// 3. merge hits and metadata across indexes and hosts
// 3.1. merge metadata
let (estimated_total_hits, degraded, used_negative_operator, facets) =
let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
merge_metadata(&mut results_by_index, &remote_results);
// 3.2. merge hits
@@ -122,9 +126,15 @@ pub async fn perform_federated_search(
let (facet_distribution, facet_stats, facets_by_index) =
facet_order.merge(federation.merge_facets, remote_results, facets);
let after_merge = std::time::Instant::now();
let local_duration = (before_waiting_remote_results - before_search)
+ (after_merge - after_waiting_remote_results);
let max_duration = Duration::max(local_duration, max_remote_duration);
Ok(FederatedSearchResult {
hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(),
processing_time_ms: max_duration.as_millis(),
hits_info: HitsInfo::OffsetLimit {
limit: federation.limit,
offset: federation.offset,
@@ -370,11 +380,12 @@ struct SearchResultByIndex {
fn merge_metadata(
results_by_index: &mut Vec<SearchResultByIndex>,
remote_results: &Vec<FederatedSearchResult>,
) -> (usize, bool, bool, FederatedFacets) {
) -> (usize, bool, bool, FederatedFacets, Duration) {
let mut estimated_total_hits = 0;
let mut degraded = false;
let mut used_negative_operator = false;
let mut facets: FederatedFacets = FederatedFacets::default();
let mut max_remote_duration = Duration::ZERO;
for SearchResultByIndex {
index,
hits: _,
@@ -395,7 +406,7 @@ fn merge_metadata(
}
for FederatedSearchResult {
hits: _,
processing_time_ms: _,
processing_time_ms,
hits_info,
semantic_hit_count: _,
facet_distribution: _,
@@ -406,6 +417,8 @@ fn merge_metadata(
remote_errors: _,
} in remote_results
{
let this_remote_duration = Duration::from_millis(*processing_time_ms as u64);
max_remote_duration = Duration::max(this_remote_duration, max_remote_duration);
estimated_total_hits += match hits_info {
HitsInfo::Pagination { total_hits: estimated_total_hits, .. }
| HitsInfo::OffsetLimit { estimated_total_hits, .. } => estimated_total_hits,
@@ -415,7 +428,7 @@ fn merge_metadata(
degraded |= degraded_for_host;
used_negative_operator |= host_used_negative_operator;
}
(estimated_total_hits, degraded, used_negative_operator, facets)
(estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration)
}
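The effect of this change: `processingTimeMs` now reports the slowest participant instead of the local wall clock, so time spent merely awaiting remote results no longer inflates the number. A worked sketch:

use std::time::Duration;

// Sketch of the new computation: local work excludes the remote wait,
// and the reported duration is the max of local work and the slowest remote.
fn reported_ms(local_work: Duration, slowest_remote_ms: u128) -> u128 {
    let remote = Duration::from_millis(slowest_remote_ms as u64);
    Duration::max(local_work, remote).as_millis()
}

fn main() {
    // 5ms of local search+merge, slowest remote at 60ms: the old wall-clock
    // measure would also count the wait; the new one reports 60ms.
    assert_eq!(reported_ms(Duration::from_millis(5), 60), 60);
    // If local work dominates, it is the one reported.
    assert_eq!(reported_ms(Duration::from_millis(90), 60), 90);
}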
type LocalQueriesByIndex = BTreeMap<String, Vec<QueryByIndex>>;

View File

@@ -163,6 +163,10 @@ impl Server<Owned> {
self.service.get("/tasks").await
}
pub async fn batches(&self) -> (Value, StatusCode) {
self.service.get("/batches").await
}
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}

View File

@@ -1803,6 +1803,275 @@ async fn add_documents_with_geo_field() {
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"results": [
{
"id": "1"
},
{
"id": "2",
"_geo": null
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
}
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
}
#[actix_rt::test]
async fn update_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"batchUid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
let updated_documents = json!([{
"id": "3",
"doggo": "kefir",
}]);
let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 2,
"batchUid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"results": [
{
"id": "1"
},
{
"id": "2",
"_geo": null
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir"
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir",
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
}
#[actix_rt::test]

View File

@@ -161,6 +161,8 @@ async fn delete_document_by_filter() {
{
"numberOfDocuments": 4,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 3,
"id": 4
@@ -208,6 +210,8 @@ async fn delete_document_by_filter() {
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 1,
"id": 2
@@ -274,6 +278,8 @@ async fn delete_document_by_filter() {
{
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 1,
"id": 1

View File

@@ -22,6 +22,7 @@ pub enum GetDump {
TestV5,
TestV6WithExperimental,
TestV6WithBatchesAndEnqueuedTasks,
}
impl GetDump {
@@ -74,6 +75,10 @@ impl GetDump {
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
)
.into(),
GetDump::TestV6WithBatchesAndEnqueuedTasks => {
exist_relative_path!("tests/assets/v6_v1.13.0_batches_and_enqueued_tasks.dump")
.into()
}
}
}
}

View File

@@ -27,9 +27,24 @@ async fn import_dump_v1_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -173,6 +188,8 @@ async fn import_dump_v1_movie_with_settings() {
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
@@ -333,9 +350,24 @@ async fn import_dump_v1_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -483,9 +515,24 @@ async fn import_dump_v2_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -623,9 +670,24 @@ async fn import_dump_v2_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -773,9 +835,24 @@ async fn import_dump_v2_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -920,9 +997,24 @@ async fn import_dump_v3_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1060,9 +1152,24 @@ async fn import_dump_v3_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1210,9 +1317,24 @@ async fn import_dump_v3_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1357,9 +1479,24 @@ async fn import_dump_v4_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1497,9 +1634,24 @@ async fn import_dump_v4_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1647,9 +1799,24 @@ async fn import_dump_v4_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1798,33 +1965,35 @@ async fn import_dump_v5() {
server.wait_task(task["uid"].as_u64().unwrap()).await;
}
let expected_stats = json!({
"numberOfDocuments": 10,
"isIndexing": false,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
});
let index1 = server.index("test");
let index2 = server.index("test2");
let (stats, code) = index1.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(stats, expected_stats);
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 10,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
}
"###);
let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
@@ -1835,7 +2004,32 @@ async fn import_dump_v5() {
let (stats, code) = index2.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(stats, expected_stats);
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 10,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
}
"###);
let (keys, code) = server.list_api_keys("").await;
snapshot!(code, @"200 OK");
@@ -1909,7 +2103,8 @@ async fn import_dump_v6_containing_experimental_features() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
@@ -1993,6 +2188,63 @@ async fn import_dump_v6_containing_experimental_features() {
.await;
}
#[actix_rt::test]
async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
let temp = tempfile::tempdir().unwrap();
let options = Opt {
import_dump: Some(GetDump::TestV6WithBatchesAndEnqueuedTasks.path()),
..default_settings(temp.path())
};
let mut server = Server::new_auth_with_options(options, temp).await;
server.use_api_key("MASTER_KEY");
server.wait_task(2).await.succeeded();
let (tasks, _) = server.tasks().await;
snapshot!(json_string!(tasks, { ".results[1].startedAt" => "[date]", ".results[1].finishedAt" => "[date]", ".results[1].duration" => "[date]" }), name: "tasks");
let (batches, _) = server.batches().await;
snapshot!(json_string!(batches, { ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].duration" => "[date]" }), name: "batches");
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
assert_eq!(indexes["results"][0]["uid"], json!("kefir"));
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
}
"###);
let index = server.index("kefir");
let (documents, _) = index.get_all_documents_raw("").await;
snapshot!(documents, @r#"
{
"results": [
{
"id": 1,
"dog": "kefir"
},
{
"id": 2,
"dog": "intel"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"#);
}
// In this test we must generate the dump ourselves to ensure the
// `user provided` vectors are well set
#[actix_rt::test]
@@ -2071,7 +2323,8 @@ async fn generate_and_import_dump_containing_vectors() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);

View File

@@ -0,0 +1,78 @@
---
source: crates/meilisearch/tests/dumps/mod.rs
snapshot_kind: text
---
{
"results": [
{
"uid": 2,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 1,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.144827890S",
"startedAt": "2025-02-04T10:15:21.275640274Z",
"finishedAt": "2025-02-04T10:15:21.420468164Z"
},
{
"uid": 0,
"progress": null,
"details": {},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.032902186S",
"startedAt": "2025-02-04T10:14:43.559526162Z",
"finishedAt": "2025-02-04T10:14:43.592428348Z"
}
],
"total": 3,
"limit": 20,
"from": 2,
"next": null
}

View File

@@ -0,0 +1,78 @@
---
source: crates/meilisearch/tests/dumps/mod.rs
snapshot_kind: text
---
{
"results": [
{
"uid": 3,
"batchUid": null,
"indexUid": null,
"status": "succeeded",
"type": "dumpCreation",
"canceledBy": null,
"details": {
"dumpUid": null
},
"error": null,
"duration": "PT0.000629059S",
"enqueuedAt": "2025-02-04T10:22:31.318175268Z",
"startedAt": "2025-02-04T10:22:31.331701375Z",
"finishedAt": "2025-02-04T10:22:31.332330434Z"
},
{
"uid": 2,
"batchUid": 2,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[date]",
"enqueuedAt": "2025-02-04T10:15:49.212484063Z",
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 1,
"batchUid": null,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.144827890S",
"enqueuedAt": "2025-02-04T10:15:21.258630973Z",
"startedAt": "2025-02-04T10:15:21.275640274Z",
"finishedAt": "2025-02-04T10:15:21.420468164Z"
},
{
"uid": 0,
"batchUid": null,
"indexUid": "kefir",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "PT0.032902186S",
"enqueuedAt": "2025-02-04T10:14:43.550379968Z",
"startedAt": "2025-02-04T10:14:43.559526162Z",
"finishedAt": "2025-02-04T10:14:43.592428348Z"
}
],
"total": 4,
"limit": 20,
"from": 3,
"next": null
}

View File

@@ -22,7 +22,8 @@ async fn experimental_features() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
@@ -35,7 +36,8 @@ async fn experimental_features() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
@@ -48,7 +50,8 @@ async fn experimental_features() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
@@ -62,7 +65,8 @@ async fn experimental_features() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
@@ -76,7 +80,8 @@ async fn experimental_features() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
}
@@ -97,7 +102,8 @@ async fn experimental_feature_metrics() {
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false
"network": false,
"getTaskDocumentsRoute": false
}
"###);
@@ -152,7 +158,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`",
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -1,3 +1,4 @@
use meili_snap::{json_string, snapshot};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
@@ -74,3 +75,253 @@ async fn stats() {
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
}
#[actix_rt::test]
async fn add_remove_embeddings() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// 2 embedded documents for 5 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 2 embedded documents for 3 embeddings in total
let documents = json!([
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 2 embedded documents for 2 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 1 embedded document for 2 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 1,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
}
#[actix_rt::test]
async fn add_remove_embedded_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// 2 embedded documents for 5 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// delete one embedded document, leaving 1 embedded document with 3 embeddings in total
let (response, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 1,
"fieldDistribution": {
"id": 1,
"name": 1
}
}
"###);
}
#[actix_rt::test]
async fn update_embedder_settings() {
let server = Server::new().await;
let index = server.index("doggo");
// 2 embedded documents for 3 embeddings in total
// but no embedders are added in the settings yet, so we expect 0 embedded documents and 0 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// add embedders to the settings
// 2 embedded documents for 3 embeddings in total
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
}

View File

@@ -135,6 +135,8 @@ async fn check_the_index_scheduler(server: &Server) {
"kefir": {
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"age": 1,
"description": 1,
@@ -215,6 +217,8 @@ async fn check_the_index_scheduler(server: &Server) {
"kefir": {
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"age": 1,
"description": 1,
@@ -228,10 +232,12 @@ async fn check_the_index_scheduler(server: &Server) {
"###);
let index = server.index("kefir");
let (stats, _) = index.stats().await;
snapshot!(stats, @r#"
snapshot!(stats, @r###"
{
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"age": 1,
"description": 1,
@@ -240,7 +246,7 @@ async fn check_the_index_scheduler(server: &Server) {
"surname": 1
}
}
"#);
"###);
// Delete all the tasks of a specific batch
let (task, _) = server.delete_tasks("batchUids=10").await;

View File

@@ -1,22 +1,26 @@
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::io::{BufWriter, Write as _};
use std::path::PathBuf;
use std::time::Instant;
use anyhow::{bail, Context};
use clap::{Parser, Subcommand};
use clap::{Parser, Subcommand, ValueEnum};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::batches::Batch;
use meilisearch_types::heed::types::{Bytes, SerdeJson, Str};
use meilisearch_types::heed::{
CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
};
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::{get_version, parse_version};
use meilisearch_types::Index;
use serde_json::Value::Object;
use time::macros::format_description;
use time::OffsetDateTime;
use upgrade::OfflineUpgrade;
@@ -68,6 +72,24 @@ enum Command {
skip_enqueued_tasks: bool,
},
/// Exports the documents of a Meilisearch index to stdout in NDJSON format.
///
/// This command can be executed on a running Meilisearch database. However, please note that
/// it will maintain a read-only transaction for the duration of the extraction process.
ExportDocuments {
/// The index name to export the documents from.
#[arg(long)]
index_name: String,
/// Do not export vectors with the documents.
#[arg(long)]
ignore_vectors: bool,
/// The number of documents to skip.
#[arg(long)]
offset: Option<usize>,
},
/// Attempts to upgrade from one major version to the next without a dump.
///
/// Make sure to run this command when Meilisearch is not running!
@@ -102,6 +124,25 @@ enum Command {
/// the compaction operation can start. Once the compaction is done, the big index is replaced
/// by the compacted one and the mutable transaction is released.
CompactIndex { index_name: String },
/// Uses the hair dryer to make dedicated pages hot in the cache
///
/// To make the index faster we must make sure it is hot in the DB cache: that's the curse of
/// memory-mapping but also its strength. This command is designed to make a specific part of
/// the index hot in the cache.
HairDryer {
#[arg(long, value_delimiter = ',')]
index_name: Vec<String>,
#[arg(long, value_delimiter = ',')]
index_part: Vec<IndexPart>,
},
}
#[derive(Clone, ValueEnum)]
enum IndexPart {
/// Will make the arroy index hot.
Arroy,
}
fn main() -> anyhow::Result<()> {
@@ -114,11 +155,17 @@ fn main() -> anyhow::Result<()> {
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version)
}
Command::ExportDocuments { index_name, ignore_vectors, offset } => {
export_documents(db_path, index_name, ignore_vectors, offset)
}
Command::OfflineUpgrade { target_version } => {
let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
}
Command::CompactIndex { index_name } => compact_index(db_path, &index_name),
Command::HairDryer { index_name, index_part } => {
hair_dryer(db_path, &index_name, &index_part)
}
}
}
@@ -255,70 +302,86 @@ fn export_a_dump(
eprintln!("Successfully dumped {count} keys!");
eprintln!("Dumping the queue");
let rtxn = env.read_txn()?;
let all_tasks: Database<BEU32, SerdeJson<Task>> =
try_opening_database(&env, &rtxn, "all-tasks")?;
let all_batches: Database<BEU32, SerdeJson<Batch>> =
try_opening_database(&env, &rtxn, "all-batches")?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
if skip_enqueued_tasks {
eprintln!("Skip dumping the enqueued tasks...");
} else {
let mut dump_tasks = dump.create_tasks_queue()?;
let mut count = 0;
for ret in all_tasks.iter(&rtxn)? {
let (_, t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
eprintln!("Dumping the tasks");
let mut dump_tasks = dump.create_tasks_queue()?;
let mut count_tasks = 0;
let mut count_enqueued_tasks = 0;
for ret in all_tasks.iter(&rtxn)? {
let (_, t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
if status == Status::Enqueued && skip_enqueued_tasks {
continue;
}
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file_uuid) = content_file {
if status == Status::Enqueued {
let content_file = file_store.get_update(content_file_uuid)?;
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
if (detected_version.0, detected_version.1, detected_version.2) < (1, 12, 0) {
eprintln!("Dumping the enqueued tasks reading them in obkv format...");
let reader =
DocumentsBatchReader::from_reader(content_file).with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
let (mut cursor, documents_batch_index) =
reader.into_cursor_and_fields_index();
while let Some(doc) = cursor.next_document().with_context(|| {
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
} else {
eprintln!(
"Dumping the enqueued tasks reading them in JSON stream format..."
);
for document in
serde_json::de::Deserializer::from_reader(content_file).into_iter()
{
let document = document.with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
dump_content_file.push_document(&document)?;
}
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file_uuid) = content_file {
if status == Status::Enqueued {
let content_file = file_store.get_update(content_file_uuid)?;
if (detected_version.0, detected_version.1, detected_version.2) < (1, 12, 0) {
eprintln!("Dumping the enqueued tasks reading them in obkv format...");
let reader =
DocumentsBatchReader::from_reader(content_file).with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
while let Some(doc) = cursor.next_document().with_context(|| {
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
} else {
eprintln!("Dumping the enqueued tasks reading them in JSON stream format...");
for document in
serde_json::de::Deserializer::from_reader(content_file).into_iter()
{
let document = document.with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
dump_content_file.push_document(&document)?;
}
dump_content_file.flush()?;
count += 1;
}
dump_content_file.flush()?;
count_enqueued_tasks += 1;
}
}
dump_tasks.flush()?;
eprintln!("Successfully dumped {count} enqueued tasks!");
count_tasks += 1;
}
dump_tasks.flush()?;
eprintln!(
"Successfully dumped {count_tasks} tasks including {count_enqueued_tasks} enqueued tasks!"
);
// 4. dump the batches
eprintln!("Dumping the batches");
let mut dump_batches = dump.create_batches_queue()?;
let mut count = 0;
for ret in all_batches.iter(&rtxn)? {
let (_, b) = ret?;
dump_batches.push_batch(&b)?;
count += 1;
}
dump_batches.flush()?;
eprintln!("Successfully dumped {count} batches!");
// 5. Dump the indexes
eprintln!("Dumping the indexes...");
// 4. Dump the indexes
let mut count = 0;
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
@@ -339,14 +402,14 @@ fn export_a_dump(
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 4.1. Dump the documents
// 5.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 4.2. Dump the settings
// 5.2. Dump the settings
let settings = meilisearch_types::settings::settings(
&index,
&rtxn,
@@ -443,3 +506,170 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
bail!("Target index {index_name} not found!")
}
fn export_documents(
db_path: PathBuf,
index_name: String,
ignore_vectors: bool,
offset: Option<usize>,
) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
if uid == index_name {
let index_path = db_path.join("indexes").join(uuid.to_string());
let index =
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(&rtxn)?;
if let Some(offset) = offset {
eprintln!("Skipping {offset} documents");
}
let mut stdout = BufWriter::new(std::io::stdout());
let all_documents = index.documents_ids(&rtxn)?.into_iter().skip(offset.unwrap_or(0));
for (i, ret) in index.iter_documents(&rtxn, all_documents)?.enumerate() {
let (id, doc) = ret?;
let mut document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
if i % 10_000 == 0 {
eprintln!("Starting the {}th document", i + offset.unwrap_or(0));
}
if !ignore_vectors {
'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(Object(Default::default()));
let Object(vectors) = vectors else {
return Err(meilisearch_types::milli::Error::UserError(
meilisearch_types::milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
},
value: vectors.clone(),
},
)
.into());
};
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: !user_provided,
};
vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
}
}
serde_json::to_writer(&mut stdout, &document)?;
}
stdout.flush()?;
} else {
eprintln!("Found index {uid} but it's not the right index...");
}
}
Ok(())
}
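For readers unfamiliar with the `_vectors` shape injected above: each embedder gets an entry carrying its embeddings and a `regenerate` flag, which is `false` only for user-provided vectors so a later re-import will not recompute them. A hedged illustration of the approximate resulting document:

use serde_json::json;

// Illustrative only: approximate shape of an exported document once the
// exporter has injected the reserved `_vectors` field.
fn main() {
    let document = json!({
        "id": 1,
        "title": "kefir",
        "_vectors": {
            "manual": {
                // One or more embeddings for this embedder.
                "embeddings": [[0.1, 0.2, 0.3]],
                // false => user provided, so it must not be regenerated.
                "regenerate": false
            }
        }
    });
    println!("{document}");
}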
fn hair_dryer(
db_path: PathBuf,
index_names: &[String],
index_parts: &[IndexPart],
) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Trying to get a read transaction on the index scheduler...");
let rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
if index_names.iter().any(|i| i == uid) {
let index_path = db_path.join("indexes").join(uuid.to_string());
let index =
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display())
})?;
eprintln!("Trying to get a read transaction on the {uid} index...");
let rtxn = index.read_txn()?;
for part in index_parts {
match part {
IndexPart::Arroy => {
let mut count = 0;
let total = index.vector_arroy.len(&rtxn)?;
eprintln!("Hair drying arroy for {uid}...");
for (i, result) in index
.vector_arroy
.remap_types::<Bytes, Bytes>()
.iter(&rtxn)?
.enumerate()
{
let (key, value) = result?;
// All of this just to avoid compiler optimizations 🤞
// We must read all the bytes to make the pages hot in cache.
// <https://doc.rust-lang.org/std/hint/fn.black_box.html>
count += std::hint::black_box(key.iter().fold(0, |acc, _| acc + 1));
count += std::hint::black_box(value.iter().fold(0, |acc, _| acc + 1));
if i % 10_000 == 0 {
let perc = (i as f64) / (total as f64) * 100.0;
eprintln!("Visited {i}/{total} ({perc:.2}%) keys")
}
}
eprintln!("Done hair drying a total of at least {count} bytes.");
}
}
}
} else {
eprintln!("Found index {uid} but it's not the right index...");
}
}
Ok(())
}
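A minimal, self-contained sketch of the same page-warming trick — hypothetical helper names, and folding over the byte values themselves so the loop cannot collapse into a plain length computation:

use std::hint::black_box;

/// Touches every byte of a (typically memory-mapped) slice so that the
/// OS faults the backing pages into the page cache.
fn warm_pages(bytes: &[u8]) -> usize {
    // Summing the byte values forces an actual read of each byte;
    // black_box hides the result so the loop cannot be elided.
    black_box(bytes.iter().fold(0usize, |acc, &b| acc.wrapping_add(b as usize)))
}

fn main() {
    let data = vec![1u8; 1 << 20]; // stand-in for one LMDB value
    let checksum: usize = data.chunks(4096).map(warm_pages).sum();
    eprintln!("warmed {} pages (checksum {checksum})", data.len() / 4096);
}

Note that black_box is only a best-effort hint to the optimizer, hence the crossed fingers in the original comment.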

View File

@@ -1,120 +1,121 @@
[package]
name = "milli"
edition = "2021"
name = "milli"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
version.workspace = true
# edition.workspace = true
license.workspace = true
[dependencies]
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bimap = {version = "0.6.3", features = ["serde"]}
bincode = "1.3.3"
bstr = "1.11.3"
bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
bytemuck = {version = "1.21.0", features = ["extern_crate_alloc"]}
byteorder = "1.5.0"
charabia = { version = "0.9.2", default-features = false }
charabia = {version = "0.9.2", default-features = false}
concat-arrays = "0.1.2"
convert_case = "0.6.0"
crossbeam-channel = "0.5.14"
deserr = "0.6.3"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
either = {version = "1.13.0", features = ["serde"]}
flatten-serde-json = {path = "../flatten-serde-json"}
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
heed = { version = "0.20.5", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
] }
indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
grenad = {version = "0.5.0", default-features = false, features = ["rayon", "tempfile"]}
heed = {version = "0.20.5", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
]}
indexmap = {version = "2.7.0", features = ["serde"]}
json-depth-checker = {path = "../json-depth-checker"}
levenshtein_automata = {version = "0.2.1", features = ["fst_automaton"]}
memchr = "2.7.4"
memmap2 = "0.9.5"
obkv = "0.3.0"
once_cell = "1.20.2"
ordered-float = "4.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.10", features = ["serde"] }
rstar = { version = "0.12.2", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] }
roaring = {version = "0.10.10", features = ["serde"]}
rstar = {version = "0.12.2", features = ["serde"]}
serde = {version = "1.0.217", features = ["derive"]}
serde_json = {version = "1.0.135", features = ["preserve_order", "raw_value"]}
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallstr = {version = "0.3.0", features = ["serde"]}
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.11.0", features = ["v4"] }
time = {version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
]}
uuid = {version = "1.11.0", features = ["v4"]}
filter-parser = { path = "../filter-parser" }
filter-parser = {path = "../filter-parser"}
scoped_thread_pool = {path = "/home/dureuill/dev/scoped_thread_pool"}
# documents words self-join
itertools = "0.14.0"
csv = "1.3.1"
candle-core = { version = "0.8.2" }
candle-transformers = { version = "0.8.2" }
candle-nn = { version = "0.8.2" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
tiktoken-rs = "0.6.0"
liquid = "0.26.9"
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [
"serde",
"no_module",
"no_custom_syntax",
"no_time",
"sync",
] }
allocator-api2 = "0.2.21"
arroy = "0.5.0"
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
url = "2.5.4"
rayon-par-bridge = "0.1.0"
hashbrown = "0.15.2"
bbqueue = {git = "https://github.com/meilisearch/bbqueue"}
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
thread_local = "1.1.8"
allocator-api2 = "0.2.21"
rustc-hash = "2.1.0"
uell = "0.1.0"
candle-core = {version = "0.8.2"}
candle-nn = {version = "0.8.2"}
candle-transformers = {version = "0.8.2"}
csv = "1.3.1"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
flume = {version = "0.11.1", default-features = false}
hashbrown = "0.15.2"
hf-hub = {git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
]}
liquid = "0.26.9"
rand = "0.8.5"
rayon-par-bridge = "0.1.0"
rhai = {git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [
"serde",
"no_module",
"no_custom_syntax",
"no_time",
"sync",
]}
rustc-hash = "2.1.0"
thread_local = "1.1.8"
tiktoken-rs = "0.6.0"
tokenizers = {git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig",
]}
tracing = "0.1.41"
uell = "0.1.0"
ureq = {version = "2.12.1", features = ["json"]}
url = "2.5.4"
utoipa = {version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"]}
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
mimalloc = {version = "0.1.43", default-features = false}
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
meili-snap = {path = "../meili-snap"}
rand = {version = "0.8.5", features = ["small_rng"]}
[features]
all-tokenizations = [
"charabia/default",
"charabia/default",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB

View File

@@ -515,3 +515,68 @@ fn conditionally_lookup_for_error_message() {
assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
}
}
impl Error {
pub fn from_scoped_thread_pool_error(
thread_pool: &scoped_thread_pool::ThreadPool<Self>,
thread_id: usize,
error: scoped_thread_pool::Error<Self>,
) -> Self {
match error {
scoped_thread_pool::Error::Err(error) => error,
scoped_thread_pool::Error::Panic(payload)
| scoped_thread_pool::Error::ThreadExited(Some(payload)) => {
let msg = match payload.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match payload.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
tracing::error!(
thread_name = thread_pool.thread_name(thread_id),
"Thread panicked with {msg}"
);
Error::InternalError(InternalError::PanicInThreadPool(PanicCatched))
}
scoped_thread_pool::Error::ThreadExited(None) => {
Error::InternalError(InternalError::PanicInThreadPool(PanicCatched))
}
}
}
pub fn from_scoped_thread_pool_errors(
thread_pool: &scoped_thread_pool::ThreadPool<Self>,
value: scoped_thread_pool::Errors<Error>,
) -> Self {
// Iterate over all the errors, keeping the highest-priority one,
// such that AbortedIndexation < regular error < thread exited < panic.
let mut max = None;
for (thread_id, error) in value.0 {
max = match (max, error) {
(None, error) => Some((thread_id, error)),
(max @ Some((_, scoped_thread_pool::Error::Panic(_))), _) => max,
(_, new @ scoped_thread_pool::Error::Panic(_)) => Some((thread_id, new)),
(max @ Some((_, scoped_thread_pool::Error::ThreadExited(Some(_)))), _) => max,
(_, new @ scoped_thread_pool::Error::ThreadExited(Some(_))) => {
Some((thread_id, new))
}
(max @ Some((_, scoped_thread_pool::Error::ThreadExited(None))), _) => max,
(_, new @ scoped_thread_pool::Error::ThreadExited(None)) => Some((thread_id, new)),
(
Some((
_,
scoped_thread_pool::Error::Err(Error::InternalError(
InternalError::AbortedIndexation,
)),
)),
new,
) => Some((thread_id, new)),
(max @ Some((_, scoped_thread_pool::Error::Err(_))), _) => max,
};
}
// `Errors` always contains at least one error, so `max` is `Some` here.
let (thread_id, error) = max.unwrap();
Self::from_scoped_thread_pool_error(thread_pool, thread_id, error)
}
}
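The long match in from_scoped_thread_pool_errors encodes a priority order between error kinds. An equivalent formulation with an explicit severity rank — using simplified stand-in types, since scoped_thread_pool is a local dependency — makes the ordering easier to audit:

// Simplified stand-ins; the real variants carry payloads and errors.
enum PoolError {
    AbortedIndexation, // mapped from Err(InternalError::AbortedIndexation)
    Err,               // any other regular task error
    ThreadExitedWithoutPayload,
    ThreadExitedWithPayload,
    Panic,
}

fn severity(error: &PoolError) -> u8 {
    match error {
        PoolError::AbortedIndexation => 0,
        PoolError::Err => 1,
        PoolError::ThreadExitedWithoutPayload => 2,
        PoolError::ThreadExitedWithPayload => 3,
        PoolError::Panic => 4,
    }
}

fn pick_most_severe(errors: Vec<(usize, PoolError)>) -> (usize, PoolError) {
    errors
        .into_iter()
        .max_by_key(|(_, error)| severity(error))
        .expect("`Errors` always contains at least one error")
}

(Unlike the match above, max_by_key keeps the last of equally severe errors rather than the first; either is fine, since only one representative error is reported.)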

View File

@@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec;
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -1731,6 +1731,18 @@ impl Index {
let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
}
pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
let mut stats = ArroyStats::default();
let embedding_configs = self.embedding_configs(rtxn)?;
for config in embedding_configs {
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
let reader =
ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
reader.aggregate_stats(rtxn, &mut stats)?;
}
Ok(stats)
}
}
#[derive(Debug, Deserialize, Serialize)]
@@ -1776,6 +1788,7 @@ pub(crate) mod tests {
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::new::indexer::document_changes::CHUNK_SIZE;
use crate::update::settings::InnerIndexSettings;
use crate::update::{
self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, Settings,
@@ -1825,7 +1838,7 @@ pub(crate) mod tests {
) -> Result<(), crate::error::Error> {
let local_pool;
let indexer_config = &self.indexer_config;
let pool = match &indexer_config.thread_pool {
let pool = match &indexer_config.rayon_thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
@@ -1833,6 +1846,11 @@ pub(crate) mod tests {
}
};
let thread_pool = match &indexer_config.thread_pool {
Some(thread_pool) => thread_pool,
None => &scoped_thread_pool::ThreadPool::with_available_parallelism("index".into()),
};
let rtxn = self.inner.read_txn()?;
let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
@@ -1852,29 +1870,28 @@ pub(crate) mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
thread_pool,
CHUNK_SIZE,
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
return Err(error.into());
}
pool.install(|| {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false,
&Progress::default(),
)
})
.unwrap()?;
indexer::index(
wtxn,
&self.inner,
thread_pool,
&pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false,
&Progress::default(),
)?;
Ok(())
}
@@ -1913,7 +1930,7 @@ pub(crate) mod tests {
) -> Result<(), crate::error::Error> {
let local_pool;
let indexer_config = &self.indexer_config;
let pool = match &indexer_config.thread_pool {
let pool = match &indexer_config.rayon_thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
@@ -1921,6 +1938,11 @@ pub(crate) mod tests {
}
};
let thread_pool = match &indexer_config.thread_pool {
Some(thread_pool) => thread_pool,
None => &scoped_thread_pool::ThreadPool::with_available_parallelism("index".into()),
};
let rtxn = self.inner.read_txn()?;
let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
@@ -1943,28 +1965,28 @@ pub(crate) mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
thread_pool,
CHUNK_SIZE,
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
return Err(error.into());
}
pool.install(|| {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false,
&Progress::default(),
)
})
.unwrap()?;
indexer::index(
wtxn,
&self.inner,
thread_pool,
&pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false,
&Progress::default(),
)?;
Ok(())
}
@@ -1993,7 +2015,7 @@ pub(crate) mod tests {
let local_pool;
let indexer_config = &index.indexer_config;
let pool = match &indexer_config.thread_pool {
let pool = match &indexer_config.rayon_thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
@@ -2001,6 +2023,11 @@ pub(crate) mod tests {
}
};
let thread_pool = match &indexer_config.thread_pool {
Some(thread_pool) => thread_pool,
None => &scoped_thread_pool::ThreadPool::with_available_parallelism("index".into()),
};
let rtxn = index.inner.read_txn().unwrap();
let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
@@ -2024,6 +2051,8 @@ pub(crate) mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
thread_pool,
CHUNK_SIZE,
)
.unwrap();
@@ -2034,7 +2063,8 @@ pub(crate) mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
thread_pool,
&pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,

View File

@@ -7,6 +7,7 @@ use maplit::{btreemap, hashset};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::new::indexer::document_changes::CHUNK_SIZE;
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
use crate::{db_snap, Criterion, Index};
@@ -65,6 +66,9 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
// index documents
indexer.add_documents(&payload).unwrap();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let indexer_alloc = Bump::new();
let (document_changes, operation_stats, primary_key) = indexer
.into_changes(
@@ -75,6 +79,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
@@ -85,6 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -28,7 +28,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
) -> Result<grenad::Reader<BufReader<File>>> {
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let force_reindexing = settings_diff.reindex_searchable();
// initialize destination values.

View File

@@ -23,7 +23,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,

View File

@@ -55,7 +55,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
localized_field_ids: &LocalizedFieldIds,
facet_search: bool,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
@@ -145,7 +145,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,

View File

@@ -44,7 +44,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<ExtractedFacetValues> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter(
grenad::SortAlgorithm::Stable,

View File

@@ -26,7 +26,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,

View File

@@ -35,7 +35,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,

View File

@@ -39,7 +39,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.map(|_| {
create_sorter(

View File

@@ -24,7 +24,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let max_memory = indexer.max_memory_by_thread();
let max_memory = indexer.max_memory_by_rayon_thread();
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,

View File

@@ -119,7 +119,11 @@ impl GrenadParameters {
///
/// This should be called inside of a rayon thread pool,
/// otherwise, it will take the global number of threads.
pub fn max_memory_by_thread(&self) -> Option<usize> {
pub fn max_memory_by_thread(&self, thread_count: usize) -> Option<usize> {
self.max_memory.map(|max_memory| (max_memory / thread_count))
}
pub fn max_memory_by_rayon_thread(&self) -> Option<usize> {
self.max_memory.map(|max_memory| (max_memory / rayon::current_num_threads()))
}
}
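In other words, the per-thread budget is simply the global budget divided by whichever thread count applies: an explicit thread_count for the scoped pool, or rayon::current_num_threads() for the legacy rayon path — the pre-existing doc comment about being called inside a rayon pool now only concerns the latter. A quick worked example, with a hypothetical 2 GiB budget:

fn main() {
    // Hypothetical budget: 2 GiB split across 8 worker threads.
    let max_memory: Option<usize> = Some(2 * 1024 * 1024 * 1024);
    let thread_count = 8;
    let per_thread = max_memory.map(|max_memory| max_memory / thread_count);
    assert_eq!(per_thread, Some(256 * 1024 * 1024)); // 256 MiB per thread
}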

View File

@@ -227,7 +227,7 @@ where
crate::vector::error::PossibleEmbeddingMistakes::new(&field_distribution);
let backup_pool;
let pool = match self.indexer_config.thread_pool {
let pool = match self.indexer_config.rayon_thread_pool {
Some(ref pool) => pool,
None => {
// We initialize a backup pool with the default
@@ -770,6 +770,7 @@ mod tests {
use crate::progress::Progress;
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::new::indexer::document_changes::CHUNK_SIZE;
use crate::update::Setting;
use crate::{db_snap, Filter, Search, UserError};
@@ -1967,6 +1968,8 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string()),
CHUNK_SIZE,
)
.unwrap();
@@ -2115,6 +2118,9 @@ mod tests {
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
indexer.add_documents(&documents).unwrap();
indexer.delete_documents(&["2"]);
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let (document_changes, _operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
@@ -2124,12 +2130,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2177,6 +2186,9 @@ mod tests {
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let (document_changes, _operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
@@ -2186,12 +2198,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2229,6 +2244,8 @@ mod tests {
let embedders = EmbeddingConfigs::default();
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments);
indexer.add_documents(&documents).unwrap();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let (document_changes, _operation_stats, primary_key) = indexer
.into_changes(
@@ -2239,12 +2256,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2291,12 +2311,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2327,6 +2350,8 @@ mod tests {
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments);
indexer.delete_documents(&["1", "2"]);
@@ -2345,12 +2370,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2382,6 +2410,8 @@ mod tests {
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments);
indexer.delete_documents(&["1", "2", "1", "2"]);
@@ -2404,12 +2434,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2440,6 +2473,8 @@ mod tests {
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments);
let documents = documents!([
@@ -2456,12 +2491,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2508,12 +2546,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2683,6 +2724,8 @@ mod tests {
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
// OP
@@ -2702,12 +2745,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2761,12 +2807,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
@@ -2817,12 +2866,15 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&thread_pool,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -11,7 +11,8 @@ pub struct IndexerConfig {
pub max_memory: Option<usize>,
pub chunk_compression_type: CompressionType,
pub chunk_compression_level: Option<u32>,
pub thread_pool: Option<ThreadPoolNoAbort>,
pub rayon_thread_pool: Option<ThreadPoolNoAbort>,
pub thread_pool: Option<scoped_thread_pool::ThreadPool<crate::Error>>,
pub max_positions_per_attributes: Option<u32>,
pub skip_index_budget: bool,
}
@@ -36,6 +37,7 @@ impl Default for IndexerConfig {
max_memory: None,
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
rayon_thread_pool: None,
thread_pool: None,
max_positions_per_attributes: None,
skip_index_budget: false,

View File

@@ -51,12 +51,13 @@ const MAX_FRAME_HEADER_SIZE: usize = 9;
/// when new stuff is available in any BBQueue buffer but we send
/// a message in this queue only if it is empty to avoid filling
/// the channel *and* the BBQueue.
pub fn extractor_writer_bbqueue(
bbbuffers: &mut Vec<BBBuffer>,
pub fn extractor_writer_bbqueue<'a>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
bbbuffers: &'a mut Vec<BBBuffer>,
total_bbbuffer_capacity: usize,
channel_capacity: usize,
) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) {
let current_num_threads = rayon::current_num_threads();
) -> (ExtractorBbqueueSender<'a>, WriterBbqueueReceiver<'a>) {
let current_num_threads = thread_pool.thread_count();
let bbbuffer_capacity = total_bbbuffer_capacity.checked_div(current_num_threads).unwrap();
bbbuffers.resize_with(current_num_threads, || BBBuffer::new(bbbuffer_capacity));
@@ -66,12 +67,18 @@ pub fn extractor_writer_bbqueue(
let max_grant = capacity.saturating_div(2).checked_sub(MAX_FRAME_HEADER_SIZE).unwrap();
let producers = ThreadLocal::with_capacity(bbbuffers.len());
let consumers = rayon::broadcast(|bi| {
let bbqueue = &bbbuffers[bi.index()];
let (producer, consumer) = bbqueue.try_split_framed().unwrap();
producers.get_or(|| FullySend(RefCell::new(producer)));
consumer
});
let consumers = ThreadLocal::with_capacity(bbbuffers.len());
thread_pool
.broadcast(|thread_index| {
let bbqueue: &BBBuffer = &bbbuffers[thread_index];
let (producer, consumer) = bbqueue.try_split_framed().unwrap();
producers.get_or(|| FullySend(RefCell::new(producer)));
consumers.get_or(|| FullySend(consumer));
Ok(())
})
.map_err(|errors| crate::Error::from_scoped_thread_pool_errors(thread_pool, errors))
.unwrap();
let consumers: Vec<_> = consumers.into_iter().map(|consumer| consumer.0).collect();
let sent_messages_attempts = Arc::new(AtomicUsize::new(0));
let blocking_sent_messages_attempts = Arc::new(AtomicUsize::new(0));
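The broadcast call above replaces rayon::broadcast: it runs a closure once on every pool thread, which is how each thread registers its own bbqueue producer while the consumers are gathered centrally. A reduced sketch of that per-thread-setup idiom (a hypothetical helper, reusing thread_local::ThreadLocal as the code above does):

fn per_thread_slots(
    thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> crate::Result<thread_local::ThreadLocal<usize>> {
    let slots = thread_local::ThreadLocal::with_capacity(thread_pool.thread_count());
    thread_pool
        .broadcast(|thread_index| {
            // Runs exactly once on every pool thread.
            slots.get_or(|| thread_index);
            Ok(())
        })
        .map_err(|errors| crate::Error::from_scoped_thread_pool_errors(thread_pool, errors))?;
    Ok(slots)
}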
@@ -963,28 +970,70 @@ impl GeoSender<'_, '_> {
.map_err(|_| SendError(()))
}
pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> crate::Result<()> {
let database = Database::Main;
let value_length = bitmap.serialized_size();
let key = GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes();
let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| {
InternalError::StorePut {
database_name: database.database_name(),
key: key.into(),
value_length,
error: MdbError::BadValSize.into(),
}
})?;
pub fn set_geo_faceted(
&self,
bitmap: &RoaringBitmap,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> crate::Result<()> {
let writer = GeoWriter { bitmap, channel: *self };
thread_pool
.execute(&writer)
.map_err(|errors| crate::Error::from_scoped_thread_pool_errors(thread_pool, errors))
}
}
self.0.write_key_value_with(
struct GeoWriter<'a, 'b> {
bitmap: &'a RoaringBitmap,
channel: GeoSender<'a, 'b>,
}
impl<'a, 'b> scoped_thread_pool::Workload<'static> for GeoWriter<'a, 'b> {
type Context = ();
type Error = crate::Error;
fn context(
&self,
_thread_count: usize,
_thread_index: usize,
) -> Result<Self::Context, Self::Error> {
Ok(())
}
fn run_task(
&self,
_thread_count: usize,
thread_index: usize,
task_index: usize,
_context: &mut Self::Context,
) -> Option<Result<(), Self::Error>> {
if thread_index != 0 || task_index != 0 {
return None;
}
let database = Database::Main;
let value_length = self.bitmap.serialized_size();
let key = GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes();
let key_length = match key.len().try_into().ok().and_then(NonZeroU16::new) {
Some(key_length) => key_length,
None => {
return Some(Err(InternalError::StorePut {
database_name: database.database_name(),
key: key.into(),
value_length,
error: MdbError::BadValSize.into(),
}
.into()))
}
};
Some(self.channel.0.write_key_value_with(
database,
key_length,
value_length,
|key_buffer, value_buffer| {
key_buffer.copy_from_slice(key);
bitmap.serialize_into(value_buffer)?;
self.bitmap.serialize_into(value_buffer)?;
Ok(())
},
)
))
}
}
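GeoWriter also illustrates the idiom for running a single-writer job on the whole pool: every invocation other than (thread 0, task 0) returns None immediately. More generally, a Workload hands each pool thread its own Context and is polled with increasing task_index until run_task returns None. A sketch of the shape, with signatures inferred from the call sites in this diff (scoped_thread_pool is a local path dependency, so this is an approximation, not the crate's documented API):

use std::sync::atomic::{AtomicU64, Ordering};

// Sums a slice on the pool, one chunk per (thread_index, task_index).
struct SumWorkload<'s> {
    chunks: scoped_thread_pool::PartitionChunks<'s, u64>,
    total: AtomicU64,
}

impl<'s> scoped_thread_pool::Workload<'static> for SumWorkload<'s> {
    type Context = ();
    type Error = crate::Error;

    fn context(&self, _thread_count: usize, _thread_index: usize) -> Result<(), Self::Error> {
        Ok(()) // per-thread setup would go here
    }

    fn run_task(
        &self,
        _thread_count: usize,
        thread_index: usize,
        task_index: usize,
        _context: &mut Self::Context,
    ) -> Option<Result<(), Self::Error>> {
        // `None` signals that this thread has run out of tasks.
        let items = self.chunks.partition(thread_index, task_index)?;
        self.total.fetch_add(items.iter().sum::<u64>(), Ordering::Relaxed);
        Some(Ok(()))
    }
}

Driving it mirrors set_geo_faceted: thread_pool.execute(&workload).map_err(|errors| crate::Error::from_scoped_thread_pool_errors(thread_pool, errors)).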

View File

@@ -144,7 +144,7 @@ impl<'doc> Update<'doc> {
)?)
}
pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> {
DocumentFromVersions::new(&self.new)
}
@@ -182,7 +182,7 @@ impl<'doc> Update<'doc> {
let mut cached_current = None;
let mut updated_selected_field_count = 0;
for entry in self.updated().iter_top_level_fields() {
for entry in self.only_changed_fields().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
@@ -241,7 +241,7 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields)
}
pub fn updated_vectors(
pub fn only_changed_vectors(
&self,
doc_alloc: &'doc Bump,
embedders: &'doc EmbeddingConfigs,

View File

@@ -38,7 +38,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
self.grenad_parameters.max_memory_by_thread(self.buckets),
extractor_alloc,
)))
}
@@ -388,6 +388,7 @@ fn truncate_str(s: &str) -> &str {
impl FacetedDocidsExtractor {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
@@ -412,10 +413,11 @@ impl FacetedDocidsExtractor {
let extractor = FacetedExtractorData {
attributes_to_extract: &attributes_to_extract,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
buckets: thread_pool.thread_count(),
sender,
};
extract(
thread_pool,
document_changes,
&extractor,
indexing_context,

View File

@@ -21,6 +21,7 @@ use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result};
pub struct GeoExtractor {
grenad_parameters: GrenadParameters,
thread_count: usize,
}
impl GeoExtractor {
@@ -28,11 +29,12 @@ impl GeoExtractor {
rtxn: &RoTxn,
index: &Index,
grenad_parameters: GrenadParameters,
thread_count: usize,
) -> Result<Option<Self>> {
let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
if is_sortable || is_filterable {
Ok(Some(GeoExtractor { grenad_parameters }))
Ok(Some(GeoExtractor { grenad_parameters, thread_count }))
} else {
Ok(None)
}
@@ -157,7 +159,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
) -> Result<()> {
let rtxn = &context.rtxn;
let index = context.index;
let max_memory = self.grenad_parameters.max_memory_by_thread();
let max_memory = self.grenad_parameters.max_memory_by_thread(self.thread_count);
let db_fields_ids_map = context.db_fields_ids_map;
let mut data_ref = context.data.borrow_mut_or_yield();
@@ -199,7 +201,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
.transpose()?;
let updated_geo = update
.updated()
.merged(rtxn, index, db_fields_ids_map)?
.geo_field()?
.map(|geo| extract_geo_coordinates(external_id, geo))
.transpose()?;

View File

@@ -5,7 +5,6 @@ mod geo;
mod searchable;
mod vectors;
use bumpalo::Bump;
pub use cache::{
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
@@ -15,22 +14,6 @@ pub use geo::*;
pub use searchable::*;
pub use vectors::EmbeddingExtractor;
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
use super::steps::IndexingStep;
use super::thread_local::{FullySend, ThreadLocal};
use crate::Result;
pub trait DocidsExtractor {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync;
}
/// TODO move in permissive json pointer
pub mod perm_json_p {
use serde_json::{Map, Value};

View File

@@ -218,7 +218,7 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
self.grenad_parameters.max_memory_by_thread(self.buckets),
extractor_alloc,
))))
}
@@ -240,6 +240,7 @@ pub struct WordDocidsExtractors;
impl WordDocidsExtractors {
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
@@ -288,10 +289,11 @@ impl WordDocidsExtractors {
let extractor = WordDocidsExtractorData {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
buckets: thread_pool.thread_count(),
};
extract(
thread_pool,
document_changes,
&extractor,
indexing_context,

View File

@@ -2,29 +2,62 @@ use std::cell::RefCell;
use std::collections::VecDeque;
use std::rc::Rc;
use bumpalo::Bump;
use heed::RoTxn;
use super::tokenize_document::DocumentTokenizer;
use super::SearchableExtractor;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::new::document::Document;
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::indexer::document_changes::DocumentChangeContext;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
use crate::update::GrenadParameters;
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
pub struct WordPairProximityDocidsExtractor;
impl<'a, 'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractor<'a> {
type Data = RefCell<BalancedCaches<'extractor>>;
impl SearchableExtractor for WordPairProximityDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(self.buckets),
extractor_alloc,
)))
}
fn process<'doc>(
&self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &DocumentChangeContext<Self::Data>,
) -> Result<()> {
for change in changes {
let change = change?;
self.extract_document_change(context, change)?;
}
Ok(())
}
}
pub struct WordPairProximityDocidsExtractor<'a> {
tokenizer: &'a DocumentTokenizer<'a>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
}
impl<'a> WordPairProximityDocidsExtractor<'a> {
fn attributes_to_extract<'b>(
rtxn: &'b RoTxn,
index: &'b Index,
) -> Result<Option<Vec<&'b str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
fn attributes_to_skip<'b>(_rtxn: &'b RoTxn, _index: &'b Index) -> Result<Vec<&'b str>> {
Ok(Vec::new())
}
@@ -32,10 +65,11 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
// and to store the docids of the documents that have a number of words in a given field
// equal to or under than MAX_COUNTED_WORDS.
fn extract_document_change(
&self,
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
document_tokenizer: &DocumentTokenizer,
document_change: DocumentChange,
) -> Result<()> {
let document_tokenizer = self.tokenizer;
let doc_alloc = &context.doc_alloc;
let index = context.index;
@@ -129,6 +163,70 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
}
Ok(())
}
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data: WordPairProximityDocidsExtractor = WordPairProximityDocidsExtractor {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: thread_pool.thread_count(),
};
let datastore = ThreadLocal::new();
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
extract(
thread_pool,
document_changes,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}
}
fn build_key<'a>(

View File

@@ -1,146 +1,5 @@
mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;
use std::cell::RefCell;
use std::marker::PhantomData;
use bumpalo::Bump;
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
use heed::RoTxn;
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::BalancedCaches;
use super::DocidsExtractor;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
pub struct SearchableExtractorData<'a, EX: SearchableExtractor> {
tokenizer: &'a DocumentTokenizer<'a>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
_ex: PhantomData<EX>,
}
impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
for SearchableExtractorData<'a, EX>
{
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
extractor_alloc,
)))
}
fn process<'doc>(
&self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &DocumentChangeContext<Self::Data>,
) -> Result<()> {
for change in changes {
let change = change?;
EX::extract_document_change(context, self.tokenizer, change)?;
}
Ok(())
}
}
pub trait SearchableExtractor: Sized + Sync {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data: SearchableExtractorData<Self> = SearchableExtractorData {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
_ex: PhantomData,
};
let datastore = ThreadLocal::new();
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
extract(
document_changes,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
document_tokenizer: &DocumentTokenizer,
document_change: DocumentChange,
) -> Result<()>;
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
-> Result<Option<Vec<&'a str>>>;
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
}
impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
Self::run_extraction(document_changes, indexing_context, extractor_allocs, step)
}
}

View File

@@ -99,7 +99,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
context.db_fields_ids_map,
&context.doc_alloc,
)?;
let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?;
let new_vectors =
update.only_changed_vectors(&context.doc_alloc, self.embedders)?;
if let Some(new_vectors) = &new_vectors {
unused_vectors_distribution.append(new_vectors)?;

View File

@@ -1,15 +1,14 @@
use std::cell::{Cell, RefCell};
use std::sync::atomic::Ordering;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use bumpalo::Bump;
use heed::RoTxn;
use rayon::iter::IndexedParallelIterator;
use super::super::document_change::DocumentChange;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::progress::{AtomicDocumentStep, Progress};
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::GrenadParameters;
@@ -114,7 +113,7 @@ pub trait DocumentChanges<'pl // lifetime of the underlying payload
>: Sync {
type Item: Send;
fn iter(&self, chunk_size: usize) -> impl IndexedParallelIterator<Item = impl AsRef<[Self::Item]>>;
fn items(&self, thread_index: usize, task_index: usize) -> Option<&[Self::Item]>;
fn len(&self) -> usize;
@@ -186,9 +185,10 @@ where
}
}
const CHUNK_SIZE: usize = 100;
pub const CHUNK_SIZE: usize = 100;
pub fn extract<
struct Extract<
'shared, // covariant lifetime for shared borrows
'pl, // covariant lifetime of the underlying payload
'extractor, // invariant lifetime of extractor_alloc
'fid, // invariant lifetime of fields ids map
@@ -196,31 +196,121 @@ pub fn extract<
'data, // invariant on EX::Data lifetime of datastore
'index, // covariant lifetime of the index
EX,
DC,
MSP,
> where
DC: DocumentChanges<'pl>,
EX: Extractor<'extractor>,
MSP: Fn() -> bool + Sync,
{
document_changes: &'shared DC,
extractor: &'shared EX,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor ThreadLocal<FullySend<Bump>>,
datastore: &'data ThreadLocal<EX::Data>,
step: Arc<AtomicU32>,
_marker: PhantomData<&'pl ()>,
}
impl<
'doc,
'extractor: 'doc, // invariant lifetime of extractor_alloc
'shared,
'pl, // covariant lifetime of the underlying payload
'fid: 'doc, // invariant lifetime of fields ids map
'indexer: 'doc, // covariant lifetime of objects that are borrowed during the entire indexing
'data: 'doc, // invariant on EX::Data lifetime of datastore
'index: 'doc + 'indexer, // covariant lifetime of the index
EX,
DC: DocumentChanges<'pl>,
MSP,
> scoped_thread_pool::Workload<'doc>
for Extract<'shared, 'pl, 'extractor, 'fid, 'indexer, 'data, 'index, EX, DC, MSP>
where
EX: Extractor<'extractor>,
MSP: Fn() -> bool + Sync,
{
type Context = DocumentChangeContext<'doc, 'extractor, 'fid, 'indexer, EX::Data>;
type Error = crate::Error;
fn context(
&self,
_thread_count: usize,
_thread_index: usize,
) -> std::result::Result<
DocumentChangeContext<'doc, 'extractor, 'fid, 'indexer, EX::Data>,
Self::Error,
> {
let extractor = self.extractor;
DocumentChangeContext::new(
self.indexing_context.index,
self.indexing_context.db_fields_ids_map,
self.indexing_context.new_fields_ids_map,
self.extractor_allocs,
self.indexing_context.doc_allocs,
self.datastore,
self.indexing_context.fields_ids_map_store,
move |index_alloc| extractor.init_data(index_alloc),
)
}
fn run_task(
&self,
_thread_count: usize,
thread_index: usize,
task_index: usize,
context: &mut Self::Context,
) -> Option<std::result::Result<(), Self::Error>> {
let items = self.document_changes.items(thread_index, task_index)?;
if (self.indexing_context.must_stop_processing)() {
return Some(Err(InternalError::AbortedIndexation.into()));
}
// Clean up and reuse the document-specific allocator
context.doc_alloc.reset();
let changes = items.iter().filter_map(|item| {
self.document_changes.item_to_document_change(context, item).transpose()
});
let res = self.extractor.process(changes, context);
self.step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
// send back the doc_alloc in the pool
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
Some(res)
}
}
pub fn extract<
'pool, // invariant lifetime of the thread pool
'pl, // covariant lifetime of the underlying payload
'extractor, // invariant lifetime of extractor_alloc
'fid, // invariant lifetime of fields ids map
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing
'data, // invariant on EX::Data lifetime of datastore
'index, // covariant lifetime of the index
EX,
DC,
MSP,
>(
thread_pool: &'pool scoped_thread_pool::ThreadPool<crate::Error>,
document_changes: &DC,
extractor: &EX,
IndexingContext {
index,
db_fields_ids_map,
new_fields_ids_map,
doc_allocs,
fields_ids_map_store,
must_stop_processing,
progress,
grenad_parameters: _,
}: IndexingContext<'fid, 'indexer, 'index, MSP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
datastore: &'data ThreadLocal<EX::Data>,
step: IndexingStep,
) -> Result<()>
where
DC: DocumentChanges<'pl>,
EX: Extractor<'extractor>,
MSP: Fn() -> bool + Sync,
{
tracing::trace!("We are resetting the extractor allocators");
progress.update_progress(step);
indexing_context.progress.update_progress(step);
// Clean up and reuse the extractor allocs
for extractor_alloc in extractor_allocs.iter_mut() {
tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
@@ -229,45 +319,22 @@ where
let total_documents = document_changes.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
indexing_context.progress.update_progress(progress_step);
let pi = document_changes.iter(CHUNK_SIZE);
pi.try_arc_for_each_try_init(
|| {
DocumentChangeContext::new(
index,
db_fields_ids_map,
new_fields_ids_map,
extractor_allocs,
doc_allocs,
datastore,
fields_ids_map_store,
move |index_alloc| extractor.init_data(index_alloc),
)
},
|context, items| {
if (must_stop_processing)() {
return Err(Arc::new(InternalError::AbortedIndexation.into()));
}
let extract = Extract {
document_changes,
extractor,
indexing_context,
extractor_allocs,
datastore,
step,
_marker: PhantomData,
};
thread_pool
.execute(&extract)
.map_err(|errors| crate::Error::from_scoped_thread_pool_errors(thread_pool, errors))?;
// Clean up and reuse the document-specific allocator
context.doc_alloc.reset();
let items = items.as_ref();
let changes = items.iter().filter_map(|item| {
document_changes.item_to_document_change(context, item).transpose()
});
let res = extractor.process(changes, context).map_err(Arc::new);
step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
// send back the doc_alloc in the pool
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
res
},
)?;
step.store(total_documents, Ordering::Relaxed);
extract.step.store(total_documents, Ordering::Relaxed);
Ok(())
}

View File

@@ -1,8 +1,7 @@
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use rayon::iter::IndexedParallelIterator;
use rayon::slice::ParallelSlice as _;
use roaring::RoaringBitmap;
use scoped_thread_pool::PartitionChunks;
use super::document_changes::{DocumentChangeContext, DocumentChanges};
use crate::documents::PrimaryKey;
@@ -28,31 +27,28 @@ impl DocumentDeletion {
self,
indexer_alloc: &'indexer Bump,
primary_key: PrimaryKey<'indexer>,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
chunk_size: usize,
) -> DocumentDeletionChanges<'indexer> {
let to_delete: bumpalo::collections::Vec<_> =
self.to_delete.into_iter().collect_in(indexer_alloc);
let to_delete = to_delete.into_bump_slice();
let to_delete = PartitionChunks::new(to_delete, chunk_size, thread_pool.thread_count());
DocumentDeletionChanges { to_delete, primary_key }
}
}
pub struct DocumentDeletionChanges<'indexer> {
to_delete: &'indexer [DocumentId],
to_delete: scoped_thread_pool::PartitionChunks<'indexer, DocumentId>,
primary_key: PrimaryKey<'indexer>,
}
impl<'pl> DocumentChanges<'pl> for DocumentDeletionChanges<'pl> {
type Item = DocumentId;
fn iter(
&self,
chunk_size: usize,
) -> impl IndexedParallelIterator<Item = impl AsRef<[Self::Item]>> {
self.to_delete.par_chunks(chunk_size)
}
fn item_to_document_change<
'doc, // lifetime of a single `process` call
T: MostlySend,
@@ -78,7 +74,11 @@ impl<'pl> DocumentChanges<'pl> for DocumentDeletionChanges<'pl> {
}
fn len(&self) -> usize {
self.to_delete.len()
self.to_delete.slice().len()
}
fn items(&self, thread_index: usize, task_index: usize) -> Option<&[Self::Item]> {
self.to_delete.partition(thread_index, task_index)
}
}
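With this change, DocumentChanges stops handing out a rayon parallel iterator and instead answers items(thread_index, task_index) lookups over a pre-partitioned slice. One plausible layout for PartitionChunks — an assumption, since the crate is local and unpublished — is round-robin assignment of fixed-size chunks:

// Assumed layout: task k of thread t covers the (k * thread_count + t)-th
// fixed-size chunk of the underlying slice. Purely illustrative.
fn partition<T>(
    items: &[T],
    chunk_size: usize,
    thread_count: usize,
    thread_index: usize,
    task_index: usize,
) -> Option<&[T]> {
    let chunk_index = task_index * thread_count + thread_index;
    let start = chunk_index.checked_mul(chunk_size)?;
    if start >= items.len() {
        return None;
    }
    let end = start.saturating_add(chunk_size).min(items.len());
    Some(&items[start..end])
}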
@@ -86,6 +86,7 @@ impl<'pl> DocumentChanges<'pl> for DocumentDeletionChanges<'pl> {
mod test {
use std::cell::RefCell;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use std::sync::RwLock;
use bumpalo::Bump;
@@ -94,7 +95,7 @@ mod test {
use crate::index::tests::TempIndex;
use crate::progress::Progress;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, Extractor, IndexingContext,
extract, DocumentChangeContext, Extractor, IndexingContext, CHUNK_SIZE,
};
use crate::update::new::indexer::DocumentDeletion;
use crate::update::new::steps::IndexingStep;
@@ -135,6 +136,9 @@ mod test {
}
}
let thread_pool =
scoped_thread_pool::ThreadPool::new(NonZeroUsize::new(1).unwrap(), "test".into());
let mut deletions = DocumentDeletion::new();
deletions.delete_documents_by_docids(Vec::<u32>::new().into_iter().collect());
let indexer = Bump::new();
@@ -155,8 +159,12 @@ mod test {
let deletion_tracker = TrackDeletion(PhantomData);
let changes = deletions
.into_changes(&indexer, crate::documents::PrimaryKey::Flat { name: "id", field_id: 0 });
let changes = deletions.into_changes(
&indexer,
crate::documents::PrimaryKey::Flat { name: "id", field_id: 0 },
&thread_pool,
CHUNK_SIZE,
);
let context = IndexingContext {
index: &index,
@@ -173,6 +181,7 @@ mod test {
let datastore = ThreadLocal::new();
extract(
&thread_pool,
&changes,
&deletion_tracker,
context,

View File

@@ -6,8 +6,8 @@ use bumparaw_collections::RawMap;
use hashbrown::hash_map::Entry;
use heed::RoTxn;
use memmap2::Mmap;
use rayon::slice::ParallelSlice;
use rustc_hash::FxBuildHasher;
use scoped_thread_pool::PartitionChunks;
use serde_json::value::RawValue;
use serde_json::Deserializer;
@@ -57,6 +57,8 @@ impl<'pl> DocumentOperation<'pl> {
new_fields_ids_map: &mut FieldsIdsMap,
must_stop_processing: &MSP,
progress: Progress,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
chunk_size: usize,
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
where
MSP: Fn() -> bool,
@@ -130,6 +132,8 @@ impl<'pl> DocumentOperation<'pl> {
docids_version_offsets.sort_unstable_by_key(|(_, po)| method.sort_key(&po.operations));
let docids_version_offsets = docids_version_offsets.into_bump_slice();
let docids_version_offsets =
PartitionChunks::new(docids_version_offsets, chunk_size, thread_pool.thread_count());
Ok((DocumentOperationChanges { docids_version_offsets }, operations_stats, primary_key))
}
}
@@ -353,13 +357,6 @@ fn merge_version_offsets<'s, 'pl>(
impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> {
type Item = (&'pl str, PayloadOperations<'pl>);
fn iter(
&self,
chunk_size: usize,
) -> impl rayon::prelude::IndexedParallelIterator<Item = impl AsRef<[Self::Item]>> {
self.docids_version_offsets.par_chunks(chunk_size)
}
fn item_to_document_change<'doc, T: MostlySend + 'doc>(
&'doc self,
context: &'doc DocumentChangeContext<T>,
@@ -379,12 +376,16 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> {
}
fn len(&self) -> usize {
self.docids_version_offsets.len()
self.docids_version_offsets.slice().len()
}
fn items(&self, thread_index: usize, task_index: usize) -> Option<&[Self::Item]> {
self.docids_version_offsets.partition(thread_index, task_index)
}
}
pub struct DocumentOperationChanges<'pl> {
docids_version_offsets: &'pl [(&'pl str, PayloadOperations<'pl>)],
docids_version_offsets: PartitionChunks<'pl, (&'pl str, PayloadOperations<'pl>)>,
}
pub enum Payload<'pl> {

View File
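Both the deletion and document-operation diffs reshape the `DocumentChanges` trait the same way: the rayon-flavoured `iter(chunk_size)` method disappears and a pull-based accessor takes its place. A minimal sketch of the assumed contract:

```rust
/// Assumed shape of the reshaped trait; the real milli trait also carries
/// `'pl` lifetimes and an `item_to_document_change` method.
pub trait DocumentChanges {
    type Item;

    /// Total number of items, still used for progress reporting.
    fn len(&self) -> usize;

    /// The `task_index`-th chunk owned by `thread_index`, or `None` once
    /// that thread has exhausted its share of the work.
    fn items(&self, thread_index: usize, task_index: usize) -> Option<&[Self::Item]>;
}
```

Both implementations above then simply delegate `items` to `PartitionChunks::partition`.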

@@ -22,6 +22,7 @@ use crate::{Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
#[allow(clippy::too_many_arguments)]
pub(super) fn extract_all<'pl, 'extractor, DC, MSP>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
document_changes: &DC,
indexing_context: IndexingContext<MSP>,
indexer_span: Span,
@@ -47,11 +48,12 @@ where
// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
let datastore = ThreadLocal::with_capacity(thread_pool.thread_count());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
let _entered = span.enter();
extract(
thread_pool,
document_changes,
&document_extractor,
indexing_context,
@@ -84,6 +86,7 @@ where
let _entered = span.enter();
FacetedDocidsExtractor::run_extraction(
thread_pool,
document_changes,
indexing_context,
extractor_allocs,
@@ -97,6 +100,7 @@ where
let _entered = span.enter();
facet_field_ids_delta = merge_and_send_facet_docids(
thread_pool,
caches,
FacetDatabases::new(index),
index,
@@ -118,6 +122,7 @@ where
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
thread_pool,
document_changes,
indexing_context,
extractor_allocs,
@@ -129,6 +134,7 @@ where
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
thread_pool,
word_docids,
index.word_docids.remap_types(),
index,
@@ -142,6 +148,7 @@ where
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
merge_and_send_docids(
thread_pool,
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
@@ -155,6 +162,7 @@ where
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
thread_pool,
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
@@ -168,6 +176,7 @@ where
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
thread_pool,
word_position_docids,
index.word_position_docids.remap_types(),
index,
@@ -181,6 +190,7 @@ where
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
thread_pool,
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
@@ -198,7 +208,8 @@ where
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
WordPairProximityDocidsExtractor::run_extraction(
thread_pool,
document_changes,
indexing_context,
extractor_allocs,
@@ -211,6 +222,7 @@ where
let _entered = span.enter();
merge_and_send_docids(
thread_pool,
caches,
index.word_pair_proximity_docids.remap_types(),
index,
@@ -232,12 +244,13 @@ where
field_distribution,
request_threads(),
);
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
let mut datastore = ThreadLocal::with_capacity(thread_pool.thread_count());
{
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
extract(
thread_pool,
document_changes,
&extractor,
indexing_context,
@@ -263,17 +276,23 @@ where
}
'geo: {
let Some(extractor) = GeoExtractor::new(&rtxn, index, *indexing_context.grenad_parameters)?
let Some(extractor) = GeoExtractor::new(
&rtxn,
index,
*indexing_context.grenad_parameters,
thread_pool.thread_count(),
)?
else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
let datastore = ThreadLocal::with_capacity(thread_pool.thread_count());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
extract(
thread_pool,
document_changes,
&extractor,
indexing_context,
@@ -289,6 +308,7 @@ where
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
thread_pool,
)?;
}
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);

View File
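Inside `extract_all`, every `ThreadLocal` datastore is now sized with `thread_pool.thread_count()` rather than `rayon::current_num_threads()`, so per-thread state lines up with the pool that actually executes the extractors. A small illustration of why the capacity matters, using the `thread_local` crate directly (milli wraps it behind its own `ThreadLocal` type, so this is an approximation):

```rust
use std::cell::Cell;

use thread_local::ThreadLocal;

fn main() {
    let thread_count = 4; // stand-in for thread_pool.thread_count()

    // Sizing the ThreadLocal with the pool's exact thread count means its
    // internal storage never has to grow, no matter which pool thread
    // touches it first.
    let datastore: ThreadLocal<Cell<u32>> = ThreadLocal::with_capacity(thread_count);

    std::thread::scope(|s| {
        for i in 0..thread_count as u32 {
            let datastore = &datastore;
            s.spawn(move || {
                // Each thread lazily initializes and then mutates its own slot.
                datastore.get_or(|| Cell::new(0)).set(i);
            });
        }
    });

    // One entry per thread that participated.
    assert_eq!(datastore.into_iter().count(), thread_count);
}
```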

@@ -1,5 +1,5 @@
use std::sync::atomic::AtomicBool;
use std::sync::RwLock;
use std::sync::{Once, RwLock};
use std::thread::{self, Builder};
use big_s::S;
@@ -21,7 +21,6 @@ use crate::progress::Progress;
use crate::update::GrenadParameters;
use crate::vector::{ArroyWrapper, EmbeddingConfigs};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
use std::sync::Once;
pub(crate) mod de;
pub mod document_changes;
@@ -45,6 +44,7 @@ static LOG_MEMORY_METRICS_ONCE: Once = Once::new();
pub fn index<'pl, 'indexer, 'index, DC, MSP>(
wtxn: &mut RwTxn,
index: &'index Index,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
pool: &ThreadPoolNoAbort,
grenad_parameters: GrenadParameters,
db_fields_ids_map: &'indexer FieldsIdsMap,
@@ -105,16 +105,15 @@ where
);
});
let (extractor_sender, writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();
let (extractor_sender, writer_receiver) =
extractor_writer_bbqueue(thread_pool, &mut bbbuffers, total_bbbuffer_capacity, 1000);
let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
let new_fields_ids_map = RwLock::new(new_fields_ids_map);
let fields_ids_map_store = ThreadLocal::with_capacity(rayon::current_num_threads());
let mut extractor_allocs = ThreadLocal::with_capacity(rayon::current_num_threads());
let doc_allocs = ThreadLocal::with_capacity(rayon::current_num_threads());
let fields_ids_map_store = ThreadLocal::with_capacity(thread_pool.thread_count());
let mut extractor_allocs = ThreadLocal::with_capacity(thread_pool.thread_count());
let doc_allocs = ThreadLocal::with_capacity(thread_pool.thread_count());
let indexing_context = IndexingContext {
index,
@@ -140,21 +139,19 @@ where
let document_ids = &mut document_ids;
let extractor_handle =
Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
pool.install(move || {
extract::extract_all(
document_changes,
indexing_context,
indexer_span,
extractor_sender,
embedders,
&mut extractor_allocs,
finished_extraction,
field_distribution,
index_embeddings,
document_ids,
)
})
.unwrap()
extract::extract_all(
thread_pool,
document_changes,
indexing_context,
indexer_span,
extractor_sender,
embedders,
&mut extractor_allocs,
finished_extraction,
field_distribution,
index_embeddings,
document_ids,
)
})?;
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
@@ -191,19 +188,23 @@ where
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
build_vectors(
index,
wtxn,
index_embeddings,
&mut arroy_writers,
&indexing_context.must_stop_processing,
)?;
pool.install(|| {
build_vectors(
index,
wtxn,
index_embeddings,
&mut arroy_writers,
&indexing_context.must_stop_processing,
)
})
.unwrap()?;
post_processing::post_process(
indexing_context,
wtxn,
global_fields_ids_map,
facet_field_ids_delta,
thread_pool,
)?;
indexing_context.progress.update_progress(IndexingStep::Finalizing);

View File
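The `indexer-extractors` thread no longer wraps its body in `pool.install(...)`: the scoped pool travels down as an argument, and only arroy's `build_vectors` stays pinned to the rayon `ThreadPoolNoAbort`. A reduced, std-only sketch of the spawn pattern that remains (real `std::thread` API, toy payload):

```rust
use std::thread::Builder;

fn main() -> std::io::Result<()> {
    std::thread::scope(|s| {
        // The extraction thread is still named and scoped, but its body now
        // drives the dedicated scoped pool directly instead of running under
        // `rayon::ThreadPool::install`.
        let handle = Builder::new()
            .name("indexer-extractors".into())
            .spawn_scoped(s, || {
                // extract::extract_all(thread_pool, ...) would run here.
                40 + 2
            })?;
        assert_eq!(handle.join().unwrap(), 42);
        Ok(())
    })
}
```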

@@ -1,8 +1,8 @@
use std::ops::DerefMut;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rustc_hash::FxBuildHasher;
use scoped_thread_pool::ThreadPool;
use serde_json::value::RawValue;
use super::document_changes::{DocumentChangeContext, DocumentChanges};
@@ -14,45 +14,34 @@ use crate::update::new::thread_local::MostlySend;
use crate::update::new::{DocumentChange, Insertion};
use crate::{Error, InternalError, Result, UserError};
pub struct PartialDump<I> {
iter: I,
}
pub struct PartialDump;
impl<I> PartialDump<I> {
pub fn new_from_jsonlines(iter: I) -> Self {
PartialDump { iter }
impl PartialDump {
pub fn new_from_jsonlines() -> Self {
PartialDump
}
pub fn into_changes<'index>(
self,
concurrent_available_ids: &'index ConcurrentAvailableIds,
primary_key: &'index PrimaryKey,
) -> PartialDumpChanges<'index, I> {
_thread_pool: &ThreadPool<crate::Error>,
_chunk_size: usize,
) -> PartialDumpChanges<'index> {
// Note for future self:
// - We recommend sending chunks of documents in this `PartialDumpIndexer`; we therefore need to create a custom `take_while_size` method (one that doesn't drop items).
PartialDumpChanges { iter: self.iter, concurrent_available_ids, primary_key }
PartialDumpChanges { concurrent_available_ids, primary_key }
}
}
pub struct PartialDumpChanges<'doc, I> {
iter: I,
pub struct PartialDumpChanges<'doc> {
concurrent_available_ids: &'doc ConcurrentAvailableIds,
primary_key: &'doc PrimaryKey<'doc>,
}
impl<'index, Iter> DocumentChanges<'index> for PartialDumpChanges<'index, Iter>
where
Iter: IndexedParallelIterator<Item = Box<RawValue>> + Clone + Sync + 'index,
{
impl<'index> DocumentChanges<'index> for PartialDumpChanges<'index> {
type Item = Box<RawValue>;
fn iter(
&self,
chunk_size: usize,
) -> impl IndexedParallelIterator<Item = impl AsRef<[Self::Item]>> {
self.iter.clone().chunks(chunk_size)
}
fn item_to_document_change<'doc, T: MostlySend + 'doc>(
&'doc self,
context: &'doc DocumentChangeContext<T>,
@@ -85,6 +74,10 @@ where
}
fn len(&self) -> usize {
self.iter.len()
unimplemented!()
}
fn items(&self, thread_index: usize, task_index: usize) -> Option<&[Self::Item]> {
unimplemented!()
}
}

View File

@@ -27,6 +27,7 @@ pub(super) fn post_process<MSP>(
wtxn: &mut RwTxn<'_>,
global_fields_ids_map: GlobalFieldsIdsMap<'_>,
facet_field_ids_delta: FacetFieldIdsDelta,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()>
where
MSP: Fn() -> bool + Sync,
@@ -39,7 +40,13 @@ where
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
compute_prefix_database(
index,
wtxn,
prefix_delta,
indexing_context.grenad_parameters,
thread_pool,
)?;
};
Ok(())
}
@@ -50,16 +57,38 @@ fn compute_prefix_database(
wtxn: &mut RwTxn,
prefix_delta: PrefixDelta,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
let PrefixDelta { modified, deleted } = prefix_delta;
// Compute word prefix docids
compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters, thread_pool)?;
// Compute exact word prefix docids
compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
compute_exact_word_prefix_docids(
wtxn,
index,
&modified,
&deleted,
grenad_parameters,
thread_pool,
)?;
// Compute word prefix fid docids
compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
compute_word_prefix_fid_docids(
wtxn,
index,
&modified,
&deleted,
grenad_parameters,
thread_pool,
)?;
// Compute word prefix position docids
compute_word_prefix_position_docids(wtxn, index, &modified, &deleted, grenad_parameters)
compute_word_prefix_position_docids(
wtxn,
index,
&modified,
&deleted,
grenad_parameters,
thread_pool,
)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing")]

View File

@@ -1,9 +1,10 @@
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rayon::slice::ParallelSlice as _;
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;
use scoped_thread_pool::{PartitionChunks, ThreadPool};
use super::document_changes::DocumentChangeContext;
use super::DocumentChanges;
@@ -22,14 +23,12 @@ pub struct UpdateByFunction {
code: String,
}
pub struct UpdateByFunctionChanges<'doc> {
primary_key: &'doc PrimaryKey<'doc>,
pub struct UpdateByFunctionChanges<'index> {
primary_key: &'index PrimaryKey<'index>,
engine: Engine,
ast: AST,
context: Option<Dynamic>,
// It is sad that the RoaringBitmap doesn't
// implement IndexedParallelIterator
documents: Vec<u32>,
documents: PartitionChunks<'index, u32>,
}
impl UpdateByFunction {
@@ -40,6 +39,9 @@ impl UpdateByFunction {
pub fn into_changes<'index>(
self,
primary_key: &'index PrimaryKey,
allocator: &'index Bump,
thread_pool: &ThreadPool<crate::Error>,
chunk_size: usize,
) -> Result<UpdateByFunctionChanges<'index>> {
let Self { documents, context, code } = self;
@@ -64,26 +66,19 @@ impl UpdateByFunction {
None => None,
};
Ok(UpdateByFunctionChanges {
primary_key,
engine,
ast,
context,
documents: documents.into_iter().collect(),
})
let documents: bumpalo::collections::Vec<'_, _> =
documents.into_iter().collect_in(allocator);
let documents = documents.into_bump_slice();
let documents = PartitionChunks::new(documents, chunk_size, thread_pool.thread_count());
Ok(UpdateByFunctionChanges { primary_key, engine, ast, context, documents })
}
}
impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
type Item = u32;
fn iter(
&self,
chunk_size: usize,
) -> impl IndexedParallelIterator<Item = impl AsRef<[Self::Item]>> {
self.documents.as_slice().par_chunks(chunk_size)
}
fn item_to_document_change<'doc, T: MostlySend + 'doc>(
&self,
context: &'doc DocumentChangeContext<T>,
@@ -185,7 +180,11 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
}
fn len(&self) -> usize {
self.documents.len()
self.documents.slice().len()
}
fn items(&self, thread_index: usize, task_index: usize) -> Option<&[Self::Item]> {
self.documents.partition(thread_index, task_index)
}
}

View File
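`UpdateByFunction::into_changes` now materializes the document ids into a bump-allocated slice before partitioning, which also retires the old comment lamenting that `RoaringBitmap` doesn't implement `IndexedParallelIterator`. The `collect_in` / `into_bump_slice` idiom, shown standalone with the real `bumpalo` API:

```rust
// Requires: bumpalo = { version = "3", features = ["collections"] }
use bumpalo::collections::CollectIn;
use bumpalo::Bump;

fn main() {
    let bump = Bump::new();

    // Collect an iterator straight into the bump arena...
    let documents: bumpalo::collections::Vec<'_, u32> =
        (0..5u32).filter(|n| n % 2 == 0).collect_in(&bump);

    // ...then freeze it into a plain slice that lives as long as the arena.
    let documents: &[u32] = documents.into_bump_slice();
    assert_eq!(documents, &[0, 2, 4][..]);
}
```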

@@ -1,10 +1,10 @@
use std::cell::RefCell;
use std::sync::Mutex;
use hashbrown::HashMap;
use heed::types::Bytes;
use heed::{Database, RoTxn};
use memmap2::Mmap;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
use super::channel::*;
@@ -22,6 +22,7 @@ pub fn merge_and_send_rtree<'extractor, MSP>(
index: &Index,
geo_sender: GeoSender<'_, '_>,
must_stop_processing: &MSP,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()>
where
MSP: Fn() -> bool + Sync,
@@ -57,13 +58,14 @@ where
let rtree_mmap = unsafe { Mmap::map(&file)? };
geo_sender.set_rtree(rtree_mmap).unwrap();
geo_sender.set_geo_faceted(&faceted)?;
geo_sender.set_geo_faceted(&faceted, thread_pool)?;
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_docids<'extractor, MSP, D>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
mut caches: Vec<BalancedCaches<'extractor>>,
database: Database<Bytes, Bytes>,
index: &Index,
@@ -74,7 +76,10 @@ where
MSP: Fn() -> bool + Sync,
D: DatabaseType + Sync,
{
transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
let frozen_caches = Mutex::new(transpose_and_freeze_caches(&mut caches)?);
match thread_pool.broadcast(|thread_index| {
let frozen = std::mem::take(frozen_caches.lock().unwrap().get_mut(thread_index).unwrap());
let rtxn = index.read_txn()?;
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
@@ -92,12 +97,17 @@ where
}
Operation::Ignore => Ok(()),
}
})
})
})?;
Ok(())
}) {
Ok(()) => Ok(()),
Err(errors) => Err(crate::Error::from_scoped_thread_pool_errors(thread_pool, errors)),
}
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_facet_docids<'extractor>(
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
mut caches: Vec<BalancedCaches<'extractor>>,
database: FacetDatabases,
index: &Index,
@@ -108,9 +118,15 @@ pub fn merge_and_send_facet_docids<'extractor>(
let max_number_count = (index.facet_id_f64_docids.len(rtxn)? / 500) as usize;
let max_string_count = max_string_count.clamp(1000, 100_000);
let max_number_count = max_number_count.clamp(1000, 100_000);
transpose_and_freeze_caches(&mut caches)?
.into_par_iter()
.map(|frozen| {
let transposed_frozen_caches = Mutex::new(transpose_and_freeze_caches(&mut caches)?);
let output = Mutex::new(FacetFieldIdsDelta::new(max_string_count, max_number_count));
thread_pool
.broadcast(|thread_index| {
// TODO: we can probably avoid the mutex here since each thread is guaranteed to access only its own cell of the vec
let frozen = std::mem::take(
transposed_frozen_caches.lock().unwrap().get_mut(thread_index).unwrap(),
);
let mut facet_field_ids_delta =
FacetFieldIdsDelta::new(max_string_count, max_number_count);
let rtxn = index.read_txn()?;
@@ -130,13 +146,18 @@ pub fn merge_and_send_facet_docids<'extractor>(
Operation::Ignore => Ok(()),
}
})?;
Ok(facet_field_ids_delta)
{
let mut common = output.lock().unwrap();
*common = std::mem::replace(
&mut *common,
FacetFieldIdsDelta::new(max_string_count, max_number_count),
)
.merge(facet_field_ids_delta);
}
Ok(())
})
.reduce(
|| Ok(FacetFieldIdsDelta::new(max_string_count, max_number_count)),
|lhs, rhs| Ok(lhs?.merge(rhs?)),
)
.map_err(|errors| crate::Error::from_scoped_thread_pool_errors(thread_pool, errors))?;
Ok(output.into_inner().unwrap())
}
pub struct FacetDatabases<'a> {

View File
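This merge step is the clearest picture of the new concurrency model: instead of a rayon `into_par_iter().try_for_each`, every pool thread receives a `broadcast` call, takes its own frozen cache out of a shared `Mutex<Vec<_>>` by `thread_index`, and merges its partial `FacetFieldIdsDelta` into a mutex-guarded accumulator. A self-contained sketch of that pattern with `std::thread::scope` standing in for the pool (the `broadcast` signature and error aggregation are assumptions):

```rust
use std::mem;
use std::sync::Mutex;

fn main() {
    // One pre-frozen work item per thread, indexed by thread_index.
    let per_thread: Mutex<Vec<Vec<u32>>> =
        Mutex::new(vec![vec![1, 2], vec![3, 4], vec![5]]);
    let total = Mutex::new(0u32);

    std::thread::scope(|s| {
        for thread_index in 0..3 {
            let per_thread = &per_thread;
            let total = &total;
            s.spawn(move || {
                // Each thread takes *its own* cell out of the shared vector,
                // leaving an empty Vec behind.
                let work =
                    mem::take(per_thread.lock().unwrap().get_mut(thread_index).unwrap());
                let partial: u32 = work.iter().sum();
                // Merge the per-thread partial result into the accumulator.
                *total.lock().unwrap() += partial;
            });
        }
    });

    assert_eq!(total.into_inner().unwrap(), 15);
}
```

As the TODO in the diff notes, the input mutex is probably avoidable, since each thread only ever touches its own cell.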

@@ -26,11 +26,13 @@ impl WordPrefixDocids {
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> WordPrefixDocids {
WordPrefixDocids {
database,
prefix_database,
max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
max_memory_by_thread: grenad_parameters
.max_memory_by_thread(thread_pool.thread_count()),
}
}
@@ -39,9 +41,10 @@ impl WordPrefixDocids {
wtxn: &mut heed::RwTxn,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
self.recompute_modified_prefixes(wtxn, prefix_to_compute, thread_pool)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -49,6 +52,7 @@ impl WordPrefixDocids {
&self,
wtxn: &mut RwTxn,
prefixes: &BTreeSet<Prefix>,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmap pointers in a HashMap.
@@ -56,7 +60,7 @@
// We access this HashMap in parallel to compute the *union* of all
// of them and *serialize* them into files. There is one file per CPU.
let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads());
let local_entries = ThreadLocal::with_capacity(thread_pool.thread_count());
prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| {
let refcell = local_entries.get_or(|| {
let file = BufWriter::new(spooled_tempfile(
@@ -162,11 +166,13 @@ impl WordPrefixIntegerDocids {
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> WordPrefixIntegerDocids {
WordPrefixIntegerDocids {
database,
prefix_database,
max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
max_memory_by_thread: grenad_parameters
.max_memory_by_thread(thread_pool.thread_count()),
}
}
@@ -175,9 +181,10 @@ impl WordPrefixIntegerDocids {
wtxn: &mut heed::RwTxn,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
self.recompute_modified_prefixes(wtxn, prefix_to_compute, thread_pool)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -185,6 +192,7 @@ impl WordPrefixIntegerDocids {
&self,
wtxn: &mut RwTxn,
prefixes: &BTreeSet<Prefix>,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmap pointers in a HashMap.
@@ -192,7 +200,7 @@
// We access this HashMap in parallel to compute the *union* of all
// of them and *serialize* them into files. There is one file per CPU.
let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads());
let local_entries = ThreadLocal::with_capacity(thread_pool.thread_count());
prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| {
let refcell = local_entries.get_or(|| {
let file = BufWriter::new(spooled_tempfile(
@@ -312,13 +320,15 @@ pub fn compute_word_prefix_docids(
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
WordPrefixDocids::new(
index.word_docids.remap_key_type(),
index.word_prefix_docids.remap_key_type(),
grenad_parameters,
thread_pool,
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
.execute(wtxn, prefix_to_compute, prefix_to_delete, thread_pool)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -328,13 +338,15 @@ pub fn compute_exact_word_prefix_docids(
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
WordPrefixDocids::new(
index.exact_word_docids.remap_key_type(),
index.exact_word_prefix_docids.remap_key_type(),
grenad_parameters,
thread_pool,
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
.execute(wtxn, prefix_to_compute, prefix_to_delete, thread_pool)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -344,13 +356,15 @@ pub fn compute_word_prefix_fid_docids(
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
WordPrefixIntegerDocids::new(
index.word_fid_docids.remap_key_type(),
index.word_prefix_fid_docids.remap_key_type(),
grenad_parameters,
thread_pool,
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
.execute(wtxn, prefix_to_compute, prefix_to_delete, thread_pool)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -360,11 +374,13 @@ pub fn compute_word_prefix_position_docids(
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
thread_pool: &scoped_thread_pool::ThreadPool<crate::Error>,
) -> Result<()> {
WordPrefixIntegerDocids::new(
index.word_position_docids.remap_key_type(),
index.word_prefix_position_docids.remap_key_type(),
grenad_parameters,
thread_pool,
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
.execute(wtxn, prefix_to_compute, prefix_to_delete, thread_pool)
}

View File
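`WordPrefixDocids::new` and `WordPrefixIntegerDocids::new` now ask `max_memory_by_thread` to split the budget over an explicit thread count rather than whatever rayon reports. The method body is not shown in this diff, so this reconstruction is purely an assumption about the arithmetic:

```rust
/// Hypothetical reconstruction: split a global extraction memory budget
/// evenly across the threads of the pool that will do the work.
struct GrenadParameters {
    max_memory: Option<usize>,
}

impl GrenadParameters {
    fn max_memory_by_thread(&self, thread_count: usize) -> Option<usize> {
        self.max_memory.map(|max| max / thread_count.max(1))
    }
}

fn main() {
    let params = GrenadParameters { max_memory: Some(1 << 30) };
    // A 1 GiB budget shared by 8 threads leaves 128 MiB per thread.
    assert_eq!(params.max_memory_by_thread(8), Some(128 * 1024 * 1024));
    assert_eq!(GrenadParameters { max_memory: None }.max_memory_by_thread(8), None);
}
```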

@@ -1,7 +1,9 @@
mod v1_12;
mod v1_13;
use heed::RwTxn;
use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3};
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::V1_13_0_To_Current;
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};
@@ -26,11 +28,13 @@ pub fn upgrade(
progress: Progress,
) -> Result<bool> {
let from = index.get_version(wtxn)?.unwrap_or(db_version);
let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()];
let upgrade_functions: &[&dyn UpgradeIndex] =
&[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0 {}, &V1_13_0_To_Current()];
let start = match from {
(1, 12, 0..=2) => 0,
(1, 12, 3..) => 1,
(1, 13, 0) => 2,
// We must handle the current version in the match because, in case of a failure, some indexes may have been upgraded but not others.
(1, 13, _) => return Ok(false),
(major, minor, patch) => {

View File
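The upgrade table gains an intermediate `V1_12_3_To_V1_13_0` step (which returns `Ok(true)` to force recomputing index stats) and a terminal `V1_13_0_To_Current`, with the `from` version matched to a starting offset into the chain. A compilable miniature of that dispatch; the `UpgradeStep` trait shape and any version past 1.13.0 are assumptions:

```rust
#![allow(non_camel_case_types)]

type Version = (u32, u32, u32);

trait UpgradeStep {
    fn target_version(&self) -> Version;
    /// Returns whether the index stats must be recomputed, as in the diff.
    fn upgrade(&self) -> bool;
}

struct V1_12_To_V1_12_3;
struct V1_12_3_To_V1_13_0;
struct V1_13_0_To_Current;

impl UpgradeStep for V1_12_To_V1_12_3 {
    fn target_version(&self) -> Version { (1, 12, 3) }
    fn upgrade(&self) -> bool { false }
}
impl UpgradeStep for V1_12_3_To_V1_13_0 {
    fn target_version(&self) -> Version { (1, 13, 0) }
    fn upgrade(&self) -> bool { true } // force the dumpless stats recomputation
}
impl UpgradeStep for V1_13_0_To_Current {
    fn target_version(&self) -> Version { (1, 13, 1) } // assumed current version
    fn upgrade(&self) -> bool { false }
}

fn upgrade_from(from: Version) -> Option<Version> {
    let steps: &[&dyn UpgradeStep] =
        &[&V1_12_To_V1_12_3, &V1_12_3_To_V1_13_0, &V1_13_0_To_Current];
    // Map the source version to the first step that still applies to it.
    let start = match from {
        (1, 12, 0..=2) => 0,
        (1, 12, 3..) => 1,
        (1, 13, 0) => 2,
        // Already current: handled here because a failed upgrade may leave
        // some indexes upgraded and some not.
        (1, 13, _) => return None,
        _ => panic!("unsupported version"),
    };
    let mut current = from;
    for step in &steps[start..] {
        step.upgrade();
        current = step.target_version();
    }
    Some(current)
}

fn main() {
    assert_eq!(upgrade_from((1, 12, 1)), Some((1, 13, 1)));
    assert_eq!(upgrade_from((1, 13, 0)), Some((1, 13, 1)));
    assert_eq!(upgrade_from((1, 13, 1)), None);
}
```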

@@ -1,11 +1,9 @@
use heed::RwTxn;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
use super::UpgradeIndex;
#[allow(non_camel_case_types)]
pub(super) struct V1_12_To_V1_12_3 {}
@@ -33,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
}
#[allow(non_camel_case_types)]
pub(super) struct V1_12_3_To_Current();
pub(super) struct V1_12_3_To_V1_13_0 {}
impl UpgradeIndex for V1_12_3_To_Current {
impl UpgradeIndex for V1_12_3_To_V1_13_0 {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
@@ -43,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current {
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
// recompute the indexes stats
Ok(true)
}
fn target_version(&self) -> (u32, u32, u32) {
(
VERSION_MAJOR.parse().unwrap(),
VERSION_MINOR.parse().unwrap(),
VERSION_PATCH.parse().unwrap(),
)
(1, 13, 0)
}
}

View File

@@ -0,0 +1,29 @@
use heed::RwTxn;
use super::UpgradeIndex;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::Progress;
use crate::{Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct V1_13_0_To_Current();
impl UpgradeIndex for V1_13_0_To_Current {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(
VERSION_MAJOR.parse().unwrap(),
VERSION_MINOR.parse().unwrap(),
VERSION_PATCH.parse().unwrap(),
)
}
}

View File

@@ -410,8 +410,43 @@ impl ArroyWrapper {
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
self.database.remap_data_type()
}
pub fn aggregate_stats(
&self,
rtxn: &RoTxn,
stats: &mut ArroyStats,
) -> Result<(), arroy::Error> {
if self.quantized {
for reader in self.readers(rtxn, self.quantized_db()) {
let reader = reader?;
let documents = reader.item_ids();
if documents.is_empty() {
break;
}
stats.documents |= documents;
stats.number_of_embeddings += documents.len();
}
} else {
for reader in self.readers(rtxn, self.angular_db()) {
let reader = reader?;
let documents = reader.item_ids();
if documents.is_empty() {
break;
}
stats.documents |= documents;
stats.number_of_embeddings += documents.len();
}
}
Ok(())
}
}
#[derive(Debug, Default, Clone)]
pub struct ArroyStats {
pub number_of_embeddings: u64,
pub documents: RoaringBitmap,
}
/// One or multiple embeddings stored consecutively in a flat vector.
pub struct Embeddings<F> {
data: Vec<F>,

View File
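`aggregate_stats` walks the arroy readers of an embedder, unions each reader's `item_ids()` into the stats bitmap, and counts embeddings, stopping at the first empty reader (presumably safe because arroy fills its sub-readers consecutively). The same aggregation reduced to plain `roaring` bitmaps:

```rust
use roaring::RoaringBitmap;

#[derive(Debug, Default)]
struct ArroyStats {
    number_of_embeddings: u64,
    documents: RoaringBitmap,
}

fn aggregate(readers: &[RoaringBitmap], stats: &mut ArroyStats) {
    for documents in readers {
        if documents.is_empty() {
            // Consecutive storage: the first empty reader ends the scan.
            break;
        }
        stats.documents |= documents;
        stats.number_of_embeddings += documents.len();
    }
}

fn main() {
    let a: RoaringBitmap = (0..3).collect();
    let b: RoaringBitmap = (2..5).collect();
    let mut stats = ArroyStats::default();
    aggregate(&[a, b, RoaringBitmap::new()], &mut stats);
    // 3 + 3 embeddings in total, over the union {0, 1, 2, 3, 4} of documents.
    assert_eq!(stats.number_of_embeddings, 6);
    assert_eq!(stats.documents.len(), 5);
}
```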

@@ -130,6 +130,7 @@ impl Embedder {
let client = ureq::AgentBuilder::new()
.max_idle_connections(REQUEST_PARALLELISM * 2)
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
.timeout(std::time::Duration::from_secs(30))
.build();
let request = Request::new(options.request)?;

View File
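This is the whole fix for #5337: the 30-second overall timeout on the REST embedder's `ureq` agent, present in v1.11.3, is reinstated. For reference, a minimal agent built the same way (real `ureq` 2.x builder API; the `REQUEST_PARALLELISM` value here is an assumption standing in for the crate constant):

```rust
use std::time::Duration;

const REQUEST_PARALLELISM: usize = 10; // assumption: mirrors the crate constant

fn main() {
    // Without `.timeout(...)` there is no overall per-request deadline, so a
    // stalled embedding server could hang a request indefinitely.
    let agent = ureq::AgentBuilder::new()
        .max_idle_connections(REQUEST_PARALLELISM * 2)
        .max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
        .timeout(Duration::from_secs(30))
        .build();
    let _ = agent; // the agent is then shared by all embedding requests
}
```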

@@ -5,6 +5,7 @@ use maplit::hashset;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::new::indexer::document_changes::CHUNK_SIZE;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{FacetDistribution, Index, Object, OrderBy};
@@ -36,6 +37,8 @@ fn test_facet_distribution_with_no_facet_values() {
let mut new_fields_ids_map = db_fields_ids_map.clone();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let doc1: Object = from_value(
@@ -59,12 +62,15 @@ fn test_facet_distribution_with_no_facet_values() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,

View File
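Every integration test follows the same recipe from here on: build a scoped pool, pass it and `CHUNK_SIZE` to `into_changes`, then hand the same pool to `indexer::index`. Two pool constructors appear across the PR; the summary below infers their shape from the call sites and substitutes `std::thread::available_parallelism` for the crate's detection logic:

```rust
use std::num::NonZeroUsize;

// Assumed constructor surface of the internal `scoped_thread_pool` crate,
// inferred from its call sites in this diff:
//   ThreadPool::new(NonZeroUsize, String)          -- explicit thread count
//   ThreadPool::with_available_parallelism(String) -- sized from the host
fn main() {
    let explicit = NonZeroUsize::new(1).unwrap();
    // A std-only stand-in for `with_available_parallelism`:
    let detected = std::thread::available_parallelism()
        .unwrap_or(NonZeroUsize::new(1).unwrap());
    println!("test pool: {explicit} thread(s), index pool: {detected} thread(s)");
}
```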

@@ -9,6 +9,7 @@ use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::new::indexer::document_changes::CHUNK_SIZE;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy};
@@ -72,6 +73,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut new_fields_ids_map = db_fields_ids_map.clone();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut file = tempfile::tempfile().unwrap();
@@ -92,6 +95,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
@@ -102,6 +107,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -7,6 +7,7 @@ use itertools::Itertools;
use maplit::hashset;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::new::indexer::document_changes::CHUNK_SIZE;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
@@ -288,6 +289,8 @@ fn criteria_ascdesc() {
let mut new_fields_ids_map = db_fields_ids_map.clone();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut file = tempfile::tempfile().unwrap();
@@ -328,12 +331,15 @@ fn criteria_ascdesc() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -5,6 +5,7 @@ use heed::EnvOpenOptions;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::new::indexer::document_changes::CHUNK_SIZE;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{Criterion, Index, Object, Search, TermsMatchingStrategy};
@@ -123,6 +124,8 @@ fn test_typo_disabled_on_word() {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let embedders = EmbeddingConfigs::default();
let thread_pool =
scoped_thread_pool::ThreadPool::with_available_parallelism("index".to_string());
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
indexer.add_documents(&documents).unwrap();
@@ -137,12 +140,15 @@ fn test_typo_disabled_on_word() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
&thread_pool,
CHUNK_SIZE,
)
.unwrap();
indexer::index(
&mut wtxn,
&index,
&thread_pool,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,

View File

@@ -31,7 +31,7 @@
"hackernews-modified-number-filters.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
}
},
"precommands": [

View File

@@ -31,7 +31,7 @@
"hackernews-modified-string-filters.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
}
},
"precommands": [