Compare commits

..

72 Commits

Author SHA1 Message Date
5d8726d92d Retry in case the JSON deserialization fails 2024-11-14 15:28:44 +01:00
bca2974266 Add timeout on read and write operations. 2024-11-13 17:01:23 +01:00
13025594a8 Merge #5041
5041: Update version for the next release (v1.11.1) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-11-06 11:35:26 +00:00
2c1c33166d Merge #5039
5039: Add 3s timeout to embedding requests made during search r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5032 

## What does this PR do?
- Add a 3-second timeout to embedding requests made against a remote embedder in the context of search. The timeout typically triggers when requests fail because of rate-limiting (see the sketch after this list).
- Add a test for that timeout.
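
For illustration, the mechanism can be pictured as a retry loop bounded by an optional deadline. The sketch below is not the Meilisearch embedder API: the function, error type, and backoff are placeholders, and the only grounding is the 3-second budget above plus the `embed_one(..., None)` signature change visible in the test diff further down, which presumably threads the new deadline through.

```rust
use std::thread;
use std::time::{Duration, Instant};

/// Minimal sketch of the deadline idea, not the actual Meilisearch embedder API:
/// search-time embedding requests get a fixed time budget, after which retries stop,
/// while callers that pass `None` (e.g. indexing) keep the previous retry behavior.
fn embed_with_deadline(
    mut try_embed: impl FnMut() -> Result<Vec<f32>, String>,
    deadline: Option<Instant>,
) -> Result<Vec<f32>, String> {
    loop {
        match try_embed() {
            Ok(embedding) => return Ok(embedding),
            // Keep retrying (e.g. on rate-limiting) while the deadline is not exceeded.
            Err(_rate_limited) if deadline.map_or(true, |d| Instant::now() < d) => {
                thread::sleep(Duration::from_millis(100)); // crude backoff
            }
            Err(err) => return Err(format!("embedding timed out: {err}")),
        }
    }
}

fn main() {
    // Hypothetical search call site: allow at most 3 seconds of retries.
    let deadline = Some(Instant::now() + Duration::from_secs(3));
    let result = embed_with_deadline(|| Err("429 Too Many Requests".into()), deadline);
    assert!(result.is_err());
}
```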

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-06 10:56:50 +00:00
cdb6e3f45a Update version for the next release (v1.11.1) in Cargo.toml 2024-11-06 08:35:51 +00:00
1d574bd443 Add test 2024-11-06 09:25:41 +01:00
37a4fd7f99 Add deadline of 3 seconds to embedding requests made in the context of hybrid search 2024-11-06 09:25:24 +01:00
3753f87fd8 Merge #5011
5011: Revamp analytics r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5009

## What does this PR do?
- Force every analytics event to go through a trait that forces you to handle aggregation correctly (see the sketch after this list)
- Put the code that retrieves the `user-agent`, `timestamp` and `requests.total_received` in common between all aggregates, so there is no room for mistakes
- Get rid of the separate channel for each kind of event in favor of an any-map
- Ensure that we never [send an empty event ever again](https://github.com/meilisearch/meilisearch/pull/5001)
- Merge all the sub-settings routes into a single « Settings Updated » event.
- Fix: when using one of the three following features, we were not sending any analytics if they were set from the global route:
  - /non-separator-tokens
  - /separator-tokens
  - /dictionary
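
As a sketch of the new contract, here is what a route-level aggregate might look like under that trait. The struct name and counter are invented; the trait body mirrors the `Aggregate` trait shown in the analytics module diff further down, with the `mopa` downcasting plumbing omitted.

```rust
use serde_json::json;

// Simplified stand-in for the `Aggregate` trait added by this PR (the real trait,
// shown in the analytics module diff below, additionally requires `mopa::Any` so
// events can be downcast out of the any-map).
trait Aggregate: 'static + Send {
    fn event_name(&self) -> &'static str;
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>
    where
        Self: Sized;
    fn into_event(self: Box<Self>) -> serde_json::Value;
}

// A made-up event type showing how a route-level aggregate implements the contract.
#[derive(Default)]
struct DocumentsDeletedAggregate {
    total_received: usize,
}

impl Aggregate for DocumentsDeletedAggregate {
    fn event_name(&self) -> &'static str {
        "Documents Deleted"
    }

    // Called whenever two events of the same kind must be merged before segment
    // flushes its buffer.
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
        Box::new(Self { total_received: self.total_received + new.total_received })
    }

    // The final JSON payload handed to segment.
    fn into_event(self: Box<Self>) -> serde_json::Value {
        json!({ "requests": { "total_received": self.total_received } })
    }
}
```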

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-21 15:08:49 +00:00
5675585fe8 move all the searches structures to new modules 2024-10-20 17:54:43 +02:00
af589c85ec reverse all the settings to keep the last one received instead of the first one received in case we receive the same setting multiple times 2024-10-20 17:40:31 +02:00
ac919df37d simplify the trait a bit more by getting rid of the downcast_aggregate method 2024-10-20 17:36:29 +02:00
73b5722896 rename the other parameter of the aggregate method to new to avoid confusion 2024-10-20 17:31:35 +02:00
c94679bde6 apply review comments 2024-10-20 17:24:12 +02:00
89e2d2b2b9 fix the doctest 2024-10-17 13:55:49 +02:00
3a7a20c716 remove the segment feature and always import segment 2024-10-17 11:21:14 +02:00
fa1db6b721 fix the tests 2024-10-17 09:55:30 +02:00
1ab6fec903 send all experimental features in the info event including the runtime one 2024-10-17 09:49:21 +02:00
18ac4032aa Remove the experimental feature seen 2024-10-17 09:35:11 +02:00
d9115b74f0 move the analytics settings code to a dedicated file 2024-10-17 09:32:54 +02:00
0fde49640a make clippy happy 2024-10-17 09:18:25 +02:00
4ee65d870e remove a lot of unused code 2024-10-17 09:14:34 +02:00
ef77c7699b add the required shared values between all the events and fix the timestamp 2024-10-17 09:06:23 +02:00
7382fb21e4 fix the main 2024-10-17 08:38:11 +02:00
e4ace98004 fix all the routes + move to a better version of mopa 2024-10-17 01:04:25 +02:00
aa7a34ffe8 make the aggregate method send 2024-10-17 00:43:34 +02:00
6728cfbfac fix the analytics 2024-10-17 00:38:18 +02:00
ea6883189e finish the analytics in all the routes 2024-10-16 21:17:06 +02:00
fdeb47fb54 implements all routes 2024-10-16 17:16:33 +02:00
e66fccc3f2 get rid of the analytics closure 2024-10-16 15:51:48 +02:00
73e87c152a rewrite most of the analytics especially the settings 2024-10-16 15:43:27 +02:00
75b2f22add Merge #5008
5008: Display vectors when no custom vectors were ever provided r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes the issue reported on [Discord](https://discord.com/channels/1006923006964154428/1294653031958446080/1295336784896589967).

## What does this PR do?
- Meilisearch's normal behavior is to hide `_vectors`, even when `retrieveVectors: true`, whenever there is an explicit list of displayed attributes that does not contain vectors.
- However, this relied on the field id of the `_vectors` field existing, which wasn't the case when no `_vectors` was ever manually provided in documents. This is often the case for people using auto-embedders such as the OpenAI integration.
- This PR fixes the behavior by looking for the `_vectors` string in `displayedAttributes` when there is no `_vectors` field id (see the sketch below).
- This PR also adds a test for this specific situation, which fails before the PR and passes after it.
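
A rough sketch of that fallback, with invented names and types rather than the actual milli code:

```rust
/// Illustrative only, not the actual milli code: `retrieveVectors: true` should show
/// `_vectors` whenever it is listed in `displayedAttributes`, even if the field never
/// received a field id because no document contained an explicit `_vectors` entry.
fn vectors_are_displayed(
    displayed_attributes: &[String],
    vectors_fid: Option<u16>,
    displayed_fids: &[u16],
) -> bool {
    match vectors_fid {
        // The old path: the field id exists, so membership is checked by id.
        Some(fid) => displayed_fids.contains(&fid),
        // The fix: no field id, so look for the `_vectors` string directly.
        None => displayed_attributes.iter().any(|attr| attr == "_vectors" || attr == "*"),
    }
}
```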


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-15 13:08:47 +00:00
5a74d4729c Add test failing before this PR, OK now 2024-10-14 16:23:28 +02:00
e44e7b5e81 Fix retrieveVectors when explicitly passed in displayed attributes without any document containing _vectors 2024-10-14 16:17:19 +02:00
a0b3887709 Merge #5006
5006: Bring back changes from v1.10.3 r=Kerollmops a=irevoire

# Pull Request

## Related issue
Port the following PR to the latest version: https://github.com/meilisearch/meilisearch/pull/5000
See its description for more information

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-14 14:06:35 +00:00
4b4a6c7863 Update meilisearch/src/option.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-10-14 14:39:34 +02:00
3085092e04 Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-14 14:39:34 +02:00
c4efd1df4e Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-14 14:39:34 +02:00
c32282acb1 improve doc 2024-10-14 14:39:34 +02:00
92070a3578 Implement the experimental drop search after and nb search per core 2024-10-14 14:39:33 +02:00
a90563df3f Merge #5001
5001: Do not send empty edit document by function r=Kerollmops a=irevoire

# Pull Request

We realized that we had huge usage of the feature from users who didn’t enable the feature at all. That shouldn’t be possible.
After a big investigation with `@gmourier` 
![image](https://github.com/user-attachments/assets/eae3e851-dc5b-4616-80ee-7237a4871522)
We found the issue: it was in the engine.

## What does this PR do?
- Do not send the edit documents by function event to segment if no such event was received during this batch (see the sketch below)
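
The guard amounts to something like the following sketch; the aggregator and field names are hypothetical, not the real analytics types:

```rust
// Hypothetical aggregator, not the real analytics type: the event is only produced
// when at least one "edit documents by function" request was received in the batch.
struct EditDocumentsByFunctionAggregator {
    total_received: usize,
}

impl EditDocumentsByFunctionAggregator {
    fn into_event(self) -> Option<serde_json::Value> {
        if self.total_received == 0 {
            // Nothing happened during this batch: stay silent instead of sending
            // an empty event to segment.
            return None;
        }
        Some(serde_json::json!({ "requests": { "total_received": self.total_received } }))
    }
}
```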

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-11 08:27:16 +00:00
466604725e Do not send empty edit document by function 2024-10-10 23:47:15 +02:00
995394a516 Merge #4993
4993: Update mini-dashboard r=ManyTheFish a=curquiza

Remove the forced capitalized attribute name

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-10-10 05:57:45 +00:00
6e37ae8619 Update mini-dashboard 2024-10-09 19:13:14 +02:00
657c645603 Merge #4992
4992: fix the bad experimental search queue size r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes #4991 

## What does this PR do?
- Set the right default value for the experimental search queue size in the config file (see the sketch below)
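
The root cause is that the two default paths disagreed: the CLI default was 1000, but the field only carried a plain `#[serde(default)]`, so a config file that omitted the option fell back to `usize::default()`, i.e. 0. A condensed sketch of the fix, mirroring the pattern visible in the `option.rs` diff at the end of this compare (the `Opt` struct here is trimmed to the relevant field):

```rust
use clap::Parser;
use serde::Deserialize;

// One helper feeds both the clap default (command line) and the serde default
// (config file), so the two can no longer diverge.
fn default_experimental_search_queue_size() -> usize {
    1000
}

#[derive(Parser, Deserialize)]
struct Opt {
    #[clap(long, default_value_t = default_experimental_search_queue_size())]
    #[serde(default = "default_experimental_search_queue_size")]
    experimental_search_queue_size: usize,
}
```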


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-09 10:45:48 +00:00
7f5d0837c3 fix the bad experimental search queue size 2024-10-09 11:46:57 +02:00
0566f2549d Merge #4972
4972: Add binary quantized to error messages r=irevoire a=dureuill

was missing in error messages

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-02 09:23:55 +00:00
0c2661ea90 Fix tests 2024-10-02 11:20:29 +02:00
62dfbd6255 Add binary quantized to allowed fields for source adds its sources 2024-10-02 11:20:02 +02:00
cc669f90d5 Merge #4971
4971: update arroy r=dureuill a=irevoire

# Pull Request

Fix part of https://github.com/meilisearch/meilisearch/issues/3715


## What does this PR do?
- Update arroy to the latest version; most changes are maintenance changes
- The performance of adding vectors to arroy should improve slightly
- Forward the build cancellation function to arroy so it can stop building trees when we have to stop an indexing process (sketched below)
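
The cancellation forwarding can be sketched with nothing but the standard library; the hook arroy actually exposes may differ, so the names below are illustrative:

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

// Generic sketch of the pattern: the index scheduler owns a "must stop" flag and hands
// a cheap closure down to the vector-store build, which polls it between steps.
fn build_trees(cancel: &dyn Fn() -> bool) -> Result<(), &'static str> {
    for _tree in 0..100 {
        if cancel() {
            return Err("indexing process was asked to stop");
        }
        // ...build one tree...
    }
    Ok(())
}

fn main() {
    let must_stop = Arc::new(AtomicBool::new(false));
    let flag = Arc::clone(&must_stop);
    let cancel = move || flag.load(Ordering::Relaxed);

    // Simulate an external stop request arriving before the build starts.
    must_stop.store(true, Ordering::Relaxed);
    assert!(build_trees(&cancel).is_err());
}
```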


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-02 05:53:51 +00:00
b1dc10e771 uses the new cancellation method in arroy 2024-10-01 17:45:49 +02:00
4b598fa648 update arroy 2024-10-01 17:31:12 +02:00
71b364286b Merge #4957
4957: Update charabia feature flags r=dureuill a=ManyTheFish

# Pull Request

Add charabia's `turkish` feature flag to the Meilisearch default tokenization flags



[All tests pipeline](https://github.com/meilisearch/meilisearch/actions/runs/11030036031)

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-26 20:19:21 +00:00
86183e0807 Merge #4960
4960: Update rhai r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4956

A fix has been implemented in https://github.com/rhaiscript/rhai/issues/916

## What does this PR do?
- Use the latest version of rhai containing the fix

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-26 15:03:01 +00:00
78a4b7949d update rhai to a version that shouldn’t panic 2024-09-26 15:04:03 +02:00
dc2cb58cf1 use charabia default for all-tokenization 2024-09-25 11:12:30 +02:00
e9580fe619 Add turkish normalization 2024-09-25 11:03:17 +02:00
8205254f4c Merge #4955
4955: Upgrade "batch failed" log to error level r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4916 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-25 08:18:44 +00:00
efdc5739d7 Merge #4953
4953: Move the multi arroy index logic to the arroy wrapper r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4948

## What does this PR do?
- Make the `ArroyWrapper` we introduced in the last PR handle all the embeddings for a specific docid itself.


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-24 15:02:24 +00:00
b31e9bea26 while retrieving the readers on an arroywrapper, stops at the first empty reader 2024-09-24 16:33:17 +02:00
7f048b9732 early exit in the clear and contains 2024-09-24 15:02:38 +02:00
8b4e2c7b17 Remove now unused method 2024-09-24 15:00:25 +02:00
645a55317a merge the build and quantize method 2024-09-24 14:54:24 +02:00
8caf97db86 Merge #4954
4954: Fix bench by adding embedder r=ManyTheFish a=dureuill

Fix benchmark workloads following a breaking change on embedders

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-24 12:53:34 +00:00
b8a74e0464 fix comments 2024-09-24 10:59:15 +02:00
fd8447c521 fix the del items thing 2024-09-24 10:52:05 +02:00
f2d187ba3e rename the index method to embedder_index 2024-09-24 10:39:40 +02:00
79d8a7a51a rename the embedder index for clarity 2024-09-24 10:36:28 +02:00
86da0e83fe Upgrade "batch failed" log to ERROR level 2024-09-24 10:02:53 +02:00
0704fb71e9 Fix bench by adding embedder 2024-09-24 09:56:47 +02:00
1e4d4e69c4 finish the arroywrapper 2024-09-23 18:56:15 +02:00
6ba4baecbf first ugly step 2024-09-23 15:15:26 +02:00
afa3ae0cbd WIP 2024-09-19 17:42:52 +02:00
117 changed files with 3910 additions and 8140 deletions

Cargo.lock generated
View File

@ -386,8 +386,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "arroy"
version = "0.4.0"
source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
dependencies = [
"bytemuck",
"byteorder",
@ -471,7 +472,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "benchmarks"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"anyhow",
"bytes",
@ -527,7 +528,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.60",
]
@ -652,7 +653,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"anyhow",
"time",
@ -934,7 +935,8 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.9.1"
source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798"
dependencies = [
"aho-corasick",
"csv",
@ -1621,7 +1623,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"anyhow",
"big_s",
@ -1833,7 +1835,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "file-store"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"tempfile",
"thiserror",
@ -1855,7 +1857,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"insta",
"nom",
@ -1875,7 +1877,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"criterion",
"serde_json",
@ -1999,7 +2001,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"arbitrary",
"clap",
@ -2220,11 +2222,11 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grenad"
version = "0.4.7"
source = "git+https://github.com/meilisearch/grenad?branch=various-improvements#58ac87d852413571102f44c5e55ca13509a3f1a0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "350d89047298d3b1b40050acd11ab76e487b854a104b760ebc5a7f375093de77"
dependencies = [
"bytemuck",
"byteorder",
"either",
"rayon",
"tempfile",
]
@ -2307,9 +2309,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.14.5"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
"ahash 0.8.11",
"allocator-api2",
@ -2551,7 +2553,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"anyhow",
"arroy",
@ -2569,7 +2571,6 @@ dependencies = [
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"memmap2",
"page_size",
"rayon",
"roaring",
@ -2591,7 +2592,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown 0.14.5",
"hashbrown 0.14.3",
"serde",
]
@ -2650,7 +2651,8 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
[[package]]
name = "irg-kvariants"
version = "0.1.1"
source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26"
dependencies = [
"csv",
"once_cell",
@ -2745,7 +2747,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"criterion",
"serde_json",
@ -3364,7 +3366,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"insta",
"md5",
@ -3373,7 +3375,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"actix-cors",
"actix-http",
@ -3413,6 +3415,7 @@ dependencies = [
"meilisearch-types",
"mimalloc",
"mime",
"mopa-maintained",
"num_cpus",
"obkv",
"once_cell",
@ -3462,7 +3465,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@ -3481,7 +3484,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"actix-web",
"anyhow",
@ -3511,7 +3514,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"anyhow",
"clap",
@ -3542,7 +3545,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"arroy",
"big_s",
@ -3566,7 +3569,6 @@ dependencies = [
"fxhash",
"geoutils",
"grenad",
"hashbrown 0.14.5",
"heed",
"hf-hub",
"indexmap",
@ -3680,6 +3682,12 @@ dependencies = [
"syn 2.0.60",
]
[[package]]
name = "mopa-maintained"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79b7f3e22167862cc7c95b21a6f326c22e4bf40da59cbf000b368a310173ba11"
[[package]]
name = "mutually_exclusive_features"
version = "0.0.3"
@ -3836,8 +3844,9 @@ dependencies = [
[[package]]
name = "obkv"
version = "0.3.0"
source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#ce535874008ecac554f02e0c670e6caf62134d6b"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2e27bcfe835a379d32352112f6b8dbae2d99d16a5fff42abe6e5ba5386c1e5a"
[[package]]
name = "once_cell"
@ -3982,7 +3991,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"big_s",
"serde_json",
@ -4313,7 +4322,7 @@ dependencies = [
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustc-hash 1.1.0",
"rustls",
"thiserror",
"tokio",
@ -4322,14 +4331,14 @@ dependencies = [
[[package]]
name = "quinn-proto"
version = "0.11.3"
version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe"
checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6"
dependencies = [
"bytes",
"rand",
"ring",
"rustc-hash",
"rustc-hash 2.0.0",
"rustls",
"slab",
"thiserror",
@ -4580,9 +4589,8 @@ dependencies = [
[[package]]
name = "rhai"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61797318be89b1a268a018a92a7657096d83f3ecb31418b9e9c16dcbb043b702"
version = "1.20.0"
source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
dependencies = [
"ahash 0.8.11",
"bitflags 2.6.0",
@ -4599,8 +4607,7 @@ dependencies = [
[[package]]
name = "rhai_codegen"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5a11a05ee1ce44058fa3d5961d05194fdbe3ad6b40f904af764d81b86450e6b"
source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
dependencies = [
"proc-macro2",
"quote",
@ -4654,7 +4661,8 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.6"
source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#6bba84b1a47da1d6e52d5c4dc0ce8593ae4646a5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1"
dependencies = [
"bytemuck",
"byteorder",
@ -4701,6 +4709,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
[[package]]
name = "rustc_version"
version = "0.4.0"
@ -4839,9 +4853,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
version = "1.0.210"
version = "1.0.209"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
dependencies = [
"serde_derive",
]
@ -4857,9 +4871,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.210"
version = "1.0.209"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
dependencies = [
"proc-macro2",
"quote",
@ -5352,7 +5366,7 @@ dependencies = [
"fancy-regex 0.12.0",
"lazy_static",
"parking_lot",
"rustc-hash",
"rustc-hash 1.1.0",
]
[[package]]
@ -6040,7 +6054,7 @@ version = "0.16.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
dependencies = [
"hashbrown 0.14.5",
"hashbrown 0.14.3",
"once_cell",
]
@ -6366,7 +6380,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.11.0"
version = "1.11.1"
dependencies = [
"anyhow",
"build-info",

View File

@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.11.0"
version = "1.11.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
@ -44,5 +44,23 @@ opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[patch.crates-io]
roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3
[profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3

View File

@ -29,7 +29,6 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
memmap2 = "0.9.4"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
@ -41,7 +40,7 @@ ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
arroy = "0.5.0"
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.39.0", features = ["json", "redactions"] }

View File

@ -28,9 +28,6 @@ use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::new::indexer::{
self, retrieve_or_guess_primary_key, DocumentChanges,
};
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
@ -878,8 +875,10 @@ impl IndexScheduler {
while let Some(doc) =
cursor.next_document().map_err(milli::Error::from)?
{
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
dump_content_file.push_document(&obkv_to_object(
&doc,
&documents_batch_index,
)?)?;
}
dump_content_file.flush()?;
}
@ -1253,52 +1252,58 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
let indexer_config = self.index_mapper.indexer_config();
/// TODO manage errors correctly
let rtxn = index.read_txn()?;
let first_addition_uuid = operations
.iter()
.find_map(|op| match op {
DocumentOperation::Add(content_uuid) => Some(content_uuid),
_ => None,
})
.unwrap();
let mut content_files = Vec::new();
for operation in &operations {
if let DocumentOperation::Add(content_uuid) = operation {
let content_file = self.file_store.get_update(*content_uuid)?;
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
if !mmap.is_empty() {
content_files.push(mmap);
if let Some(primary_key) = primary_key {
match index.primary_key(index_wtxn)? {
// if a primary key was set AND had already been defined in the index
// but to a different value, we can make the whole batch fail.
Some(pk) => {
if primary_key != pk {
return Err(milli::Error::from(
milli::UserError::PrimaryKeyCannotBeChanged(pk.to_string()),
)
.into());
}
}
// if the primary key was set and there was no primary key set for this index
// we set it to the received value before starting the indexing process.
None => {
let mut builder =
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
}
}
}
let mut fields_ids_map = index.fields_ids_map(&rtxn)?;
let first_document = match content_files.first() {
Some(mmap) => {
let mut iter = serde_json::Deserializer::from_slice(mmap).into_iter();
iter.next().transpose().map_err(|e| e.into()).map_err(Error::IoError)?
}
None => None,
};
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let primary_key = retrieve_or_guess_primary_key(
&rtxn,
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
let embedders = self.embedders(embedder_configs)?;
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
index,
&mut fields_ids_map,
first_document.as_ref(),
)?
.unwrap();
indexer_config,
config,
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|| must_stop_processing.get(),
)?;
let mut content_files_iter = content_files.iter();
let mut indexer = indexer::DocumentOperation::new(method);
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(_content_uuid) => {
let mmap = content_files_iter.next().unwrap();
let stats = indexer.add_documents(mmap)?;
// builder = builder.with_embedders(embedders.clone());
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
builder = builder.with_embedders(embedders.clone());
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
@ -1312,17 +1317,30 @@ impl IndexScheduler {
unreachable!();
};
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(stats.document_count as u64),
})
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
DocumentOperation::Delete(document_ids) => {
let count = document_ids.len();
indexer.delete_documents(document_ids);
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
// Uses Invariant: remove documents actually always returns Ok for the inner result
// let count = user_result.unwrap();
let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@ -1336,35 +1354,26 @@ impl IndexScheduler {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count as u64),
deleted_documents: Some(count),
});
}
}
}
if !tasks.iter().all(|res| res.error.is_some()) {
/// TODO create a pool if needed
// let pool = indexer_config.thread_pool.unwrap();
let pool = rayon::ThreadPoolBuilder::new().build().unwrap();
let param = (index, &rtxn, &primary_key);
let document_changes = indexer.document_changes(&mut fields_ids_map, param)?;
/// TODO pass/write the FieldsIdsMap
indexer::index(index_wtxn, index, fields_ids_map, &pool, document_changes)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
let mut builder =
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
// else if primary_key_has_been_set {
// // Everything failed but we've set a primary key.
// // We need to remove it.
// let mut builder =
// milli::update::Settings::new(index_wtxn, index, indexer_config);
// builder.reset_primary_key();
// builder.execute(
// |indexing_step| tracing::trace!(update = ?indexing_step),
// || must_stop_processing.clone().get(),
// )?;
// }
Ok(tasks)
}

View File

@ -1263,7 +1263,7 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
tracing::info!("Batch failed {}", error);
tracing::error!("Batch failed {}", error);
self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
@ -5201,9 +5201,10 @@ mod tests {
let configs = index_scheduler.embedders(configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
let beagle_embed =
hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
};

View File

@ -66,5 +66,8 @@ khmer = ["milli/khmer"]
vietnamese = ["milli/vietnamese"]
# force swedish character recomposition
swedish-recomposition = ["milli/swedish-recomposition"]
# force german character recomposition
# allow german tokenization
german = ["milli/german"]
# allow turkish normalization
turkish = ["milli/turkish"]

View File

@ -1,22 +1,20 @@
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, BufWriter};
use std::io::{self, BufWriter, Write};
use std::marker::PhantomData;
use memmap2::Mmap;
use milli::documents::Error;
use milli::update::new::TopLevelMap;
use memmap2::MmapOptions;
use milli::documents::{DocumentsBatchBuilder, Error};
use milli::Object;
use serde::de::{SeqAccess, Visitor};
use serde::{Deserialize, Deserializer};
use serde_json::error::Category;
use serde_json::{to_writer, Map, Value};
use crate::error::{Code, ErrorCode};
type Result<T> = std::result::Result<T, DocumentFormatError>;
#[derive(Debug, Clone, Copy)]
#[derive(Debug)]
pub enum PayloadType {
Ndjson,
Json,
@ -90,26 +88,6 @@ impl From<(PayloadType, Error)> for DocumentFormatError {
}
}
impl From<(PayloadType, serde_json::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, serde_json::Error)) -> Self {
if error.classify() == Category::Data {
Self::Io(error.into())
} else {
Self::MalformedPayload(Error::Json(error), ty)
}
}
}
impl From<(PayloadType, csv::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, csv::Error)) -> Self {
if error.is_io_error() {
Self::Io(error.into())
} else {
Self::MalformedPayload(Error::Csv(error), ty)
}
}
}
impl From<io::Error> for DocumentFormatError {
fn from(error: io::Error) -> Self {
Self::Io(error)
@ -125,140 +103,67 @@ impl ErrorCode for DocumentFormatError {
}
}
// TODO remove that from the place I've borrowed it
#[derive(Debug)]
enum AllowedType {
String,
Boolean,
Number,
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}
fn parse_csv_header(header: &str) -> (&str, AllowedType) {
// if there are several separators we only split on the last one.
match header.rsplit_once(':') {
Some((field_name, field_type)) => match field_type {
"string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number),
// if the pattern isn't recognized, we keep the whole field.
_otherwise => (header, AllowedType::String),
},
None => (header, AllowedType::String),
}
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
/// Reads CSV from file and write it in NDJSON in a file checking it along the way.
pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result<u64> {
let ptype = PayloadType::Csv { delimiter };
let mut output = BufWriter::new(output);
let mut reader = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(input);
let headers = reader.headers().map_err(|e| DocumentFormatError::from((ptype, e)))?.clone();
let typed_fields: Vec<_> = headers.iter().map(parse_csv_header).collect();
let mut object: Map<_, _> =
typed_fields.iter().map(|(k, _)| (k.to_string(), Value::Null)).collect();
let mut line = 0;
let mut record = csv::StringRecord::new();
while reader.read_record(&mut record).map_err(|e| DocumentFormatError::from((ptype, e)))? {
// We increment here and not at the end of the loop
// to take the header offset into account.
line += 1;
// Reset the document values
object.iter_mut().for_each(|(_, v)| *v = Value::Null);
for (i, (name, atype)) in typed_fields.iter().enumerate() {
let value = &record[i];
let trimmed_value = value.trim();
let value = match atype {
AllowedType::Number if trimmed_value.is_empty() => Value::Null,
AllowedType::Number => match trimmed_value.parse::<i64>() {
Ok(integer) => Value::from(integer),
Err(_) => match trimmed_value.parse::<f64>() {
Ok(float) => Value::from(float),
Err(error) => {
return Err(DocumentFormatError::MalformedPayload(
Error::ParseFloat { error, line, value: value.to_string() },
ptype,
))
}
},
},
AllowedType::Boolean if trimmed_value.is_empty() => Value::Null,
AllowedType::Boolean => match trimmed_value.parse::<bool>() {
Ok(bool) => Value::from(bool),
Err(error) => {
return Err(DocumentFormatError::MalformedPayload(
Error::ParseBool { error, line, value: value.to_string() },
ptype,
))
}
},
AllowedType::String if value.is_empty() => Value::Null,
AllowedType::String => Value::from(value),
};
*object.get_mut(*name).expect("encountered an unknown field") = value;
}
to_writer(&mut output, &object).map_err(|e| DocumentFormatError::from((ptype, e)))?;
}
Ok(line as u64)
}
/// Reads JSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserailize into a TopLevelMap<'pl> that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);
let count = match array_each(&mut deserializer, |obj: TopLevelMap| to_writer(&mut out, &obj)) {
match array_each(&mut deserializer, |obj| builder.append_json_object(&obj)) {
// The json data has been deserialized and does not need to be processed again.
// The data has been transferred to the writer during the deserialization process.
Ok(Ok(count)) => count,
Ok(Err(e)) => return Err(DocumentFormatError::from((PayloadType::Json, e))),
Ok(Ok(_)) => (),
Ok(Err(e)) => return Err(DocumentFormatError::Io(e)),
Err(e) => {
// Attempt to deserialize a single json string when the cause of the exception is not Category.data
// Other types of deserialisation exceptions are returned directly to the front-end
if e.classify() != Category::Data {
return Err(DocumentFormatError::from((PayloadType::Json, e)));
if e.classify() != serde_json::error::Category::Data {
return Err(DocumentFormatError::MalformedPayload(
Error::Json(e),
PayloadType::Json,
));
}
let content: Object = serde_json::from_slice(&input)
let content: Object = serde_json::from_slice(&mmap)
.map_err(Error::Json)
.map_err(|e| (PayloadType::Json, e))?;
to_writer(&mut out, &content)
.map(|_| 1)
.map_err(|e| DocumentFormatError::from((PayloadType::Json, e)))?
builder.append_json_object(&content).map_err(DocumentFormatError::Io)?;
}
};
match out.into_inner() {
Ok(_) => Ok(count),
Err(ie) => Err(DocumentFormatError::Io(ie.into_error())),
}
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}
/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserailize into a TopLevelMap<'pl> that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output);
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut count = 0;
for result in serde_json::Deserializer::from_slice(&input).into_iter() {
count += 1;
result
.and_then(|map: TopLevelMap| to_writer(&mut output, &map))
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {
let object = result.map_err(Error::Json).map_err(|e| (PayloadType::Ndjson, e))?;
builder.append_json_object(&object).map_err(Into::into).map_err(DocumentFormatError::Io)?;
}
Ok(count)
let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}
/// The actual handling of the deserialization process in serde
@ -267,23 +172,20 @@ pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
/// ## References
/// <https://serde.rs/stream-array.html>
/// <https://github.com/serde-rs/json/issues/160>
fn array_each<'de, D, T, F>(
deserializer: D,
f: F,
) -> std::result::Result<serde_json::Result<u64>, D::Error>
fn array_each<'de, D, T, F>(deserializer: D, f: F) -> std::result::Result<io::Result<u64>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
F: FnMut(T) -> serde_json::Result<()>,
F: FnMut(T) -> io::Result<()>,
{
struct SeqVisitor<T, F>(F, PhantomData<T>);
impl<'de, T, F> Visitor<'de> for SeqVisitor<T, F>
where
T: Deserialize<'de>,
F: FnMut(T) -> serde_json::Result<()>,
F: FnMut(T) -> io::Result<()>,
{
type Value = serde_json::Result<u64>;
type Value = io::Result<u64>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a nonempty sequence")
@ -292,7 +194,7 @@ where
fn visit_seq<A>(
mut self,
mut seq: A,
) -> std::result::Result<serde_json::Result<u64>, <A as SeqAccess<'de>>::Error>
) -> std::result::Result<io::Result<u64>, <A as SeqAccess<'de>>::Error>
where
A: SeqAccess<'de>,
{
@ -301,7 +203,7 @@ where
match self.0(value) {
Ok(()) => max += 1,
Err(e) => return Ok(Err(e)),
}
};
}
Ok(Ok(max))
}

View File

@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
@ -75,7 +75,7 @@ reqwest = { version = "0.12.5", features = [
rustls = { version = "0.23.11", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.7.0", features = ["alloc"] }
rustls-pemfile = "2.1.2"
segment = { version = "0.2.4", optional = true }
segment = { version = "0.2.4" }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"
@ -104,6 +104,7 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.2"
mopa-maintained = "0.2.3"
[dev-dependencies]
actix-rt = "2.10.0"
@ -131,8 +132,7 @@ tempfile = { version = "3.10.1", optional = true }
zip = { version = "2.1.3", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
mini-dashboard = [
"static-files",
"anyhow",
@ -154,7 +154,8 @@ khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip"
sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e"

View File

@ -1,109 +0,0 @@
use std::any::Any;
use std::sync::Arc;
use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
use crate::Opt;
pub struct MockAnalytics {
instance_uid: Option<InstanceUid>,
}
#[derive(Default)]
pub struct SearchAggregator;
#[allow(dead_code)]
impl SearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct SimilarAggregator;
#[allow(dead_code)]
impl SimilarAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
#[allow(dead_code)]
impl MultiSearchAggregator {
pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
let instance_uid = find_user_id(&opt.db_path);
Arc::new(Self { instance_uid })
}
}
impl Analytics for MockAnalytics {
fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
self.instance_uid.as_ref()
}
// These methods are noop and should be optimized out
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
fn update_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn update_documents_by_function(
&self,
_documents_query: &DocumentEditionByFunction,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View File

@ -1,44 +1,45 @@
mod mock_analytics;
#[cfg(feature = "analytics")]
mod segment_analytics;
pub mod segment_analytics;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use actix_web::HttpRequest;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::InstanceUid;
pub use mock_analytics::MockAnalytics;
use mopa::mopafy;
use once_cell::sync::Lazy;
use platform_dirs::AppDirs;
use serde_json::Value;
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
// if the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics
#[cfg(not(feature = "analytics"))]
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
#[cfg(not(feature = "analytics"))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if the feature analytics is enabled we use the real analytics
#[cfg(feature = "analytics")]
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
#[cfg(feature = "analytics")]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
use crate::Opt;
/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name.
#[macro_export]
macro_rules! empty_analytics {
($struct_name:ident, $event_name:literal) => {
#[derive(Default)]
struct $struct_name {}
impl $crate::analytics::Aggregate for $struct_name {
fn event_name(&self) -> &'static str {
$event_name
}
fn aggregate(self: Box<Self>, _other: Box<Self>) -> Box<Self> {
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::json!({})
}
}
};
}
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@ -78,60 +79,88 @@ pub enum DocumentFetchKind {
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
pub trait Analytics: Sync + Send {
fn instance_uid(&self) -> Option<&InstanceUid>;
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
pub trait Aggregate: 'static + mopa::Any + Send {
/// The name of the event that will be sent to segment.
fn event_name(&self) -> &'static str;
/// Will be called every time an event has been used twice before segment flushed its buffer.
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>
where
Self: Sized;
/// Converts your structure to the final event that'll be sent to segment.
fn into_event(self: Box<Self>) -> serde_json::Value;
}
mopafy!(Aggregate);
/// Helper trait to define multiple aggregates with the same content but a different name.
/// Commonly used when you must aggregate a search with POST or with GET, for example.
pub trait AggregateMethod: 'static + Default + Send {
fn event_name() -> &'static str;
}
/// A macro used to quickly define multiple aggregate method with their name
/// Usage:
/// ```rust
/// use meilisearch::aggregate_methods;
///
/// aggregate_methods!(
/// SearchGET => "Documents Searched GET",
/// SearchPOST => "Documents Searched POST",
/// );
/// ```
#[macro_export]
macro_rules! aggregate_methods {
($method:ident => $event_name:literal) => {
#[derive(Default)]
pub struct $method {}
impl $crate::analytics::AggregateMethod for $method {
fn event_name() -> &'static str {
$event_name
}
}
};
($($method:ident => $event_name:literal,)+) => {
$(
aggregate_methods!($method => $event_name);
)+
};
}
#[derive(Clone)]
pub struct Analytics {
segment: Option<Arc<SegmentAnalytics>>,
}
impl Analytics {
pub async fn new(
opt: &Opt,
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) -> Self {
if opt.no_analytics {
Self { segment: None }
} else {
Self { segment: SegmentAnalytics::new(opt, index_scheduler, auth_controller).await }
}
}
pub fn no_analytics() -> Self {
Self { segment: None }
}
pub fn instance_uid(&self) -> Option<&InstanceUid> {
self.segment.as_ref().map(|segment| segment.instance_uid.as_ref())
}
/// The method used to publish most analytics that do not need to be batched every hours
fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
/// This method should be called to aggregate a get search
fn get_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a get similar request
fn get_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post similar request
fn post_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate an add documents request
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to aggregate a fetch documents request
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a fetch documents request
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a add documents request
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
// this method should be called to batch an update documents request
fn update_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to batch an update documents by function request
fn update_documents_by_function(
&self,
documents_query: &DocumentEditionByFunction,
index_creation: bool,
request: &HttpRequest,
);
pub fn publish<T: Aggregate>(&self, event: T, request: &HttpRequest) {
if let Some(ref segment) = self.segment {
let _ = segment.sender.try_send(segment_analytics::Message::new(event, request));
}
}
}

File diff suppressed because it is too large

View File

@ -120,7 +120,7 @@ pub fn create_app(
search_queue: Data<SearchQueue>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
analytics: Data<Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
impl ServiceFactory<
@ -473,14 +473,14 @@ pub fn configure_data(
search_queue: Data<SearchQueue>,
opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
analytics: Data<Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(web::Data::from(analytics))
.app_data(analytics)
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))

View File

@ -5,6 +5,7 @@ use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::available_parallelism;
use std::time::Duration;
use actix_web::http::KeepAlive;
use actix_web::web::Data;
@ -123,19 +124,12 @@ async fn try_main() -> anyhow::Result<()> {
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
let analytics = if !opt.no_analytics {
analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone())
.await
} else {
analytics::MockAnalytics::new(&opt)
};
#[cfg(any(debug_assertions, not(feature = "analytics")))]
let analytics = analytics::MockAnalytics::new(&opt);
let analytics =
analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?;
Ok(())
}
@ -145,16 +139,23 @@ async fn run_http(
auth_controller: Arc<AuthController>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
analytics: Arc<Analytics>,
) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let index_scheduler = Data::from(index_scheduler);
let auth_controller = Data::from(auth_controller);
let analytics = Data::from(analytics);
let search_queue = SearchQueue::new(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
available_parallelism()
.unwrap_or(NonZeroUsize::new(2).unwrap())
.checked_mul(opt.experimental_nb_searches_per_core)
.unwrap_or(NonZeroUsize::MAX),
)
.with_time_to_abort(Duration::from_secs(
usize::from(opt.experimental_drop_search_after) as u64
));
let search_queue = Data::new(search_queue);
let http_server = HttpServer::new(move || {
@ -180,11 +181,7 @@ async fn run_http(
Ok(())
}
pub fn print_launch_resume(
opt: &Opt,
analytics: Arc<dyn Analytics>,
config_read_from: Option<PathBuf>,
) {
pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Option<PathBuf>) {
let build_info = build_info::BuildInfo::from_build();
let protocol =
@ -226,7 +223,6 @@ pub fn print_launch_resume(
eprintln!("Prototype:\t\t{:?}", prototype);
}
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
if !opt.no_analytics {
eprintln!(

View File

@ -2,7 +2,7 @@ use std::env::VarError;
use std::ffi::OsStr;
use std::fmt::Display;
use std::io::{BufReader, Read};
use std::num::ParseIntError;
use std::num::{NonZeroUsize, ParseIntError};
use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
@ -29,7 +29,6 @@ const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
#[cfg(feature = "analytics")]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
@ -55,6 +54,8 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
const MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE: &str = "MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
@ -208,7 +209,6 @@ pub struct Opt {
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
/// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
/// at any time.
#[cfg(feature = "analytics")]
#[serde(default)] // we can't send true
#[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool,
@ -357,10 +357,26 @@ pub struct Opt {
/// Lets you customize the size of the search queue. Meilisearch processes your search requests as fast as possible but once the
/// queue is full it starts returning HTTP 503, Service Unavailable.
/// The default value is 1000.
#[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = 1000)]
#[serde(default)]
#[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = default_experimental_search_queue_size())]
#[serde(default = "default_experimental_search_queue_size")]
pub experimental_search_queue_size: usize,
/// Experimental drop search after. For more information, see: <https://github.com/orgs/meilisearch/discussions/783>
///
/// Lets you customize after how many seconds Meilisearch should consider a search request irrelevant and drop it.
/// The default value is 60.
#[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())]
#[serde(default = "default_drop_search_after")]
pub experimental_drop_search_after: NonZeroUsize,
/// Experimental number of searches per core. For more information, see: <https://github.com/orgs/meilisearch/discussions/784>
///
/// Lets you customize how many search requests can run on each core concurrently.
/// The default value is 4.
#[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())]
#[serde(default = "default_nb_searches_per_core")]
pub experimental_nb_searches_per_core: NonZeroUsize,
/// Experimental logs mode feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/723>
///
/// Change the mode of the logs on the console.
@ -407,7 +423,6 @@ pub struct Opt {
impl Opt {
/// Whether analytics should be enabled or not.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub fn analytics(&self) -> bool {
!self.no_analytics
}
@ -487,11 +502,12 @@ impl Opt {
ignore_missing_dump: _,
ignore_dump_if_db_exists: _,
config_file_path: _,
#[cfg(feature = "analytics")]
no_analytics,
experimental_contains_filter,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_drop_search_after,
experimental_nb_searches_per_core,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_replication_parameters,
@ -513,10 +529,7 @@ impl Opt {
);
}
#[cfg(feature = "analytics")]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
}
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
export_to_env_if_not_present(
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
http_payload_size_limit.to_string(),
@ -559,6 +572,14 @@ impl Opt {
MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE,
experimental_search_queue_size.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER,
experimental_drop_search_after.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE,
experimental_nb_searches_per_core.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LOGS_MODE,
experimental_logs_mode.to_string(),
@ -890,6 +911,18 @@ fn default_dump_dir() -> PathBuf {
PathBuf::from(DEFAULT_DUMP_DIR)
}
fn default_experimental_search_queue_size() -> usize {
1000
}
fn default_drop_search_after() -> NonZeroUsize {
NonZeroUsize::new(60).unwrap()
}
fn default_nb_searches_per_core() -> NonZeroUsize {
NonZeroUsize::new(4).unwrap()
}
/// Indicates if a snapshot was scheduled, and if yes with which interval.
#[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
pub enum ScheduleSnapshot {


@ -4,7 +4,6 @@ use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
@ -18,14 +17,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
}
crate::empty_analytics!(DumpAnalytics, "Dump Created");
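The empty_analytics! macro is defined in the analytics module and is not shown in this diff; judging from its usage here, it presumably declares a Default-implementing struct whose event carries an empty payload. A speculative sketch only, not the actual expansion (it relies on the Aggregate trait from crate::analytics):

// Hypothetical expansion of `crate::empty_analytics!(DumpAnalytics, "Dump Created")`.
#[derive(Default)]
pub struct DumpAnalytics;

impl Aggregate for DumpAnalytics {
    fn event_name(&self) -> &'static str {
        "Dump Created"
    }
    fn aggregate(self: Box<Self>, _new: Box<Self>) -> Box<Self> {
        self
    }
    fn into_event(self: Box<Self>) -> serde_json::Value {
        serde_json::json!({})
    }
}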
pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
analytics.publish(DumpAnalytics::default(), &req);
let task = KindWithContent::DumpCreation {
keys: auth_controller.list_keys()?,


@ -6,10 +6,10 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde_json::json;
use serde::Serialize;
use tracing::debug;
use crate::analytics::Analytics;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
@ -17,7 +17,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_features)))
.route(web::get().to(get_features))
.route(web::patch().to(SeqHandler(patch_features))),
);
}
@ -27,12 +27,9 @@ async fn get_features(
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>,
Data<IndexScheduler>,
>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> HttpResponse {
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
let features = features.runtime_features();
debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
@ -53,6 +50,35 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: Option<bool>,
}
#[derive(Serialize)]
pub struct PatchExperimentalFeatureAnalytics {
vector_store: bool,
metrics: bool,
logs_route: bool,
edit_documents_by_function: bool,
contains_filter: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
fn event_name(&self) -> &'static str {
"Experimental features Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
vector_store: new.vector_store,
metrics: new.metrics,
logs_route: new.logs_route,
edit_documents_by_function: new.edit_documents_by_function,
contains_filter: new.contains_filter,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
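The Aggregate trait itself lives in the analytics module and does not appear in this excerpt; its shape can be inferred from the impls throughout the diff. An inferred sketch (the real definition likely carries extra bounds such as Send + Sync + 'static):

// Inferred from the impls in this diff; not the verbatim trait definition.
pub trait Aggregate: 'static {
    /// Name of the event this aggregate produces.
    fn event_name(&self) -> &'static str;
    /// Merge this aggregate with a newer one for the same event.
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>;
    /// Convert the accumulated data into the JSON payload that gets sent.
    fn into_event(self: Box<Self>) -> serde_json::Value;
}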
async fn patch_features(
index_scheduler: GuardedData<
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>,
@ -60,7 +86,7 @@ async fn patch_features(
>,
new_features: AwebJson<RuntimeTogglableFeatures, DeserrJsonError>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
@ -89,15 +115,14 @@ async fn patch_features(
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"edit_documents_by_function": edit_documents_by_function,
"contains_filter": contains_filter,
}),
Some(&req),
PatchExperimentalFeatureAnalytics {
vector_store,
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
},
&req,
);
index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");


@ -1,4 +1,6 @@
use std::collections::HashSet;
use std::io::ErrorKind;
use std::marker::PhantomData;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@ -23,14 +25,14 @@ use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::analytics::{Aggregate, AggregateMethod, Analytics};
use crate::error::MeilisearchHttpError;
use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
@ -41,7 +43,7 @@ use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
use crate::search::{parse_filter, RetrieveVectors};
use crate::Opt;
use crate::{aggregate_methods, Opt};
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@ -100,12 +102,84 @@ pub struct GetDocument {
retrieve_vectors: Param<bool>,
}
aggregate_methods!(
DocumentsGET => "Documents Fetched GET",
DocumentsPOST => "Documents Fetched POST",
);
#[derive(Serialize)]
pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
// a call on ../documents/:doc_id
per_document_id: bool,
// if a filter was used
per_filter: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
// pagination
#[serde(rename = "pagination.max_limit")]
max_limit: usize,
#[serde(rename = "pagination.max_offset")]
max_offset: usize,
marker: std::marker::PhantomData<Method>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
};
Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
marker: PhantomData,
}
}
}
impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
per_filter: self.per_filter | new.per_filter,
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset),
marker: PhantomData,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
document_param: web::Path<DocumentParam>,
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
@ -117,8 +191,15 @@ pub async fn get_document(
let features = index_scheduler.features();
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
analytics.get_fetch_documents(
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
retrieve_vectors: param_retrieve_vectors.0,
per_document_id: true,
per_filter: false,
max_limit: 0,
max_offset: 0,
marker: PhantomData,
},
&req,
);
@ -129,17 +210,52 @@ pub async fn get_document(
Ok(HttpResponse::Ok().json(document))
}
#[derive(Serialize)]
pub struct DocumentsDeletionAggregator {
per_document_id: bool,
clear_all: bool,
per_batch: bool,
per_filter: bool,
}
impl Aggregate for DocumentsDeletionAggregator {
fn event_name(&self) -> &'static str {
"Documents Deleted"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
clear_all: self.clear_all | new.clear_all,
per_batch: self.per_batch | new.per_batch,
per_filter: self.per_filter | new.per_filter,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);
analytics.publish(
DocumentsDeletionAggregator {
per_document_id: true,
clear_all: false,
per_batch: false,
per_filter: false,
},
&req,
);
let task = KindWithContent::DocumentDeletion {
index_uid: index_uid.to_string(),
@ -190,17 +306,19 @@ pub async fn documents_by_query_post(
index_uid: web::Path<String>,
body: AwebJson<BrowseQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: body.filter.is_some(),
limit: body.limit,
offset: body.offset,
analytics.publish(
DocumentsFetchAggregator::<DocumentsPOST> {
per_filter: body.filter.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
per_document_id: false,
marker: PhantomData,
},
&req,
);
@ -213,7 +331,7 @@ pub async fn get_documents(
index_uid: web::Path<String>,
params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
@ -235,12 +353,14 @@ pub async fn get_documents(
filter,
};
analytics.get_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: query.filter.is_some(),
limit: query.limit,
offset: query.offset,
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
per_filter: query.filter.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
per_document_id: false,
marker: PhantomData,
},
&req,
);
@ -298,6 +418,39 @@ fn from_char_csv_delimiter(
}
}
aggregate_methods!(
Replaced => "Documents Added",
Updated => "Documents Updated",
);
#[derive(Serialize)]
pub struct DocumentsAggregator<T: AggregateMethod> {
payload_types: HashSet<String>,
primary_key: HashSet<String>,
index_creation: bool,
#[serde(skip)]
method: PhantomData<T>,
}
impl<Method: AggregateMethod> Aggregate for DocumentsAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
payload_types: self.payload_types.union(&new.payload_types).cloned().collect(),
primary_key: self.primary_key.union(&new.primary_key).cloned().collect(),
index_creation: self.index_creation | new.index_creation,
method: PhantomData,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(self).unwrap_or_default()
}
}
pub async fn replace_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
@ -305,16 +458,32 @@ pub async fn replace_documents(
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!(parameters = ?params, "Replace documents");
let params = params.into_inner();
analytics.add_documents(
&params,
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
let mut content_types = HashSet::new();
let content_type = req
.headers()
.get(CONTENT_TYPE)
.and_then(|s| s.to_str().ok())
.unwrap_or("unknown")
.to_string();
content_types.insert(content_type);
let mut primary_keys = HashSet::new();
if let Some(primary_key) = params.primary_key.clone() {
primary_keys.insert(primary_key);
}
analytics.publish(
DocumentsAggregator::<Replaced> {
payload_types: content_types,
primary_key: primary_keys,
index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
method: PhantomData,
},
&req,
);
@ -346,16 +515,32 @@ pub async fn update_documents(
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.add_documents(
&params,
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
let mut content_types = HashSet::new();
let content_type = req
.headers()
.get(CONTENT_TYPE)
.and_then(|s| s.to_str().ok())
.unwrap_or("unknown")
.to_string();
content_types.insert(content_type);
let mut primary_keys = HashSet::new();
if let Some(primary_key) = params.primary_key.clone() {
primary_keys.insert(primary_key);
}
analytics.publish(
DocumentsAggregator::<Updated> {
payload_types: content_types,
primary_key: primary_keys,
index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
method: PhantomData,
},
&req,
);
@ -524,12 +709,20 @@ pub async fn delete_documents_batch(
body: web::Json<Vec<Value>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
analytics.publish(
DocumentsDeletionAggregator {
per_batch: true,
per_document_id: false,
clear_all: false,
per_filter: false,
},
&req,
);
let ids = body
.iter()
@ -562,14 +755,22 @@ pub async fn delete_documents_by_filter(
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
analytics.publish(
DocumentsDeletionAggregator {
per_filter: true,
per_document_id: false,
clear_all: false,
per_batch: false,
},
&req,
);
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
@ -599,13 +800,41 @@ pub struct DocumentEditionByFunction {
pub function: String,
}
#[derive(Serialize)]
struct EditDocumentsByFunctionAggregator {
// Set to true if at least one request was filtered
filtered: bool,
// Set to true if at least one request contained a context
with_context: bool,
index_creation: bool,
}
impl Aggregate for EditDocumentsByFunctionAggregator {
fn event_name(&self) -> &'static str {
"Documents Edited By Function"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
filtered: self.filtered | new.filtered,
with_context: self.with_context | new.with_context,
index_creation: self.index_creation | new.index_creation,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn edit_documents_by_function(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Edit documents by function");
@ -617,9 +846,12 @@ pub async fn edit_documents_by_function(
let index_uid = index_uid.into_inner();
let params = params.into_inner();
analytics.update_documents_by_function(
&params,
index_scheduler.index(&index_uid).is_err(),
analytics.publish(
EditDocumentsByFunctionAggregator {
filtered: params.filter.is_some(),
with_context: params.context.is_some(),
index_creation: index_scheduler.index(&index_uid).is_err(),
},
&req,
);
@ -670,10 +902,18 @@ pub async fn clear_all_documents(
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
analytics.publish(
DocumentsDeletionAggregator {
clear_all: true,
per_document_id: false,
per_batch: false,
per_filter: false,
},
&req,
);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;


@ -1,3 +1,5 @@
use std::collections::{BinaryHeap, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
@ -10,14 +12,15 @@ use meilisearch_types::locales::Locale;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy,
RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@ -53,20 +56,122 @@ pub struct FacetSearchQuery {
pub locales: Option<Vec<Locale>>,
}
#[derive(Default)]
pub struct FacetSearchAggregator {
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// Has any parameter other than facetName or facetQuery been provided?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &FacetSearchQuery) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
vector,
q,
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
} = query;
Self {
total_received: 1,
facet_names: Some(facet_name.clone()).into_iter().collect(),
additional_search_parameters_provided: q.is_some()
|| vector.is_some()
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some()
|| ranking_score_threshold.is_some()
|| locales.is_some(),
..Default::default()
}
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result;
self.total_succeeded = 1;
self.time_spent.push(*processing_time_ms as usize);
}
}
impl Aggregate for FacetSearchAggregator {
fn event_name(&self) -> &'static str {
"Facet Searched POST"
}
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
for time in new.time_spent {
self.time_spent.push(time);
}
Box::new(Self {
total_received: self.total_received.saturating_add(new.total_received),
total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded),
time_spent: self.time_spent,
facet_names: self.facet_names.union(&new.facet_names).cloned().collect(),
additional_search_parameters_provided: self.additional_search_parameters_provided
| new.additional_search_parameters_provided,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
time_spent,
facet_names,
additional_search_parameters_provided,
} = *self;
// the index of the 99th percentile value
let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
// sort all the recorded response times
let time_spent = time_spent.into_sorted_vec();
// We are only interested in the slowest value among the 99% fastest responses
let time_spent = time_spent.get(percentile_99th as usize);
serde_json::json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"facets": {
"total_distinct_facet_count": facet_names.len(),
"additional_search_parameters_provided": additional_search_parameters_provided,
},
})
}
}
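A quick, purely illustrative check of the percentile arithmetic above: with 200 succeeded requests the computed index is 0.99 * (200 - 1) + 1 = 198.01, truncated to 198, which selects the first entry of the slowest 1% in the ascending sorted vector.

fn main() {
    // Illustrative numbers only; mirrors the computation in into_event above.
    let total_succeeded = 200usize;
    let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
    assert_eq!(percentile_99th as usize, 198);
}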
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!(parameters = ?query, "Facet search");
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
let mut aggregate = FacetSearchAggregator::from_query(&query);
let facet_query = query.facet_query.clone();
let facet_name = query.facet_name.clone();
@ -100,7 +205,7 @@ pub async fn search(
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_facet_search(aggregate);
analytics.publish(aggregate, &req);
let search_result = search_result?;


@ -1,3 +1,4 @@
use std::collections::BTreeSet;
use std::convert::Infallible;
use actix_web::web::Data;
@ -13,12 +14,11 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
@ -28,8 +28,11 @@ use crate::Opt;
pub mod documents;
pub mod facet_search;
pub mod search;
mod search_analytics;
pub mod settings;
mod settings_analytics;
pub mod similar;
mod similar_analytics;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
@ -123,12 +126,31 @@ pub struct IndexCreateRequest {
primary_key: Option<String>,
}
#[derive(Serialize)]
struct IndexCreatedAggregate {
primary_key: BTreeSet<String>,
}
impl Aggregate for IndexCreatedAggregate {
fn event_name(&self) -> &'static str {
"Index Created"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() })
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner();
@ -136,9 +158,8 @@ pub async fn create_index(
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
if allow_index_creation {
analytics.publish(
"Index Created".to_string(),
json!({ "primary_key": primary_key }),
Some(&req),
IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() },
&req,
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
@ -194,21 +215,38 @@ pub async fn get_index(
Ok(HttpResponse::Ok().json(index_view))
}
#[derive(Serialize)]
struct IndexUpdatedAggregate {
primary_key: BTreeSet<String>,
}
impl Aggregate for IndexUpdatedAggregate {
fn event_name(&self) -> &'static str {
"Index Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() })
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
analytics.publish(
"Index Updated".to_string(),
json!({ "primary_key": body.primary_key }),
Some(&req),
IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() },
&req,
);
let task = KindWithContent::IndexUpdate {


@ -13,12 +13,13 @@ use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, SearchAggregator};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
@ -225,7 +226,7 @@ pub async fn search_with_url_query(
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -237,7 +238,7 @@ pub async fn search_with_url_query(
add_search_rules(&mut query.filter, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let mut aggregate = SearchAggregator::<SearchGET>::from_query(&query);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
@ -254,7 +255,7 @@ pub async fn search_with_url_query(
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.get_search(aggregate);
analytics.publish(aggregate, &req);
let search_result = search_result?;
@ -268,7 +269,7 @@ pub async fn search_with_post(
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -280,7 +281,7 @@ pub async fn search_with_post(
add_search_rules(&mut query.filter, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
let index = index_scheduler.index(&index_uid)?;
@ -302,7 +303,7 @@ pub async fn search_with_post(
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
}
}
analytics.post_search(aggregate);
analytics.publish(aggregate, &req);
let search_result = search_result?;


@ -0,0 +1,485 @@
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::{json, Value};
use std::collections::{BTreeSet, BinaryHeap, HashMap};
use meilisearch_types::locales::Locale;
use crate::{
aggregate_methods,
analytics::{Aggregate, AggregateMethod},
search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEMANTIC_RATIO,
},
};
aggregate_methods!(
SearchGET => "Documents Searched GET",
SearchPOST => "Documents Searched POST",
);
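The aggregate_methods! macro is also defined in the analytics module and is not shown here; from the way Method::event_name() is called in the impl below, it plausibly expands to zero-sized marker types implementing an AggregateMethod trait. A speculative sketch for SearchGET (SearchPOST would be analogous):

// Hypothetical expansion; the real macro may differ.
pub trait AggregateMethod: 'static {
    fn event_name() -> &'static str;
}

#[derive(Default)]
pub struct SearchGET;

impl AggregateMethod for SearchGET {
    fn event_name() -> &'static str {
        "Documents Searched GET"
    }
}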
#[derive(Default)]
pub struct SearchAggregator<Method: AggregateMethod> {
// requests
total_received: usize,
total_succeeded: usize,
total_degraded: usize,
total_used_negative_operator: usize,
time_spent: BinaryHeap<usize>,
// sort
sort_with_geo_point: bool,
// every time a request has a sort, this field must be incremented by the number of criteria it contains
sort_sum_of_criteria_terms: usize,
// every time a request has a sort, this field must be incremented by one
sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
used_syntax: HashMap<String, usize>,
// attributes_to_search_on
// incremented every time a search is done using attributes_to_search_on
attributes_to_search_on_total_number_of_uses: usize,
// q
// The maximum number of terms in a q request
max_terms_number: usize,
// vector
// The maximum number of floats in a vector request
max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search differs from the default ratio.
semantic_ratio: bool,
hybrid: bool,
retrieve_vectors: bool,
// every time a search is done, we increment the counter for the matching strategy that was used
matching_strategy: HashMap<String, usize>,
// List of the unique Locales passed as parameter
locales: BTreeSet<Locale>,
// pagination
max_limit: usize,
max_offset: usize,
finite_pagination: usize,
// formatting
max_attributes_to_retrieve: usize,
max_attributes_to_highlight: usize,
highlight_pre_tag: bool,
highlight_post_tag: bool,
max_attributes_to_crop: usize,
crop_marker: bool,
show_matches_position: bool,
crop_length: bool,
// facets
facets_sum_of_terms: usize,
facets_total_number_of_facets: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
marker: std::marker::PhantomData<Method>,
}
impl<Method: AggregateMethod> SearchAggregator<Method> {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SearchQuery) -> Self {
let SearchQuery {
q,
vector,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve: _,
retrieve_vectors,
attributes_to_crop: _,
crop_length,
attributes_to_highlight: _,
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter,
sort,
distinct,
facets: _,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
} = query;
let mut ret = Self::default();
ret.total_received = 1;
if let Some(ref sort) = sort {
ret.sort_total_number_of_criteria = 1;
ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
ret.sort_sum_of_criteria_terms = sort.len();
}
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
let syntax = match filter {
Value::String(_) => "string".to_string(),
Value::Array(values) => {
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
"mixed".to_string()
} else {
"array".to_string()
}
}
_ => "none".to_string(),
};
// record which filter syntax was used
ret.used_syntax.insert(syntax, 1);
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
// attributes_to_search_on
if attributes_to_search_on.is_some() {
ret.attributes_to_search_on_total_number_of_uses = 1;
}
if let Some(ref q) = q {
ret.max_terms_number = q.split_whitespace().count();
}
if let Some(ref vector) = vector {
ret.max_vector_size = vector.len();
}
ret.retrieve_vectors |= retrieve_vectors;
if query.is_finite_pagination() {
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
ret.max_limit = limit;
ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit;
ret.finite_pagination = 1;
} else {
ret.max_limit = *limit;
ret.max_offset = *offset;
ret.finite_pagination = 0;
}
ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
if let Some(locales) = locales {
ret.locales = locales.iter().copied().collect();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH();
ret.show_matches_position = *show_matches_position;
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.hybrid = true;
}
ret
}
pub fn succeed(&mut self, result: &SearchResult) {
let SearchResult {
hits: _,
query: _,
processing_time_ms,
hits_info: _,
semantic_hit_count: _,
facet_distribution: _,
facet_stats: _,
degraded,
used_negative_operator,
} = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
if *degraded {
self.total_degraded = self.total_degraded.saturating_add(1);
}
if *used_negative_operator {
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
}
self.time_spent.push(*processing_time_ms as usize);
}
}
impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
let Self {
total_received,
total_succeeded,
mut time_spent,
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
attributes_to_search_on_total_number_of_uses,
max_terms_number,
max_vector_size,
retrieve_vectors,
matching_strategy,
max_limit,
max_offset,
finite_pagination,
max_attributes_to_retrieve,
max_attributes_to_highlight,
highlight_pre_tag,
highlight_post_tag,
max_attributes_to_crop,
crop_marker,
show_matches_position,
crop_length,
facets_sum_of_terms,
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
mut locales,
marker: _,
} = *new;
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
self.total_used_negative_operator =
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
self.time_spent.append(&mut time_spent);
// sort
self.sort_with_geo_point |= sort_with_geo_point;
self.sort_sum_of_criteria_terms =
self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms);
self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria =
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
for (key, value) in used_syntax.into_iter() {
let used_syntax = self.used_syntax.entry(key).or_insert(0);
*used_syntax = used_syntax.saturating_add(value);
}
// attributes_to_search_on
self.attributes_to_search_on_total_number_of_uses = self
.attributes_to_search_on_total_number_of_uses
.saturating_add(attributes_to_search_on_total_number_of_uses);
// q
self.max_terms_number = self.max_terms_number.max(max_terms_number);
// vector
self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
// pagination
self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset);
self.finite_pagination += finite_pagination;
// formatting
self.max_attributes_to_retrieve =
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
self.max_attributes_to_highlight =
self.max_attributes_to_highlight.max(max_attributes_to_highlight);
self.highlight_pre_tag |= highlight_pre_tag;
self.highlight_post_tag |= highlight_post_tag;
self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop);
self.crop_marker |= crop_marker;
self.show_matches_position |= show_matches_position;
self.crop_length |= crop_length;
// facets
self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms);
self.facets_total_number_of_facets =
self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets);
// matching strategy
for (key, value) in matching_strategy.into_iter() {
let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
*matching_strategy = matching_strategy.saturating_add(value);
}
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
// locales
self.locales.append(&mut locales);
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
time_spent,
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
attributes_to_search_on_total_number_of_uses,
max_terms_number,
max_vector_size,
retrieve_vectors,
matching_strategy,
max_limit,
max_offset,
finite_pagination,
max_attributes_to_retrieve,
max_attributes_to_highlight,
highlight_pre_tag,
highlight_post_tag,
max_attributes_to_crop,
crop_marker,
show_matches_position,
crop_length,
facets_sum_of_terms,
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
locales,
marker: _,
} = *self;
// sort all the recorded response times
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentile value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested in the slowest value among the 99% fastest responses
let time_spent = time_spent.get(percentile_99th);
json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
"total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
},
"sort": {
"with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
},
"distinct": distinct,
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"attributes_to_search_on": {
"total_number_of_uses": attributes_to_search_on_total_number_of_uses,
},
"q": {
"max_terms_number": max_terms_number,
},
"vector": {
"max_vector_size": max_vector_size,
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
"max_attributes_to_highlight": max_attributes_to_highlight,
"highlight_pre_tag": highlight_pre_tag,
"highlight_post_tag": highlight_post_tag,
"max_attributes_to_crop": max_attributes_to_crop,
"crop_marker": crop_marker,
"show_matches_position": show_matches_position,
"crop_length": crop_length,
},
"facets": {
"avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
},
"matching_strategy": {
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"locales": locales,
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
})
}
}


@ -1,15 +1,14 @@
use super::settings_analytics::*;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked};
use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
@ -20,7 +19,7 @@ use crate::Opt;
#[macro_export]
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => {
pub mod $attr {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
@ -80,7 +79,7 @@ macro_rules! make_setting_route {
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
req: HttpRequest,
opt: web::Data<Opt>,
$analytics_var: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -88,7 +87,10 @@ macro_rules! make_setting_route {
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req);
analytics.publish(
$crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(),
&req,
);
let new_settings = Settings {
$attr: match body {
@ -160,21 +162,7 @@ make_setting_route!(
>,
filterable_attributes,
"filterableAttributes",
analytics,
|setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"FilterableAttributes Updated".to_string(),
json!({
"filterable_attributes": {
"total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0),
"has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false),
}
}),
Some(req),
);
}
FilterableAttributesAnalytics
);
make_setting_route!(
@ -186,21 +174,7 @@ make_setting_route!(
>,
sortable_attributes,
"sortableAttributes",
analytics,
|setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"SortableAttributes Updated".to_string(),
json!({
"sortable_attributes": {
"total": setting.as_ref().map(|sort| sort.len()),
"has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
},
}),
Some(req),
);
}
SortableAttributesAnalytics
);
make_setting_route!(
@ -212,21 +186,7 @@ make_setting_route!(
>,
displayed_attributes,
"displayedAttributes",
analytics,
|displayed: &Option<Vec<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"DisplayedAttributes Updated".to_string(),
json!({
"displayed_attributes": {
"total": displayed.as_ref().map(|displayed| displayed.len()),
"with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
},
}),
Some(req),
);
}
DisplayedAttributesAnalytics
);
make_setting_route!(
@ -238,40 +198,7 @@ make_setting_route!(
>,
typo_tolerance,
"typoTolerance",
analytics,
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"TypoTolerance Updated".to_string(),
json!({
"typo_tolerance": {
"enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
"disable_on_attributes": setting
.as_ref()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
"disable_on_words": setting
.as_ref()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
"min_word_size_for_one_typo": setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.one_typo.set()))
.flatten(),
"min_word_size_for_two_typos": setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.two_typos.set()))
.flatten(),
},
}),
Some(req),
);
}
TypoToleranceAnalytics
);
make_setting_route!(
@ -283,21 +210,7 @@ make_setting_route!(
>,
searchable_attributes,
"searchableAttributes",
analytics,
|setting: &Option<Vec<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"SearchableAttributes Updated".to_string(),
json!({
"searchable_attributes": {
"total": setting.as_ref().map(|searchable| searchable.len()),
"with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
},
}),
Some(req),
);
}
SearchableAttributesAnalytics
);
make_setting_route!(
@ -309,20 +222,7 @@ make_setting_route!(
>,
stop_words,
"stopWords",
analytics,
|stop_words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"StopWords Updated".to_string(),
json!({
"stop_words": {
"total": stop_words.as_ref().map(|stop_words| stop_words.len()),
},
}),
Some(req),
);
}
StopWordsAnalytics
);
make_setting_route!(
@ -334,20 +234,7 @@ make_setting_route!(
>,
non_separator_tokens,
"nonSeparatorTokens",
analytics,
|non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"nonSeparatorTokens Updated".to_string(),
json!({
"non_separator_tokens": {
"total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
},
}),
Some(req),
);
}
NonSeparatorTokensAnalytics
);
make_setting_route!(
@ -359,20 +246,7 @@ make_setting_route!(
>,
separator_tokens,
"separatorTokens",
analytics,
|separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"separatorTokens Updated".to_string(),
json!({
"separator_tokens": {
"total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
},
}),
Some(req),
);
}
SeparatorTokensAnalytics
);
make_setting_route!(
@ -384,20 +258,7 @@ make_setting_route!(
>,
dictionary,
"dictionary",
analytics,
|dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"dictionary Updated".to_string(),
json!({
"dictionary": {
"total": dictionary.as_ref().map(|dictionary| dictionary.len()),
},
}),
Some(req),
);
}
DictionaryAnalytics
);
make_setting_route!(
@ -409,20 +270,7 @@ make_setting_route!(
>,
synonyms,
"synonyms",
analytics,
|synonyms: &Option<std::collections::BTreeMap<String, Vec<String>>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Synonyms Updated".to_string(),
json!({
"synonyms": {
"total": synonyms.as_ref().map(|synonyms| synonyms.len()),
},
}),
Some(req),
);
}
SynonymsAnalytics
);
make_setting_route!(
@ -434,19 +282,7 @@ make_setting_route!(
>,
distinct_attribute,
"distinctAttribute",
analytics,
|distinct: &Option<String>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"DistinctAttribute Updated".to_string(),
json!({
"distinct_attribute": {
"set": distinct.is_some(),
}
}),
Some(req),
);
}
DistinctAttributeAnalytics
);
make_setting_route!(
@ -458,20 +294,7 @@ make_setting_route!(
>,
proximity_precision,
"proximityPrecision",
analytics,
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"ProximityPrecision Updated".to_string(),
json!({
"proximity_precision": {
"set": precision.is_some(),
"value": precision.unwrap_or_default(),
}
}),
Some(req),
);
}
ProximityPrecisionAnalytics
);
make_setting_route!(
@ -483,17 +306,7 @@ make_setting_route!(
>,
localized_attributes,
"localizedAttributes",
analytics,
|rules: &Option<Vec<meilisearch_types::locales::LocalizedAttributesRuleView>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"LocalizedAttributesRules Updated".to_string(),
json!({
"locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>())
}),
Some(req),
);
}
LocalesAnalytics
);
make_setting_route!(
@ -505,26 +318,7 @@ make_setting_route!(
>,
ranking_rules,
"rankingRules",
analytics,
|setting: &Option<Vec<meilisearch_types::settings::RankingRuleView>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"RankingRules Updated".to_string(),
json!({
"ranking_rules": {
"words_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))),
"typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))),
"proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))),
"attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))),
"sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))),
"exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))),
"values": setting.as_ref().map(|rr| rr.iter().filter(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
}
}),
Some(req),
);
}
RankingRulesAnalytics
);
make_setting_route!(
@ -536,25 +330,7 @@ make_setting_route!(
>,
faceting,
"faceting",
analytics,
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
use serde_json::json;
use meilisearch_types::facet_values_sort::FacetValuesSort;
analytics.publish(
"Faceting Updated".to_string(),
json!({
"faceting": {
"max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
"sort_facet_values_by_star_count": setting.as_ref().and_then(|s| {
s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
}),
"sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
},
}),
Some(req),
);
}
FacetingAnalytics
);
make_setting_route!(
@ -566,20 +342,7 @@ make_setting_route!(
>,
pagination,
"pagination",
analytics,
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Pagination Updated".to_string(),
json!({
"pagination": {
"max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()),
},
}),
Some(req),
);
}
PaginationAnalytics
);
make_setting_route!(
@ -591,75 +354,9 @@ make_setting_route!(
>,
embedders,
"embedders",
analytics,
|setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
analytics.publish(
"Embedders Updated".to_string(),
serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
Some(req),
);
}
EmbeddersAnalytics
);
fn embedder_analytics(
setting: Option<
&std::collections::BTreeMap<
String,
Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
>,
>,
) -> serde_json::Value {
let mut sources = std::collections::HashSet::new();
if let Some(s) = &setting {
for source in s
.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.source.set())
{
use meilisearch_types::milli::vector::settings::EmbedderSource;
match source {
EmbedderSource::OpenAi => sources.insert("openAi"),
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
EmbedderSource::UserProvided => sources.insert("userProvided"),
EmbedderSource::Ollama => sources.insert("ollama"),
EmbedderSource::Rest => sources.insert("rest"),
};
}
};
let document_template_used = setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.document_template.set().is_some())
});
let document_template_max_bytes = setting.as_ref().and_then(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.document_template_max_bytes.set())
.max()
});
let binary_quantization_used = setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.binary_quantized.set().is_some())
});
json!(
{
"total": setting.as_ref().map(|s| s.len()),
"sources": sources,
"document_template_used": document_template_used,
"document_template_max_bytes": document_template_max_bytes,
"binary_quantization_used": binary_quantization_used,
}
)
}
make_setting_route!(
"/search-cutoff-ms",
put,
@ -669,14 +366,7 @@ make_setting_route!(
>,
search_cutoff_ms,
"searchCutoffMs",
analytics,
|setting: &Option<u64>, req: &HttpRequest| {
analytics.publish(
"Search Cutoff Updated".to_string(),
serde_json::json!({"search_cutoff_ms": setting }),
Some(req),
);
}
SearchCutoffMsAnalytics
);
macro_rules! generate_configure {
@ -720,7 +410,7 @@ pub async fn update_all(
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -729,104 +419,45 @@ pub async fn update_all(
let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish(
"Settings Updated".to_string(),
json!({
"ranking_rules": {
"words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))),
"typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))),
"proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))),
"attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))),
"sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))),
"exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))),
"values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
},
"searchable_attributes": {
"total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
"with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
},
"displayed_attributes": {
"total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()),
"with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
},
"sortable_attributes": {
"total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
"has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
},
"filterable_attributes": {
"total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
"has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
},
"distinct_attribute": {
"set": new_settings.distinct_attribute.as_ref().set().is_some()
},
"proximity_precision": {
"set": new_settings.proximity_precision.as_ref().set().is_some(),
"value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
},
"typo_tolerance": {
"enabled": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.enabled.as_ref().set())
.copied(),
"disable_on_attributes": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
"disable_on_words": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
"min_word_size_for_one_typo": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.one_typo.set()))
.flatten(),
"min_word_size_for_two_typos": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.two_typos.set()))
.flatten(),
},
"faceting": {
"max_values_per_facet": new_settings.faceting
.as_ref()
.set()
.and_then(|s| s.max_values_per_facet.as_ref().set()),
"sort_facet_values_by_star_count": new_settings.faceting
.as_ref()
.set()
.and_then(|s| {
s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
}),
"sort_facet_values_by_total": new_settings.faceting
.as_ref()
.set()
.and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
},
"pagination": {
"max_total_hits": new_settings.pagination
.as_ref()
.set()
.and_then(|s| s.max_total_hits.as_ref().set()),
},
"stop_words": {
"total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()),
},
"synonyms": {
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
},
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
"search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
"locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>()),
}),
Some(&req),
SettingsAnalytics {
ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()),
searchable_attributes: SearchableAttributesAnalytics::new(
new_settings.searchable_attributes.as_ref().set(),
),
displayed_attributes: DisplayedAttributesAnalytics::new(
new_settings.displayed_attributes.as_ref().set(),
),
sortable_attributes: SortableAttributesAnalytics::new(
new_settings.sortable_attributes.as_ref().set(),
),
filterable_attributes: FilterableAttributesAnalytics::new(
new_settings.filterable_attributes.as_ref().set(),
),
distinct_attribute: DistinctAttributeAnalytics::new(
new_settings.distinct_attribute.as_ref().set(),
),
proximity_precision: ProximityPrecisionAnalytics::new(
new_settings.proximity_precision.as_ref().set(),
),
typo_tolerance: TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()),
faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()),
pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()),
stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()),
synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()),
embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()),
search_cutoff_ms: SearchCutoffMsAnalytics::new(
new_settings.search_cutoff_ms.as_ref().set(),
),
locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()),
dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()),
separator_tokens: SeparatorTokensAnalytics::new(
new_settings.separator_tokens.as_ref().set(),
),
non_separator_tokens: NonSeparatorTokensAnalytics::new(
new_settings.non_separator_tokens.as_ref().set(),
),
},
&req,
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
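For context, every macro-generated sub-setting route now reports through the same `SettingsAnalytics` struct with only its own field filled in. A minimal illustrative sketch of that flow, assuming the `Analytics::publish(aggregate, &req)` signature used above; the handler name and wiring here are hypothetical, since the macro expansion itself is not shown in this diff:
// Illustrative only: a sub-setting handler reporting its update through the
// shared "Settings Updated" aggregate.
fn report_stop_words_update(
    analytics: &Analytics,
    stop_words: Option<&std::collections::BTreeSet<String>>,
    req: &actix_web::HttpRequest,
) {
    // Build the sub-analytics, lift it into the full settings aggregate
    // (all other fields stay at their Default), then publish it like any
    // other event.
    let aggregate = StopWordsAnalytics::new(stop_words).into_settings();
    analytics.publish(aggregate, req);
}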

View File

@ -0,0 +1,621 @@
//! All the structures used to make the analytics on the settings work.
//! The signatures of the `new` functions are not very idiomatic Rust because they must match the types received
//! through the sub-settings routes directly, without any manipulation.
//! This is why we often use an `Option<&Vec<_>>` instead of an `Option<&[_]>`.
use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::settings::{
FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings,
};
use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView};
use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use crate::analytics::Aggregate;
#[derive(Serialize, Default)]
pub struct SettingsAnalytics {
pub ranking_rules: RankingRulesAnalytics,
pub searchable_attributes: SearchableAttributesAnalytics,
pub displayed_attributes: DisplayedAttributesAnalytics,
pub sortable_attributes: SortableAttributesAnalytics,
pub filterable_attributes: FilterableAttributesAnalytics,
pub distinct_attribute: DistinctAttributeAnalytics,
pub proximity_precision: ProximityPrecisionAnalytics,
pub typo_tolerance: TypoToleranceAnalytics,
pub faceting: FacetingAnalytics,
pub pagination: PaginationAnalytics,
pub stop_words: StopWordsAnalytics,
pub synonyms: SynonymsAnalytics,
pub embedders: EmbeddersAnalytics,
pub search_cutoff_ms: SearchCutoffMsAnalytics,
pub locales: LocalesAnalytics,
pub dictionary: DictionaryAnalytics,
pub separator_tokens: SeparatorTokensAnalytics,
pub non_separator_tokens: NonSeparatorTokensAnalytics,
}
impl Aggregate for SettingsAnalytics {
fn event_name(&self) -> &'static str {
"Settings Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
ranking_rules: RankingRulesAnalytics {
words_position: new
.ranking_rules
.words_position
.or(self.ranking_rules.words_position),
typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position),
proximity_position: new
.ranking_rules
.proximity_position
.or(self.ranking_rules.proximity_position),
attribute_position: new
.ranking_rules
.attribute_position
.or(self.ranking_rules.attribute_position),
sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position),
exactness_position: new
.ranking_rules
.exactness_position
.or(self.ranking_rules.exactness_position),
values: new.ranking_rules.values.or(self.ranking_rules.values),
},
searchable_attributes: SearchableAttributesAnalytics {
total: new.searchable_attributes.total.or(self.searchable_attributes.total),
with_wildcard: new
.searchable_attributes
.with_wildcard
.or(self.searchable_attributes.with_wildcard),
},
displayed_attributes: DisplayedAttributesAnalytics {
total: new.displayed_attributes.total.or(self.displayed_attributes.total),
with_wildcard: new
.displayed_attributes
.with_wildcard
.or(self.displayed_attributes.with_wildcard),
},
sortable_attributes: SortableAttributesAnalytics {
total: new.sortable_attributes.total.or(self.sortable_attributes.total),
has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo),
},
filterable_attributes: FilterableAttributesAnalytics {
total: new.filterable_attributes.total.or(self.filterable_attributes.total),
has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo),
},
distinct_attribute: DistinctAttributeAnalytics {
set: self.distinct_attribute.set | new.distinct_attribute.set,
},
proximity_precision: ProximityPrecisionAnalytics {
set: self.proximity_precision.set | new.proximity_precision.set,
value: new.proximity_precision.value.or(self.proximity_precision.value),
},
typo_tolerance: TypoToleranceAnalytics {
enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled),
disable_on_attributes: new
.typo_tolerance
.disable_on_attributes
.or(self.typo_tolerance.disable_on_attributes),
disable_on_words: new
.typo_tolerance
.disable_on_words
.or(self.typo_tolerance.disable_on_words),
min_word_size_for_one_typo: new
.typo_tolerance
.min_word_size_for_one_typo
.or(self.typo_tolerance.min_word_size_for_one_typo),
min_word_size_for_two_typos: new
.typo_tolerance
.min_word_size_for_two_typos
.or(self.typo_tolerance.min_word_size_for_two_typos),
},
faceting: FacetingAnalytics {
max_values_per_facet: new
.faceting
.max_values_per_facet
.or(self.faceting.max_values_per_facet),
sort_facet_values_by_star_count: new
.faceting
.sort_facet_values_by_star_count
.or(self.faceting.sort_facet_values_by_star_count),
sort_facet_values_by_total: new
.faceting
.sort_facet_values_by_total
.or(self.faceting.sort_facet_values_by_total),
},
pagination: PaginationAnalytics {
max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits),
},
stop_words: StopWordsAnalytics {
total: new.stop_words.total.or(self.stop_words.total),
},
synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) },
embedders: EmbeddersAnalytics {
total: new.embedders.total.or(self.embedders.total),
sources: match (self.embedders.sources, new.embedders.sources) {
(None, None) => None,
(Some(sources), None) | (None, Some(sources)) => Some(sources),
(Some(this), Some(other)) => Some(this.union(&other).cloned().collect()),
},
document_template_used: match (
self.embedders.document_template_used,
new.embedders.document_template_used,
) {
(None, None) => None,
(Some(used), None) | (None, Some(used)) => Some(used),
(Some(this), Some(other)) => Some(this | other),
},
document_template_max_bytes: match (
self.embedders.document_template_max_bytes,
new.embedders.document_template_max_bytes,
) {
(None, None) => None,
(Some(bytes), None) | (None, Some(bytes)) => Some(bytes),
(Some(this), Some(other)) => Some(this.max(other)),
},
binary_quantization_used: match (
self.embedders.binary_quantization_used,
new.embedders.binary_quantization_used,
) {
(None, None) => None,
(Some(bq), None) | (None, Some(bq)) => Some(bq),
(Some(this), Some(other)) => Some(this | other),
},
},
search_cutoff_ms: SearchCutoffMsAnalytics {
search_cutoff_ms: new
.search_cutoff_ms
.search_cutoff_ms
.or(self.search_cutoff_ms.search_cutoff_ms),
},
locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) },
dictionary: DictionaryAnalytics {
total: new.dictionary.total.or(self.dictionary.total),
},
separator_tokens: SeparatorTokensAnalytics {
total: new.separator_tokens.total.or(self.separator_tokens.total),
},
non_separator_tokens: NonSeparatorTokensAnalytics {
total: new.non_separator_tokens.total.or(self.non_separator_tokens.total),
},
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
#[derive(Serialize, Default)]
pub struct RankingRulesAnalytics {
pub words_position: Option<usize>,
pub typo_position: Option<usize>,
pub proximity_position: Option<usize>,
pub attribute_position: Option<usize>,
pub sort_position: Option<usize>,
pub exactness_position: Option<usize>,
pub values: Option<String>,
}
impl RankingRulesAnalytics {
pub fn new(rr: Option<&Vec<RankingRuleView>>) -> Self {
RankingRulesAnalytics {
words_position: rr.as_ref().and_then(|rr| {
rr.iter()
.position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))
}),
typo_position: rr.as_ref().and_then(|rr| {
rr.iter()
.position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))
}),
proximity_position: rr.as_ref().and_then(|rr| {
rr.iter().position(|s| {
matches!(s, meilisearch_types::settings::RankingRuleView::Proximity)
})
}),
attribute_position: rr.as_ref().and_then(|rr| {
rr.iter().position(|s| {
matches!(s, meilisearch_types::settings::RankingRuleView::Attribute)
})
}),
sort_position: rr.as_ref().and_then(|rr| {
rr.iter()
.position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))
}),
exactness_position: rr.as_ref().and_then(|rr| {
rr.iter().position(|s| {
matches!(s, meilisearch_types::settings::RankingRuleView::Exactness)
})
}),
values: rr.as_ref().map(|rr| {
rr.iter()
.filter(|s| {
matches!(
s,
meilisearch_types::settings::RankingRuleView::Asc(_)
| meilisearch_types::settings::RankingRuleView::Desc(_)
)
})
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", ")
}),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { ranking_rules: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SearchableAttributesAnalytics {
pub total: Option<usize>,
pub with_wildcard: Option<bool>,
}
impl SearchableAttributesAnalytics {
pub fn new(setting: Option<&Vec<String>>) -> Self {
Self {
total: setting.as_ref().map(|searchable| searchable.len()),
with_wildcard: setting
.as_ref()
.map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { searchable_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct DisplayedAttributesAnalytics {
pub total: Option<usize>,
pub with_wildcard: Option<bool>,
}
impl DisplayedAttributesAnalytics {
pub fn new(displayed: Option<&Vec<String>>) -> Self {
Self {
total: displayed.as_ref().map(|displayed| displayed.len()),
with_wildcard: displayed
.as_ref()
.map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { displayed_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SortableAttributesAnalytics {
pub total: Option<usize>,
pub has_geo: Option<bool>,
}
impl SortableAttributesAnalytics {
pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
Self {
total: setting.as_ref().map(|sort| sort.len()),
has_geo: setting.as_ref().map(|sort| sort.contains("_geo")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { sortable_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct FilterableAttributesAnalytics {
pub total: Option<usize>,
pub has_geo: Option<bool>,
}
impl FilterableAttributesAnalytics {
pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
Self {
total: setting.as_ref().map(|filter| filter.len()),
has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { filterable_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct DistinctAttributeAnalytics {
pub set: bool,
}
impl DistinctAttributeAnalytics {
pub fn new(distinct: Option<&String>) -> Self {
Self { set: distinct.is_some() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { distinct_attribute: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct ProximityPrecisionAnalytics {
pub set: bool,
pub value: Option<ProximityPrecisionView>,
}
impl ProximityPrecisionAnalytics {
pub fn new(precision: Option<&ProximityPrecisionView>) -> Self {
Self { set: precision.is_some(), value: precision.cloned() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { proximity_precision: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct TypoToleranceAnalytics {
pub enabled: Option<bool>,
pub disable_on_attributes: Option<bool>,
pub disable_on_words: Option<bool>,
pub min_word_size_for_one_typo: Option<u8>,
pub min_word_size_for_two_typos: Option<u8>,
}
impl TypoToleranceAnalytics {
pub fn new(setting: Option<&TypoSettings>) -> Self {
Self {
enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
disable_on_attributes: setting
.as_ref()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
disable_on_words: setting
.as_ref()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
min_word_size_for_one_typo: setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set()))
.flatten(),
min_word_size_for_two_typos: setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set()))
.flatten(),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { typo_tolerance: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct FacetingAnalytics {
pub max_values_per_facet: Option<usize>,
pub sort_facet_values_by_star_count: Option<bool>,
pub sort_facet_values_by_total: Option<usize>,
}
impl FacetingAnalytics {
pub fn new(setting: Option<&FacetingSettings>) -> Self {
Self {
max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
sort_facet_values_by_star_count: setting.as_ref().and_then(|s| {
s.sort_facet_values_by
.as_ref()
.set()
.map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
}),
sort_facet_values_by_total: setting
.as_ref()
.and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { faceting: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct PaginationAnalytics {
pub max_total_hits: Option<usize>,
}
impl PaginationAnalytics {
pub fn new(setting: Option<&PaginationSettings>) -> Self {
Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { pagination: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct StopWordsAnalytics {
pub total: Option<usize>,
}
impl StopWordsAnalytics {
pub fn new(stop_words: Option<&BTreeSet<String>>) -> Self {
Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { stop_words: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SynonymsAnalytics {
pub total: Option<usize>,
}
impl SynonymsAnalytics {
pub fn new(synonyms: Option<&BTreeMap<String, Vec<String>>>) -> Self {
Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { synonyms: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct EmbeddersAnalytics {
// aggregation keeps the most recent value
pub total: Option<usize>,
// aggregation takes the union of the sources
pub sources: Option<HashSet<String>>,
// aggregation ORs the booleans together (`|=`)
pub document_template_used: Option<bool>,
// aggregation keeps the maximum
pub document_template_max_bytes: Option<usize>,
// aggregation ORs the booleans together (`|=`)
pub binary_quantization_used: Option<bool>,
}
impl EmbeddersAnalytics {
pub fn new(setting: Option<&BTreeMap<String, Setting<EmbeddingSettings>>>) -> Self {
let mut sources = std::collections::HashSet::new();
if let Some(s) = &setting {
for source in s
.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.source.set())
{
use meilisearch_types::milli::vector::settings::EmbedderSource;
match source {
EmbedderSource::OpenAi => sources.insert("openAi".to_string()),
EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()),
EmbedderSource::UserProvided => sources.insert("userProvided".to_string()),
EmbedderSource::Ollama => sources.insert("ollama".to_string()),
EmbedderSource::Rest => sources.insert("rest".to_string()),
};
}
};
Self {
total: setting.as_ref().map(|s| s.len()),
sources: Some(sources),
document_template_used: setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.document_template.set().is_some())
}),
document_template_max_bytes: setting.as_ref().and_then(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.document_template_max_bytes.set())
.max()
}),
binary_quantization_used: setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.binary_quantized.set().is_some())
}),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { embedders: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
#[serde(transparent)]
pub struct SearchCutoffMsAnalytics {
pub search_cutoff_ms: Option<u64>,
}
impl SearchCutoffMsAnalytics {
pub fn new(setting: Option<&u64>) -> Self {
Self { search_cutoff_ms: setting.copied() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { search_cutoff_ms: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
#[serde(transparent)]
pub struct LocalesAnalytics {
pub locales: Option<BTreeSet<Locale>>,
}
impl LocalesAnalytics {
pub fn new(rules: Option<&Vec<LocalizedAttributesRuleView>>) -> Self {
LocalesAnalytics {
locales: rules.as_ref().map(|rules| {
rules
.iter()
.flat_map(|rule| rule.locales.iter().cloned())
.collect::<std::collections::BTreeSet<_>>()
}),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { locales: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct DictionaryAnalytics {
pub total: Option<usize>,
}
impl DictionaryAnalytics {
pub fn new(dictionary: Option<&BTreeSet<String>>) -> Self {
Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { dictionary: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SeparatorTokensAnalytics {
pub total: Option<usize>,
}
impl SeparatorTokensAnalytics {
pub fn new(separator_tokens: Option<&BTreeSet<String>>) -> Self {
Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { separator_tokens: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct NonSeparatorTokensAnalytics {
pub total: Option<usize>,
}
impl NonSeparatorTokensAnalytics {
pub fn new(non_separator_tokens: Option<&BTreeSet<String>>) -> Self {
Self {
total: non_separator_tokens
.as_ref()
.map(|non_separator_tokens| non_separator_tokens.len()),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { non_separator_tokens: self, ..Default::default() }
}
}
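To make the aggregation rule above concrete, here is a small illustrative example (not part of the diff) of merging two partial "Settings Updated" events, assuming these analytics types and the `Aggregate` trait from `crate::analytics` are in scope:
use std::collections::{BTreeMap, BTreeSet};
// Two routes each touched one sub-setting; the merged event carries both totals.
fn merge_example() {
    let stop_words = BTreeSet::from(["the".to_string(), "a".to_string()]);
    let synonyms = BTreeMap::from([("car".to_string(), vec!["auto".to_string()])]);
    let first = StopWordsAnalytics::new(Some(&stop_words)).into_settings();
    let second = SynonymsAnalytics::new(Some(&synonyms)).into_settings();
    // `aggregate` keeps the freshest non-None value for each field, so neither
    // sub-setting is lost when the two events are collapsed into one.
    let merged = Box::new(first).aggregate(Box::new(second));
    assert_eq!(merged.event_name(), "Settings Updated");
}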

View File

@ -13,9 +13,10 @@ use serde_json::Value;
use tracing::debug;
use super::ActionPolicy;
use crate::analytics::{Analytics, SimilarAggregator};
use crate::analytics::Analytics;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
use crate::search::{
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
@ -34,13 +35,13 @@ pub async fn similar_get(
index_uid: web::Path<String>,
params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::from_query(&query, &req);
let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
debug!(parameters = ?query, "Similar get");
@ -49,7 +50,7 @@ pub async fn similar_get(
if let Ok(similar) = &similar {
aggregate.succeed(similar);
}
analytics.get_similar(aggregate);
analytics.publish(aggregate, &req);
let similar = similar?;
@ -62,21 +63,21 @@ pub async fn similar_post(
index_uid: web::Path<String>,
params: AwebJson<SimilarQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!(parameters = ?query, "Similar post");
let mut aggregate = SimilarAggregator::from_query(&query, &req);
let mut aggregate = SimilarAggregator::<SimilarPOST>::from_query(&query);
let similar = similar(index_scheduler, index_uid, query).await;
if let Ok(similar) = &similar {
aggregate.succeed(similar);
}
analytics.post_similar(aggregate);
analytics.publish(aggregate, &req);
let similar = similar?;

View File

@ -0,0 +1,235 @@
use std::collections::{BinaryHeap, HashMap};
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::{json, Value};
use crate::{
aggregate_methods,
analytics::{Aggregate, AggregateMethod},
search::{SimilarQuery, SimilarResult},
};
aggregate_methods!(
SimilarPOST => "Similar POST",
SimilarGET => "Similar GET",
);
#[derive(Default)]
pub struct SimilarAggregator<Method: AggregateMethod> {
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
used_syntax: HashMap<String, usize>,
// Whether the request asked for the vectors to be returned
retrieve_vectors: bool,
// pagination
max_limit: usize,
max_offset: usize,
// formatting
max_attributes_to_retrieve: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
marker: std::marker::PhantomData<Method>,
}
impl<Method: AggregateMethod> SimilarAggregator<Method> {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SimilarQuery) -> Self {
let SimilarQuery {
id: _,
embedder: _,
offset,
limit,
attributes_to_retrieve: _,
retrieve_vectors,
show_ranking_score,
show_ranking_score_details,
filter,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
ret.total_received = 1;
if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
let syntax = match filter {
Value::String(_) => "string".to_string(),
Value::Array(values) => {
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
"mixed".to_string()
} else {
"array".to_string()
}
}
_ => "none".to_string(),
};
// convert the string to a HashMap
ret.used_syntax.insert(syntax, 1);
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
ret.max_limit = *limit;
ret.max_offset = *offset;
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.retrieve_vectors = *retrieve_vectors;
ret
}
pub fn succeed(&mut self, result: &SimilarResult) {
let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(*processing_time_ms as usize);
}
}
impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
/// Aggregate one [SimilarAggregator] into another.
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
let Self {
total_received,
total_succeeded,
mut time_spent,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
max_limit,
max_offset,
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
retrieve_vectors,
marker: _,
} = *new;
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.time_spent.append(&mut time_spent);
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria =
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
for (key, value) in used_syntax.into_iter() {
let used_syntax = self.used_syntax.entry(key).or_insert(0);
*used_syntax = used_syntax.saturating_add(value);
}
self.retrieve_vectors |= retrieve_vectors;
// pagination
self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset);
// formatting
self.max_attributes_to_retrieve =
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
time_spent,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
max_limit,
max_offset,
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
retrieve_vectors,
marker: _,
} = *self;
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentile value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested in the slowest value among the fastest 99% of results
let time_spent = time_spent.get(percentile_99th);
json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"vector": {
"retrieve_vectors": retrieve_vectors,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
},
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
}
})
}
}
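As an illustrative sanity check on the 99th-percentile computation above (not taken from the diff):
use std::collections::BinaryHeap;
// With 200 recorded timings, 200 * 99 / 100 = 198, so the reported value is
// the 199th fastest timing: the slowest request inside the fastest 99%.
fn p99_example() {
    let heap: BinaryHeap<usize> = (1..=200).collect();
    let sorted = heap.into_sorted_vec(); // ascending: 1, 2, ..., 200
    let idx = sorted.len() * 99 / 100;   // 198
    assert_eq!(sorted.get(idx), Some(&199));
}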

View File

@ -25,6 +25,7 @@ pub mod indexes;
mod logs;
mod metrics;
mod multi_search;
mod multi_search_analytics;
mod snapshot;
mod swap_indexes;
pub mod tasks;

View File

@ -9,7 +9,7 @@ use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
@ -21,6 +21,8 @@ use crate::search::{
};
use crate::search_queue::SearchQueue;
use super::multi_search_analytics::MultiSearchAggregator;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
}
@ -35,7 +37,7 @@ pub async fn multi_search_with_post(
search_queue: Data<SearchQueue>,
params: AwebJson<FederatedSearch, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
@ -43,7 +45,7 @@ pub async fn multi_search_with_post(
let federated_search = params.into_inner();
let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);
let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search);
let FederatedSearch { mut queries, federation } = federated_search;
@ -87,7 +89,7 @@ pub async fn multi_search_with_post(
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
analytics.publish(multi_aggregate, &req);
HttpResponse::Ok().json(search_result??)
}
None => {
@ -149,7 +151,7 @@ pub async fn multi_search_with_post(
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
analytics.publish(multi_aggregate, &req);
let search_results = search_results.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message.

View File

@ -0,0 +1,170 @@
use std::collections::HashSet;
use serde_json::json;
use crate::{
analytics::Aggregate,
search::{FederatedSearch, SearchQueryWithIndex},
};
#[derive(Default)]
pub struct MultiSearchAggregator {
// requests
total_received: usize,
total_succeeded: usize,
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
total_distinct_index_count: usize,
// number of queries with a single index, use with total_received to compute a proportion
total_single_index: usize,
// sum of the number of search queries in the requests, use with total_received to compute an average
total_search_count: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
// federation
use_federation: bool,
}
impl MultiSearchAggregator {
pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
let use_federation = federated_search.federation.is_some();
let distinct_indexes: HashSet<_> = federated_search
.queries
.iter()
.map(|query| {
let query = &query;
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
let SearchQueryWithIndex {
index_uid,
federation_options: _,
q: _,
vector: _,
offset: _,
limit: _,
page: _,
hits_per_page: _,
attributes_to_retrieve: _,
retrieve_vectors: _,
attributes_to_crop: _,
crop_length: _,
attributes_to_highlight: _,
show_ranking_score: _,
show_ranking_score_details: _,
show_matches_position: _,
filter: _,
sort: _,
distinct: _,
facets: _,
highlight_pre_tag: _,
highlight_post_tag: _,
crop_marker: _,
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
locales: _,
} = query;
index_uid.as_str()
})
.collect();
let show_ranking_score =
federated_search.queries.iter().any(|query| query.show_ranking_score);
let show_ranking_score_details =
federated_search.queries.iter().any(|query| query.show_ranking_score_details);
Self {
total_received: 1,
total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: federated_search.queries.len(),
show_ranking_score,
show_ranking_score_details,
use_federation,
}
}
pub fn succeed(&mut self) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
}
}
impl Aggregate for MultiSearchAggregator {
fn event_name(&self) -> &'static str {
"Documents Searched by Multi-Search POST"
}
/// Aggregate one [MultiSearchAggregator] into another.
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
// write the aggregate in a way that will cause a compilation error if a field is added.
// take ownership of the boxed aggregate.
let this = *self;
let total_received = this.total_received.saturating_add(new.total_received);
let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
let total_distinct_index_count =
this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
let show_ranking_score_details =
this.show_ranking_score_details || new.show_ranking_score_details;
let use_federation = this.use_federation || new.use_federation;
Box::new(Self {
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
show_ranking_score,
show_ranking_score_details,
use_federation,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
show_ranking_score,
show_ranking_score_details,
use_federation,
} = *self;
json!({
"requests": {
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"indexes": {
"total_single_index": total_single_index,
"total_distinct_index_count": total_distinct_index_count,
"avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
},
"searches": {
"total_search_count": total_search_count,
"avg_search_count": (total_search_count as f64) / (total_received as f64),
},
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
},
"federation": {
"use_federation": use_federation,
}
})
}
}
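A hypothetical in-module test (not present in the diff) that exercises the averaging in `into_event`, with made-up numbers:
#[cfg(test)]
mod aggregation_sketch {
    use super::*;
    use crate::analytics::Aggregate;
    // Two requests: 3 + 1 distinct indexes and 5 + 1 queries should average
    // out to 2.0 distinct indexes and 3.0 queries per request.
    #[test]
    fn averages_after_aggregation() {
        let a = Box::new(MultiSearchAggregator {
            total_received: 1,
            total_distinct_index_count: 3,
            total_search_count: 5,
            ..Default::default()
        });
        let b = Box::new(MultiSearchAggregator {
            total_received: 1,
            total_distinct_index_count: 1,
            total_search_count: 1,
            ..Default::default()
        });
        let event = a.aggregate(b).into_event();
        assert_eq!(event["indexes"]["avg_distinct_index_count"], 2.0);
        assert_eq!(event["searches"]["avg_search_count"], 3.0);
    }
}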

View File

@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
@ -17,13 +16,15 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
}
crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created");
pub async fn create_snapshot(
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));
analytics.publish(SnapshotAnalytics::default(), &req);
let task = KindWithContent::SnapshotCreation;
let uid = get_task_id(&req, &opt)?;

View File

@ -8,10 +8,10 @@ use meilisearch_types::error::deserr_codes::InvalidSwapIndexes;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde_json::json;
use serde::Serialize;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::analytics::{Aggregate, Analytics};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
@ -29,21 +29,36 @@ pub struct SwapIndexesPayload {
indexes: Vec<IndexUid>,
}
#[derive(Serialize)]
struct IndexSwappedAnalytics {
swap_operation_number: usize,
}
impl Aggregate for IndexSwappedAnalytics {
fn event_name(&self) -> &'static str {
"Indexes Swapped"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
swap_operation_number: self.swap_operation_number.max(new.swap_operation_number),
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
analytics.publish(
"Indexes Swapped".to_string(),
json!({
"swap_operation_number": params.len(),
}),
Some(&req),
);
analytics.publish(IndexSwappedAnalytics { swap_operation_number: params.len() }, &req);
let filters = index_scheduler.filters();
let mut swaps = vec![];

View File

@ -12,18 +12,17 @@ use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::analytics::{Aggregate, AggregateMethod, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;
use crate::{aggregate_methods, Opt};
const DEFAULT_LIMIT: u32 = 20;
@ -158,12 +157,69 @@ impl TaskDeletionOrCancelationQuery {
}
}
aggregate_methods!(
CancelTasks => "Tasks Canceled",
DeleteTasks => "Tasks Deleted",
);
#[derive(Serialize)]
struct TaskFilterAnalytics<Method: AggregateMethod> {
filtered_by_uid: bool,
filtered_by_index_uid: bool,
filtered_by_type: bool,
filtered_by_status: bool,
filtered_by_canceled_by: bool,
filtered_by_before_enqueued_at: bool,
filtered_by_after_enqueued_at: bool,
filtered_by_before_started_at: bool,
filtered_by_after_started_at: bool,
filtered_by_before_finished_at: bool,
filtered_by_after_finished_at: bool,
#[serde(skip)]
marker: std::marker::PhantomData<Method>,
}
impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
filtered_by_uid: self.filtered_by_uid | new.filtered_by_uid,
filtered_by_index_uid: self.filtered_by_index_uid | new.filtered_by_index_uid,
filtered_by_type: self.filtered_by_type | new.filtered_by_type,
filtered_by_status: self.filtered_by_status | new.filtered_by_status,
filtered_by_canceled_by: self.filtered_by_canceled_by | new.filtered_by_canceled_by,
filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at
| new.filtered_by_before_enqueued_at,
filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at
| new.filtered_by_after_enqueued_at,
filtered_by_before_started_at: self.filtered_by_before_started_at
| new.filtered_by_before_started_at,
filtered_by_after_started_at: self.filtered_by_after_started_at
| new.filtered_by_after_started_at,
filtered_by_before_finished_at: self.filtered_by_before_finished_at
| new.filtered_by_before_finished_at,
filtered_by_after_finished_at: self.filtered_by_after_finished_at
| new.filtered_by_after_finished_at,
marker: std::marker::PhantomData,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@ -172,21 +228,22 @@ async fn cancel_tasks(
}
analytics.publish(
"Tasks Canceled".to_string(),
json!({
"filtered_by_uid": params.uids.is_some(),
"filtered_by_index_uid": params.index_uids.is_some(),
"filtered_by_type": params.types.is_some(),
"filtered_by_status": params.statuses.is_some(),
"filtered_by_canceled_by": params.canceled_by.is_some(),
"filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(),
"filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(),
"filtered_by_before_started_at": params.before_started_at.is_some(),
"filtered_by_after_started_at": params.after_started_at.is_some(),
"filtered_by_before_finished_at": params.before_finished_at.is_some(),
"filtered_by_after_finished_at": params.after_finished_at.is_some(),
}),
Some(&req),
TaskFilterAnalytics::<CancelTasks> {
filtered_by_uid: params.uids.is_some(),
filtered_by_index_uid: params.index_uids.is_some(),
filtered_by_type: params.types.is_some(),
filtered_by_status: params.statuses.is_some(),
filtered_by_canceled_by: params.canceled_by.is_some(),
filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(),
filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(),
filtered_by_before_started_at: params.before_started_at.is_some(),
filtered_by_after_started_at: params.after_started_at.is_some(),
filtered_by_before_finished_at: params.before_finished_at.is_some(),
filtered_by_after_finished_at: params.after_finished_at.is_some(),
marker: std::marker::PhantomData,
},
&req,
);
let query = params.into_query();
@ -214,7 +271,7 @@ async fn delete_tasks(
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@ -223,22 +280,24 @@ async fn delete_tasks(
}
analytics.publish(
"Tasks Deleted".to_string(),
json!({
"filtered_by_uid": params.uids.is_some(),
"filtered_by_index_uid": params.index_uids.is_some(),
"filtered_by_type": params.types.is_some(),
"filtered_by_status": params.statuses.is_some(),
"filtered_by_canceled_by": params.canceled_by.is_some(),
"filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(),
"filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(),
"filtered_by_before_started_at": params.before_started_at.is_some(),
"filtered_by_after_started_at": params.after_started_at.is_some(),
"filtered_by_before_finished_at": params.before_finished_at.is_some(),
"filtered_by_after_finished_at": params.after_finished_at.is_some(),
}),
Some(&req),
TaskFilterAnalytics::<DeleteTasks> {
filtered_by_uid: params.uids.is_some(),
filtered_by_index_uid: params.index_uids.is_some(),
filtered_by_type: params.types.is_some(),
filtered_by_status: params.statuses.is_some(),
filtered_by_canceled_by: params.canceled_by.is_some(),
filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(),
filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(),
filtered_by_before_started_at: params.before_started_at.is_some(),
filtered_by_after_started_at: params.after_started_at.is_some(),
filtered_by_before_finished_at: params.before_finished_at.is_some(),
filtered_by_after_finished_at: params.after_finished_at.is_some(),
marker: std::marker::PhantomData,
},
&req,
);
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(

View File

@ -796,8 +796,10 @@ fn prepare_search<'t>(
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
embedder
.embed_one(query.q.clone().unwrap())
.embed_one(query.q.clone().unwrap(), Some(deadline))
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}
@ -1195,8 +1197,13 @@ impl<'a> HitMaker<'a> {
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
(None, _) => false,
// displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
(Some(_), None) => true,
// vectors has no fid, so check its explicit name
(Some(_), None) => {
// unwrap is safe: if the displayed fields were `None`, we would have matched the first arm
let displayed_names = index.displayed_fields(rtxn)?.unwrap();
!displayed_names
.contains(&milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME)
}
// displayed_ids is a finite list, so hide if `_vectors` is not part of it
(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
};
@ -1682,7 +1689,7 @@ fn add_non_formatted_ids_to_formatted_options(
fn make_document(
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: &obkv::KvReaderU16,
obkv: obkv::KvReaderU16,
) -> Result<Document, MeilisearchHttpError> {
let mut document = serde_json::Map::new();

View File

@ -381,7 +381,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
db_path: dir.as_ref().join("db"),
dump_dir: dir.as_ref().join("dumps"),
env: "development".to_owned(),
#[cfg(feature = "analytics")]
no_analytics: true,
max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(),
max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(),

View File

@ -9,8 +9,9 @@ use actix_web::test;
use actix_web::test::TestRequest;
use actix_web::web::Data;
use index_scheduler::IndexScheduler;
use meilisearch::analytics::Analytics;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
@ -141,7 +142,7 @@ impl Service {
Data::new(search_queue),
self.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.options),
Data::new(Analytics::no_analytics()),
true,
))
.await

View File

@ -7,8 +7,9 @@ use std::str::FromStr;
use actix_web::http::header::ContentType;
use actix_web::web::Data;
use meili_snap::snapshot;
use meilisearch::analytics::Analytics;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
@ -54,7 +55,7 @@ async fn basic_test_log_stream_route() {
Data::new(search_queue),
server.service.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&server.service.options),
Data::new(Analytics::no_analytics()),
true,
))
.await;

View File

@ -568,6 +568,57 @@ async fn retrieve_vectors() {
]
"###);
// use explicit `_vectors` in displayed attributes
let (response, code) = index
.update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} ))
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
}
]
"###);
// remove `_vectors` from displayed attributes
let (response, code) =
index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;

View File

@ -137,13 +137,14 @@ fn long_text() -> &'static str {
}
async fn create_mock_tokenized() -> (MockServer, Value) {
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false).await
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false, false).await
}
async fn create_mock_with_template(
document_template: &str,
model_dimensions: ModelDimensions,
fallible: bool,
slow: bool,
) -> (MockServer, Value) {
let mock_server = MockServer::start().await;
const API_KEY: &str = "my-api-key";
@ -154,7 +155,11 @@ async fn create_mock_with_template(
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
// 0. maybe return 500
// 0. wait for a long time
if slow {
std::thread::sleep(std::time::Duration::from_secs(1));
}
// 1. maybe return 500
if fallible {
let attempt = attempt.fetch_add(1, Ordering::Relaxed);
let failed = matches!(attempt % 4, 0 | 1 | 3);
@ -167,7 +172,7 @@ async fn create_mock_with_template(
}))
}
}
// 1. check API key
// 2. check API key
match req.headers.get("Authorization") {
Some(api_key) if api_key == API_KEY_BEARER => {
{}
@ -202,7 +207,7 @@ async fn create_mock_with_template(
)
}
}
// 2. parse text inputs
// 3. parse text inputs
let query: serde_json::Value = match req.body_json() {
Ok(query) => query,
Err(_error) => return ResponseTemplate::new(400).set_body_json(
@ -223,7 +228,7 @@ async fn create_mock_with_template(
panic!("Expected {model_dimensions:?}, got {query_model_dimensions:?}")
}
// 3. for each text, find embedding in responses
// 4. for each text, find embedding in responses
let serde_json::Value::Array(inputs) = &query["input"] else {
panic!("Unexpected `input` value")
};
@ -283,7 +288,7 @@ async fn create_mock_with_template(
"embedding": embedding,
})).collect();
// 4. produce output from embeddings
// 5. produce output from embeddings
ResponseTemplate::new(200).set_body_json(json!({
"object": "list",
"data": data,
@ -317,23 +322,27 @@ const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée
{%- endif %}, de race {{doc.breed}}."#;
async fn create_mock() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false, false).await
}
async fn create_mock_dimensions() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false, false).await
}
async fn create_mock_small_embedding_model() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false, false).await
}
async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false, false).await
}
async fn create_fallible_mock() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, false).await
}
async fn create_slow_mock() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, true).await
}
// basic test "it works"
@ -1873,4 +1882,114 @@ async fn it_still_works() {
]
"###);
}
// test with a server that answers slowly and responds 500 on 3 out of 4 calls, so embedding requests exceed the 3-second search deadline
#[actix_rt::test]
async fn timeout() {
let (_mock, setting) = create_slow_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"default": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!([
{"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou",
"_vectors": {
"default": {
"embeddings": "[vector]",
"regenerate": true
}
}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["semanticHitCount"]), @"0");
snapshot!(json_string!(response["hits"]), @"[]");
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["semanticHitCount"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["semanticHitCount"]), @"0");
snapshot!(json_string!(response["hits"]), @"[]");
}
// test with a server that wrongly responds 400

View File
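For context on the `slow` flag exercised by the timeout test above: wiremock can also delay a response without blocking a thread. The sketch below is illustrative only (it is not the test suite's real mock) and returns an empty embedding list rather than the real payload.

```rust
use std::time::Duration;

use serde_json::json;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};

// Minimal sketch: every POST answers with an empty embedding list after a fixed
// one-second delay, which is enough to trip a short per-request deadline.
async fn start_slow_embedder() -> MockServer {
    let server = MockServer::start().await;
    Mock::given(method("POST"))
        .and(path("/"))
        .respond_with(
            ResponseTemplate::new(200)
                .set_body_json(json!({ "object": "list", "data": [] }))
                // `set_delay` postpones the response without tying up a worker thread.
                .set_delay(Duration::from_secs(1)),
        )
        .mount(&server)
        .await;
    server
}
```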

@ -4,6 +4,53 @@ use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
use crate::vector::generate_default_user_provided_documents;
#[actix_rt::test]
async fn field_unavailable_for_source() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": { "manual": {"source": "userProvided", "documentTemplate": "{{doc.documentTemplate}}"}},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`, `rest`). Available fields: `source`, `dimensions`, `distribution`, `binaryQuantized`",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": { "default": {"source": "openAi", "revision": "42"}},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
}
#[actix_rt::test]
async fn update_embedder() {
let server = Server::new().await;

View File

@ -682,7 +682,7 @@ fn export_a_dump(
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
count += 1;

View File

@ -12,14 +12,12 @@ readme.workspace = true
license.workspace = true
[dependencies]
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
# charabia = { version = "0.9.0", default-features = false }
charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false }
charabia = { version = "0.9.1", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
@ -29,9 +27,9 @@ fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
"rayon", # TODO Should we keep this feature
"tempfile"
], git = "https://github.com/meilisearch/grenad", branch = "various-improvements" }
"rayon",
"tempfile",
] }
heed = { version = "0.20.3", default-features = false, features = [
"serde-json",
"serde-bincode",
@ -42,14 +40,14 @@ json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
@ -81,17 +79,17 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
] }
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = "0.5.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"
hashbrown = "0.14.5"
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
@ -100,16 +98,7 @@ rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = [
"charabia/chinese",
"charabia/hebrew",
"charabia/japanese",
"charabia/thai",
"charabia/korean",
"charabia/greek",
"charabia/khmer",
"charabia/vietnamese",
"charabia/swedish-recomposition",
"charabia/german-segmentation",
"charabia/default",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB
@ -148,5 +137,8 @@ german = ["charabia/german-segmentation"]
# force swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"]
# allow turkish specialized tokenization
turkish = ["charabia/turkish"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]

View File

@ -292,7 +292,7 @@ mod test {
.unwrap()
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -321,7 +321,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -348,7 +348,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -375,7 +375,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -402,7 +402,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -429,7 +429,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -456,7 +456,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -483,7 +483,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -510,7 +510,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -555,7 +555,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
assert_eq!(
val,

View File

@ -69,7 +69,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
#[derive(Debug, Clone)]
pub struct EnrichedDocument<'a> {
pub document: &'a KvReader<FieldId>,
pub document: KvReader<'a, FieldId>,
pub document_id: DocumentId,
}

View File

@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter()
.map(|(field_id, value)| {
let field_name = index
@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned()
}
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
let mut map = Object::new();
for (k, v) in document.iter() {

View File
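The two hunks above swap the unsized `&KvReader<FieldId>` readers for the lifetime-carrying `KvReader<'_, FieldId>` of the published obkv 0.2.2, built with `new` instead of `from_slice`. A small self-contained round-trip written against the 0.2 API as it appears in these diffs (treat the exact constructor and error signatures as assumptions):

```rust
use obkv::{KvReaderU16, KvWriterU16};

fn roundtrip() {
    // Write a single field (id 0) into an in-memory obkv buffer.
    let mut writer = KvWriterU16::memory();
    writer.insert(0u16, b"hello").unwrap();
    let bytes = writer.into_inner().unwrap();

    // obkv 0.2: the reader borrows the byte slice and carries its lifetime.
    let reader: KvReaderU16<'_> = KvReaderU16::new(&bytes);
    assert_eq!(reader.get(0u16), Some(&b"hello"[..]));
}
```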

@ -1,10 +1,8 @@
use std::borrow::Cow;
use std::iter;
use std::result::Result as StdResult;
use serde_json::{from_str, Value};
use serde_json::Value;
use crate::update::new::{CowStr, TopLevelMap};
use crate::{FieldId, InternalError, Object, Result, UserError};
/// The symbol used to define levels in a nested primary key.
@ -54,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id(
&self,
document: &obkv::KvReader<FieldId>,
document: &obkv::KvReader<'_, FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
@ -102,45 +100,6 @@ impl<'a> PrimaryKey<'a> {
}
}
/// Returns the document ID based on the primary key and
/// searches for it recursively in zero-copy-deserialized documents.
pub fn document_id_from_top_level_map<'p>(
&self,
document: &TopLevelMap<'p>,
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
fn get_docid<'p>(
document: &TopLevelMap<'p>,
primary_key: &[&str],
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
match primary_key {
[] => unreachable!("arrrgh"), // would None be ok?
[primary_key] => match document.0.get(*primary_key) {
Some(value) => match from_str::<u64>(value.get()) {
Ok(value) => Ok(Ok(CowStr(Cow::Owned(value.to_string())))),
Err(_) => match from_str(value.get()) {
Ok(document_id) => Ok(Ok(document_id)),
Err(e) => Ok(Err(DocumentIdExtractionError::InvalidDocumentId(
UserError::SerdeJson(e),
))),
},
},
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
[head, tail @ ..] => match document.0.get(*head) {
Some(value) => {
let document = from_str(value.get()).map_err(InternalError::SerdeJson)?;
get_docid(&document, tail)
}
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
}
}
/// TODO do not allocate a vec every time here
let primary_key: Vec<_> = self.name().split(PRIMARY_KEY_SPLIT_SYMBOL).collect();
get_docid(document, &primary_key)
}
/// Returns an `Iterator` that gives all the possible fields names the primary key
/// can have depending of the first level name and depth of the objects.
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {

View File

@ -72,24 +72,15 @@ impl<R> DocumentsBatchCursor<R> {
}
impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// Returns a single document from the database.
pub fn get(
&mut self,
offset: u32,
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_key_equal_to(offset.to_be_bytes())? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}
/// Returns the next document, starting from the first one. Subsequent calls to
/// `next_document` advance the document reader until all the documents have been read.
pub fn next_document(
&mut self,
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
}
_otherwise => Ok(None),
}
}

View File

@ -297,6 +297,7 @@ impl From<arroy::Error> for Error {
arroy::Error::InvalidVecDimension { expected, received } => {
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
}
arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }

View File

@ -4,9 +4,6 @@ use serde::{Deserialize, Serialize};
use crate::FieldId;
mod global;
pub use global::GlobalFieldsIdsMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldsIdsMap {
names_ids: BTreeMap<String, FieldId>,

View File

@ -1,86 +0,0 @@
use std::collections::BTreeMap;
use std::sync::RwLock;
use crate::{FieldId, FieldsIdsMap};
/// A fields ids map that can be globally updated to add fields
#[derive(Debug, Clone)]
pub struct GlobalFieldsIdsMap<'indexing> {
global: &'indexing RwLock<FieldsIdsMap>,
local: LocalFieldsIdsMap,
}
#[derive(Debug, Clone)]
struct LocalFieldsIdsMap {
names_ids: BTreeMap<String, FieldId>,
ids_names: BTreeMap<FieldId, String>,
}
impl LocalFieldsIdsMap {
fn new(global: &RwLock<FieldsIdsMap>) -> Self {
let global = global.read().unwrap();
Self { names_ids: global.names_ids.clone(), ids_names: global.ids_names.clone() }
}
fn insert(&mut self, name: &str, field_id: FieldId) {
self.names_ids.insert(name.to_owned(), field_id);
self.ids_names.insert(field_id, name.to_owned());
}
fn name(&self, id: FieldId) -> Option<&str> {
self.ids_names.get(&id).map(String::as_str)
}
fn id(&self, name: &str) -> Option<FieldId> {
self.names_ids.get(name).copied()
}
}
impl<'indexing> GlobalFieldsIdsMap<'indexing> {
pub fn new(global: &'indexing RwLock<FieldsIdsMap>) -> Self {
Self { local: LocalFieldsIdsMap::new(global), global }
}
/// Returns the field id related to a field name, it will create a new field id if the
/// name is not already known. Returns `None` if the maximum field id has been reached.
pub fn id_or_insert(&mut self, name: &str) -> Option<FieldId> {
if let Some(field_id) = self.local.id(name) {
return Some(field_id);
}
{
// optimistically lookup the global map
let global = self.global.read().unwrap();
if let Some(field_id) = global.id(name) {
self.local.insert(name, field_id);
return Some(field_id);
}
}
{
let mut global = self.global.write().unwrap();
if let Some(field_id) = global.id(name) {
self.local.insert(name, field_id);
return Some(field_id);
}
let field_id = global.insert(name)?;
self.local.insert(name, field_id);
Some(field_id)
}
}
/// Get the name of a field based on its id.
pub fn name(&mut self, id: FieldId) -> Option<&str> {
if self.local.name(id).is_none() {
let global = self.global.read().unwrap();
let name = global.name(id)?;
self.local.insert(name, id);
}
self.local.name(id)
}
}

View File
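The deleted `GlobalFieldsIdsMap` above combined a per-thread cache with a shared `RwLock`, taking the write lock only after an optimistic read misses and re-checking under the lock. A generic sketch of that pattern, with illustrative names and a plain `BTreeMap` standing in for `FieldsIdsMap`:

```rust
use std::collections::BTreeMap;
use std::sync::RwLock;

// Sketch of the "optimistic read, then re-check under the write lock" pattern;
// names and types here are illustrative only.
fn id_or_insert(global: &RwLock<BTreeMap<String, u16>>, name: &str) -> Option<u16> {
    // Fast path: most lookups succeed under a shared read lock.
    if let Some(&id) = global.read().unwrap().get(name) {
        return Some(id);
    }
    // Slow path: take the write lock and re-check, since another writer may have
    // inserted the field between our read and write locks.
    let mut map = global.write().unwrap();
    if let Some(&id) = map.get(name) {
        return Some(id);
    }
    // Assign the next free id (illustrative scheme assuming no removals).
    let id = u16::try_from(map.len()).ok()?;
    map.insert(name.to_owned(), id);
    Some(id)
}
```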

@ -6,10 +6,10 @@ use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = &'a KvReaderU16;
type DItem = KvReaderU16<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::from_slice(bytes))
Ok(KvReaderU16::new(bytes))
}
}

View File

@ -122,7 +122,7 @@ impl CboRoaringBitmapCodec {
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: &KvReaderDelAdd,
deladd: KvReaderDelAdd<'_>,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {

View File

@ -1251,20 +1251,12 @@ impl Index {
/* documents */
/// Returns a document by using the document id.
pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
self.documents
.get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })
.map_err(Into::into)
}
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
Ok(ids.into_iter().map(move |id| {
let kv = self
.documents
@ -1279,7 +1271,7 @@ impl Index {
&self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
self.iter_documents(rtxn, ids)?.collect()
}
@ -1287,7 +1279,7 @@ impl Index {
pub fn all_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
}
@ -1311,7 +1303,7 @@ impl Index {
})?;
Ok(self.iter_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
let (_docid, obkv) = entry?;
match primary_key.document_id(obkv, &fields)? {
match primary_key.document_id(&obkv, &fields)? {
Ok(document_id) => Ok(document_id),
Err(_) => Err(InternalError::DocumentsError(
crate::documents::Error::InvalidDocumentFormat,
@ -1618,24 +1610,6 @@ impl Index {
.unwrap_or_default())
}
pub fn arroy_readers<'a>(
&'a self,
rtxn: &'a RoTxn<'a>,
embedder_id: u8,
quantized: bool,
) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
// Here we don't care about the dimensions, but we want to know if we can read
// in the database or if its metadata are missing because there is no document with that many vectors.
match reader.dimensions(rtxn) {
Ok(_) => Some(Ok(reader)),
Err(arroy::Error::MissingMetadata(_)) => None,
Err(e) => Some(Err(e.into())),
}
})
}
pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
}
@ -1657,14 +1631,9 @@ impl Index {
let embedding_configs = self.embedding_configs(rtxn)?;
for config in embedding_configs {
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
let embeddings = self
.arroy_readers(rtxn, embedder_id, config.config.quantized())
.map_while(|reader| {
reader
.and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
.transpose()
})
.collect::<Result<Vec<_>>>()?;
let reader =
ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
let embeddings = reader.item_vectors(rtxn, docid)?;
res.insert(config.name.to_owned(), embeddings);
}
Ok(res)

View File

@ -55,7 +55,7 @@ pub use self::error::{
};
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
@ -214,7 +214,7 @@ pub fn bucketed_position(relative: u16) -> u16 {
pub fn obkv_to_json(
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: &obkv::KvReaderU16,
obkv: obkv::KvReaderU16<'_>,
) -> Result<Object> {
displayed_fields
.iter()
@ -232,7 +232,10 @@ pub fn obkv_to_json(
}
/// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json(obkv: &obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
pub fn all_obkv_to_json(
obkv: obkv::KvReaderU16<'_>,
fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
}
@ -431,7 +434,7 @@ mod tests {
writer.insert(id1, b"1234").unwrap();
writer.insert(id2, b"4321").unwrap();
let contents = writer.into_inner().unwrap();
let obkv = obkv::KvReaderU16::from_slice(&contents);
let obkv = obkv::KvReaderU16::new(&contents);
let expected = json!({
"field1": 1234,

View File

@ -30,13 +30,13 @@ impl ParsedValue {
impl<'a> Document<'a> {
pub fn new(
data: &'a obkv::KvReaderU16,
data: obkv::KvReaderU16<'a>,
side: DelAdd,
inverted_field_map: &'a FieldsIdsMap,
) -> Self {
let mut out_data = BTreeMap::new();
for (fid, raw) in data {
let obkv = KvReaderDelAdd::from_slice(raw);
let obkv = KvReaderDelAdd::new(raw);
let Some(raw) = obkv.get(side) else {
continue;
};

View File

@ -111,7 +111,7 @@ impl Prompt {
pub fn render(
&self,
document: &obkv::KvReaderU16,
document: obkv::KvReaderU16<'_>,
side: DelAdd,
field_id_map: &FieldsIdsMapWithMetadata,
) -> Result<String, RenderPromptError> {

View File

@ -201,7 +201,9 @@ impl<'a> Search<'a> {
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();
match embedder.embed_one(query) {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
match embedder.embed_one(query, Some(deadline)) {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");

View File
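The hunk above computes an absolute deadline (`Instant::now() + Duration::from_secs(3)`) and hands it to `embed_one` as an option. As a rough illustration of how such a deadline can be turned into a per-request timeout downstream, here is a minimal sketch; the helper name and the 30-second fallback are assumptions, not Meilisearch's actual values.

```rust
use std::time::{Duration, Instant};

// Illustrative helper (assumed, not part of milli): turn an absolute deadline into
// the timeout to apply to the next HTTP request, or `None` if time already ran out.
fn remaining_timeout(deadline: Option<Instant>) -> Option<Duration> {
    match deadline {
        // `checked_duration_since` returns `None` once the deadline is in the past.
        Some(deadline) => deadline.checked_duration_since(Instant::now()),
        // No deadline was requested: fall back to a generous default (assumed value).
        None => Some(Duration::from_secs(30)),
    }
}

fn main() {
    let deadline = Instant::now() + Duration::from_secs(3);
    // Each retry re-derives its budget from the same deadline, so retries plus
    // back-off can never stretch the overall call past the 3 seconds.
    assert!(remaining_timeout(Some(deadline)).is_some());
}
```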

@ -3,7 +3,6 @@ use std::collections::hash_map::Entry;
use std::hash::Hash;
use fxhash::FxHashMap;
use grenad::MergeFunction;
use heed::types::Bytes;
use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;
@ -12,7 +11,7 @@ use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::MergeCboRoaringBitmaps;
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
};
@ -111,21 +110,19 @@ impl<'ctx> DatabaseCache<'ctx> {
.map_err(Into::into)
}
fn get_value_from_keys<'v, K1, KC, MF>(
fn get_value_from_keys<'v, K1, KC>(
txn: &'ctx RoTxn<'_>,
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, Bytes>,
universe: Option<&RoaringBitmap>,
merger: MF,
merger: MergeFn,
) -> Result<Option<RoaringBitmap>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
KC::EItem: Sized,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
if let Entry::Vacant(entry) = cache.entry(cache_key) {
let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
@ -141,7 +138,7 @@ impl<'ctx> DatabaseCache<'ctx> {
if bitmaps.is_empty() {
None
} else {
Some(merger.merge(&[], &bitmaps[..])?)
Some(merger(&[], &bitmaps[..])?)
}
}
};
@ -216,17 +213,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
DatabaseCache::get_value_from_keys::<_, _>(
self.txn,
word,
&keys[..],
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value(
None => DatabaseCache::get_value::<_, _>(
self.txn,
word,
self.word_interner.get(word).as_str(),
@ -248,17 +245,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
DatabaseCache::get_value_from_keys::<_, _>(
self.txn,
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value(
None => DatabaseCache::get_value::<_, _>(
self.txn,
word,
self.word_interner.get(word).as_str(),
@ -305,17 +302,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
DatabaseCache::get_value_from_keys::<_, _>(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value(
None => DatabaseCache::get_value::<_, _>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@ -337,17 +334,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
DatabaseCache::get_value_from_keys::<_, _>(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value(
None => DatabaseCache::get_value::<_, _>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@ -408,7 +405,7 @@ impl<'ctx> SearchContext<'ctx> {
Ok(docids)
}
ProximityPrecision::ByWord => DatabaseCache::get_value(
ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _>(
self.txn,
(proximity, word1, word2),
&(
@ -541,7 +538,7 @@ impl<'ctx> SearchContext<'ctx> {
return Ok(None);
}
DatabaseCache::get_value(
DatabaseCache::get_value::<_, _>(
self.txn,
(word, fid),
&(self.word_interner.get(word).as_str(), fid),
@ -562,7 +559,7 @@ impl<'ctx> SearchContext<'ctx> {
return Ok(None);
}
DatabaseCache::get_value(
DatabaseCache::get_value::<_, _>(
self.txn,
(word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid),
@ -632,7 +629,7 @@ impl<'ctx> SearchContext<'ctx> {
word: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value(
DatabaseCache::get_value::<_, _>(
self.txn,
(word, position),
&(self.word_interner.get(word).as_str(), position),
@ -648,7 +645,7 @@ impl<'ctx> SearchContext<'ctx> {
word_prefix: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value(
DatabaseCache::get_value::<_, _>(
self.txn,
(word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position),

View File
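This hunk trades the `MergeFunction` values (`MergeCboRoaringBitmaps`) for plain `MergeFn` function pointers such as `merge_cbo_roaring_bitmaps`. A rough sketch of what such a function-pointer alias and a compatible merge function can look like; the alias shape and error type below are assumptions, and the real merge uses milli's CBO codec rather than roaring's plain serialization:

```rust
use std::borrow::Cow;
use std::error::Error;

use roaring::RoaringBitmap;

// Assumed shape of a `MergeFn`-style alias: a plain function pointer that folds all
// values stored under one key into a single value.
pub type MergeFn =
    for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>, Box<dyn Error>>;

// Minimal sketch of a roaring-bitmap union merge, not milli's real implementation.
fn merge_roaring<'a>(
    _key: &[u8],
    values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>, Box<dyn Error>> {
    let mut acc = RoaringBitmap::new();
    for value in values {
        acc |= RoaringBitmap::deserialize_from(&value[..])?;
    }
    let mut out = Vec::with_capacity(acc.serialized_size());
    acc.serialize_into(&mut out)?;
    Ok(Cow::Owned(out))
}

// The function item coerces to the alias, so it can be passed wherever a `MergeFn`
// is expected (as `merge_deladd_cbo_roaring_bitmaps` is in the hunks above).
const _: MergeFn = merge_roaring;
```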

@ -1,11 +1,10 @@
use std::iter::FromIterator;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::score_details::{self, ScoreDetails};
use crate::vector::{DistributionShift, Embedder};
use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
use crate::{DocumentId, Result, SearchContext, SearchLogger};
pub struct VectorSort<Q: RankingRuleQueryTrait> {
@ -53,14 +52,9 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
vector_candidates: &RoaringBitmap,
) -> Result<()> {
let target = &self.target;
let mut results = Vec::new();
for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) {
let nns_by_vector =
reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
results.extend(nns_by_vector.into_iter());
}
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
self.cached_sorted_docids = results.into_iter();
Ok(())

View File

@ -1,10 +1,9 @@
use std::sync::Arc;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use crate::score_details::{self, ScoreDetails};
use crate::vector::Embedder;
use crate::vector::{ArroyWrapper, Embedder};
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
pub struct Similar<'a> {
@ -71,23 +70,13 @@ impl<'a> Similar<'a> {
.get(self.rtxn, &self.embedder_name)?
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
let mut results = Vec::new();
for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) {
let nns_by_item = reader?.nns_by_item(
self.rtxn,
self.id,
self.limit + self.offset + 1,
Some(&universe),
)?;
if let Some(mut nns_by_item) = nns_by_item {
results.append(&mut nns_by_item);
} else {
break;
}
}
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized);
let results = reader.nns_by_item(
self.rtxn,
self.id,
self.limit + self.offset + 1,
Some(&universe),
)?;
let mut documents_ids = Vec::with_capacity(self.limit);
let mut document_scores = Vec::with_capacity(self.limit);

View File

@ -3,12 +3,12 @@ use std::ops::RangeInclusive;
use roaring::bitmap::{IntoIter, RoaringBitmap};
pub struct AvailableIds {
pub struct AvailableDocumentsIds {
iter: Chain<IntoIter, RangeInclusive<u32>>,
}
impl AvailableIds {
pub fn new(docids: &RoaringBitmap) -> AvailableIds {
impl AvailableDocumentsIds {
pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
match docids.max() {
Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id);
@ -20,17 +20,17 @@ impl AvailableIds {
None => 1..=0, // empty range iterator
};
AvailableIds { iter: available.into_iter().chain(iter) }
AvailableDocumentsIds { iter: available.into_iter().chain(iter) }
}
None => {
let empty = RoaringBitmap::new().into_iter();
AvailableIds { iter: empty.chain(0..=u32::MAX) }
AvailableDocumentsIds { iter: empty.chain(0..=u32::MAX) }
}
}
}
}
impl Iterator for AvailableIds {
impl Iterator for AvailableDocumentsIds {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
@ -45,7 +45,7 @@ mod tests {
#[test]
fn empty() {
let base = RoaringBitmap::new();
let left = AvailableIds::new(&base);
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = 0..=u32::MAX;
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
@ -58,7 +58,7 @@ mod tests {
base.insert(100);
base.insert(405);
let left = AvailableIds::new(&base);
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = (0..=u32::MAX).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}

View File

@ -1,59 +0,0 @@
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
use roaring::RoaringBitmap;
/// A concurrent ID generator that will never return the same ID twice.
#[derive(Debug)]
pub struct ConcurrentAvailableIds {
/// The current tree node ID we should use if there are no other IDs available.
current: AtomicU32,
/// The total number of tree node IDs used.
used: AtomicU64,
/// A list of IDs to exhaust before picking IDs from `current`.
available: RoaringBitmap,
/// The current Nth ID to select in the bitmap.
select_in_bitmap: AtomicU32,
/// Tells if you should look in the roaring bitmap or if all the IDs are already exhausted.
look_into_bitmap: AtomicBool,
}
impl ConcurrentAvailableIds {
/// Creates an ID generator returning unique IDs, avoiding the specified used IDs.
pub fn new(used: RoaringBitmap) -> ConcurrentAvailableIds {
let last_id = used.max().map_or(0, |id| id + 1);
let used_ids = used.len();
let available = RoaringBitmap::from_sorted_iter(0..last_id).unwrap() - used;
ConcurrentAvailableIds {
current: AtomicU32::new(last_id),
used: AtomicU64::new(used_ids),
select_in_bitmap: AtomicU32::new(0),
look_into_bitmap: AtomicBool::new(!available.is_empty()),
available,
}
}
/// Returns a new unique ID and increases the count of IDs used.
pub fn next(&self) -> Option<u32> {
if self.used.fetch_add(1, Ordering::Relaxed) > u32::MAX as u64 {
None
} else if self.look_into_bitmap.load(Ordering::Relaxed) {
let current = self.select_in_bitmap.fetch_add(1, Ordering::Relaxed);
match self.available.select(current) {
Some(id) => Some(id),
None => {
self.look_into_bitmap.store(false, Ordering::Relaxed);
Some(self.current.fetch_add(1, Ordering::Relaxed))
}
}
} else {
Some(self.current.fetch_add(1, Ordering::Relaxed))
}
}
/// Returns the number of used ids in total.
pub fn used(&self) -> u64 {
self.used.load(Ordering::Relaxed)
}
}

View File

@ -1,7 +1,7 @@
use obkv::Key;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub type KvReaderDelAdd = obkv::KvReader<DelAdd>;
pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>;
/// DelAdd defines the new value to add in the database and old value to delete from the database.
///
@ -36,7 +36,7 @@ impl Key for DelAdd {
/// Addition: put all the values under DelAdd::Addition,
/// DeletionAndAddition: put all the values under DelAdd::Deletion and DelAdd::Addition,
pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
reader: &obkv::KvReader<K>,
reader: obkv::KvReader<'_, K>,
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
@ -46,7 +46,7 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
/// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: &obkv::KvReader<K>,
reader: obkv::KvReader<'_, K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
@ -86,8 +86,8 @@ pub enum DelAddOperation {
/// putting each deletion obkv's keys under an DelAdd::Deletion
/// and putting each addition obkv's keys under an DelAdd::Addition
pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
deletion: &obkv::KvReader<K>,
addition: &obkv::KvReader<K>,
deletion: &obkv::KvReader<'_, K>,
addition: &obkv::KvReader<'_, K>,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
use itertools::merge_join_by;
@ -121,7 +121,7 @@ pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
writer.finish()
}
pub fn is_noop_del_add_obkv(del_add: &KvReaderDelAdd) -> bool {
pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd<'_>) -> bool {
del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition)
}
@ -136,5 +136,5 @@ pub fn deladd_serialize_add_side<'a>(
obkv: &'a [u8],
_buffer: &mut Vec<u8>,
) -> crate::Result<&'a [u8]> {
Ok(KvReaderDelAdd::from_slice(obkv).get(DelAdd::Addition).unwrap_or_default())
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
}

View File

@ -14,7 +14,7 @@ use crate::heed_codec::facet::{
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
@ -29,7 +29,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
delta_data: Option<Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>>,
delta_data: Option<Merger<BufReader<File>, MergeFn>>,
}
impl<'i> FacetsUpdateBulk<'i> {
@ -37,7 +37,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeFn>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@ -90,7 +90,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub delta_data: Option<Merger<R, MergeDeladdCboRoaringBitmaps>>,
pub delta_data: Option<Merger<R, MergeFn>>,
pub group_size: u8,
pub min_level_size: u8,
}
@ -135,7 +135,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::from_slice(value);
let value = KvReaderDelAdd::new(value);
// DB is empty, it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else {
@ -161,7 +161,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
continue;
}
let value = KvReaderDelAdd::from_slice(value);
let value = KvReaderDelAdd::new(value);
// the value is a CboRoaringBitmap, but I still need to prepend the
// group size for level 0 (= 1) to it

View File

@ -15,7 +15,7 @@ use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, Index, Result};
/// Enum used as a return value for the facet incremental indexing.
@ -57,14 +57,14 @@ enum ModificationResult {
/// `facet_id_(string/f64)_docids` databases.
pub struct FacetsUpdateIncremental {
inner: FacetsUpdateIncrementalInner,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeFn>,
}
impl FacetsUpdateIncremental {
pub fn new(
index: &Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeFn>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,
@ -109,7 +109,7 @@ impl FacetsUpdateIncremental {
}
current_field_id = Some(key.field_id);
let value = KvReader::from_slice(value);
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)

View File

@ -86,11 +86,12 @@ use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::MergeFn;
use crate::{try_split_array_at, FieldId, Index, Result};
pub mod bulk;
@ -104,8 +105,8 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
delta_data: Merger<BufReader<File>, MergeFn>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeFn>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
@ -115,8 +116,8 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
delta_data: Merger<BufReader<File>, MergeFn>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeFn>>,
data_size: u64,
) -> Self {
let database = match facet_type {
@ -181,12 +182,12 @@ impl<'i> FacetsUpdate<'i> {
fn index_facet_search(
wtxn: &mut heed::RwTxn<'_>,
normalized_delta_data: Merger<BufReader<File>, MergeDeladdBtreesetString>,
normalized_delta_data: Merger<BufReader<File>, MergeFn>,
index: &Index,
) -> Result<()> {
let mut iter = normalized_delta_data.into_stream_merger_iter()?;
while let Some((key_bytes, delta_bytes)) = iter.next()? {
let deladd_reader = KvReaderDelAdd::from_slice(delta_bytes);
let deladd_reader = KvReaderDelAdd::new(delta_bytes);
let database_set = index
.facet_id_normalized_string_strings
@ -297,8 +298,8 @@ pub(crate) mod test_helpers {
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
use crate::update::FacetsUpdateIncrementalInner;
use crate::update::index_documents::merge_deladd_cbo_roaring_bitmaps;
use crate::update::{FacetsUpdateIncrementalInner, MergeFn};
use crate::CboRoaringBitmapCodec;
/// Utility function to generate a string whose position in a lexicographically
@ -483,7 +484,7 @@ pub(crate) mod test_helpers {
}
writer.finish().unwrap();
let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
builder.push(reader.into_cursor().unwrap());
let merger = builder.build();

View File

@ -47,7 +47,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
return match cursor.next_document()? {
Some(first_document) => Ok(Err(UserError::MissingDocumentId {
primary_key: primary_key.to_string(),
document: obkv_to_object(first_document, &documents_batch_index)?,
document: obkv_to_object(&first_document, &documents_batch_index)?,
})),
None => unreachable!("Called with reader.is_empty()"),
};
@ -106,7 +106,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
let mut count = 0;
while let Some(document) = cursor.next_document()? {
let document_id = match fetch_or_generate_document_id(
document,
&document,
&documents_batch_index,
primary_key,
autogenerate_docids,
@ -145,7 +145,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")]
fn fetch_or_generate_document_id(
document: &obkv::KvReader<FieldId>,
document: &obkv::KvReader<'_, FieldId>,
documents_batch_index: &DocumentsBatchIndex,
primary_key: PrimaryKey<'_>,
autogenerate_docids: bool,

View File

@ -8,7 +8,7 @@ use obkv::{KvReader, KvWriterU16};
use roaring::RoaringBitmap;
use serde_json::Value;
use super::helpers::{create_sorter, sorter_into_reader, GrenadParameters, KeepLatestObkv};
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
use crate::error::{InternalError, SerializationError};
use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
@ -35,7 +35,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let mut documents_ids = RoaringBitmap::new();
let mut docid_word_positions_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
KeepLatestObkv,
keep_latest_obkv,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -80,10 +80,10 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::from_slice(value);
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !force_reindexing && !searchable_fields_changed(obkv, settings_diff) {
if !force_reindexing && !searchable_fields_changed(&obkv, settings_diff) {
continue;
}
@ -98,7 +98,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|| {
// deletions
tokens_from_document(
obkv,
&obkv,
&settings_diff.old,
&del_tokenizer,
max_positions_per_attributes,
@ -109,7 +109,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|| {
// additions
tokens_from_document(
obkv,
&obkv,
&settings_diff.new,
&add_tokenizer,
max_positions_per_attributes,
@ -126,13 +126,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
// transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
value_buffer.clear();
del_add_from_two_obkvs(
KvReader::<FieldId>::from_slice(del_obkv),
KvReader::<FieldId>::from_slice(add_obkv),
&KvReader::<FieldId>::new(del_obkv),
&KvReader::<FieldId>::new(add_obkv),
&mut value_buffer,
)?;
// write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
let obkv = KvReader::<FieldId>::from_slice(&value_buffer);
let obkv = KvReader::<FieldId>::new(&value_buffer);
for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
@ -146,13 +146,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
obkv: &KvReader<'_, FieldId>,
settings_diff: &InnerIndexSettingsDiff,
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) {
let del_add = KvReaderDelAdd::from_slice(field_bytes);
let del_add = KvReaderDelAdd::new(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
@ -189,7 +189,7 @@ fn tokenizer_builder<'a>(
/// Extract words mapped with their positions of a document.
fn tokens_from_document<'a>(
obkv: &'a KvReader<FieldId>,
obkv: &KvReader<'a, FieldId>,
settings: &InnerIndexSettings,
tokenizer: &Tokenizer<'_>,
max_positions_per_attributes: u32,
@ -202,7 +202,7 @@ fn tokens_from_document<'a>(
// if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
// parse json.
let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;

View File

@ -4,7 +4,7 @@ use std::io::{self, BufReader};
use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, sorter_into_reader, GrenadParameters, MergeDeladdCboRoaringBitmaps,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
@ -27,7 +27,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -45,7 +45,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::from_slice(deladd_obkv_bytes).iter() {
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
}
obkv.finish()?;

View File

@ -15,7 +15,7 @@ use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::{
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
@ -56,7 +56,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -65,7 +65,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut normalized_facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
MergeDeladdBtreesetString,
merge_deladd_btreeset_string,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -75,7 +75,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some();
@ -144,7 +144,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -153,7 +153,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut normalized_facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
MergeDeladdBtreesetString,
merge_deladd_btreeset_string,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -163,7 +163,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some();

View File

@ -1,8 +1,10 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
@ -13,13 +15,13 @@ use roaring::RoaringBitmap;
use serde_json::{from_slice, Value};
use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, sorter_into_reader, GrenadParameters, KeepFirst};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@ -48,7 +50,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut fid_docid_facet_numbers_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
KeepFirst,
keep_first,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -57,7 +59,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut fid_docid_facet_strings_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
KeepFirst,
keep_first,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -81,10 +83,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::from_slice(value);
let obkv = obkv::KvReader::new(value);
let get_document_json_value = move |field_id, side| {
obkv.get(field_id)
.map(KvReaderDelAdd::from_slice)
.map(KvReaderDelAdd::new)
.and_then(|kv| kv.get(side))
.map(from_slice)
.transpose()
@ -328,12 +330,15 @@ fn truncate_str(s: &str) -> &str {
/// Computes the diff between both Del and Add numbers and
/// only inserts the parts that differ in the sorter.
fn insert_numbers_diff(
fid_docid_facet_numbers_sorter: &mut Sorter<KeepFirst>,
fn insert_numbers_diff<MF>(
fid_docid_facet_numbers_sorter: &mut Sorter<MF>,
key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>,
) -> Result<()> {
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
// We sort and dedup the float numbers
del_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
add_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
@ -385,12 +390,15 @@ fn insert_numbers_diff(
/// Computes the diff between both Del and Add strings and
/// only inserts the parts that differ in the sorter.
fn insert_strings_diff(
fid_docid_facet_strings_sorter: &mut Sorter<KeepFirst>,
fn insert_strings_diff<MF>(
fid_docid_facet_strings_sorter: &mut Sorter<MF>,
key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>,
) -> Result<()> {
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
// We sort and dedup the normalized and original strings
del_strings.sort_unstable();
add_strings.sort_unstable();
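
`insert_numbers_diff` and `insert_strings_diff` are generic over `MF` with a higher-ranked bound, `for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>`, which any plain function with the `MergeFn` shape satisfies. Here is a minimal, self-contained sketch of that mechanism, with `Box<dyn Error>` standing in for milli's `Error` and `merge_all` as an illustrative caller (not milli code):

```rust
use std::borrow::Cow;

type BoxError = Box<dyn std::error::Error>;

// Generic over any merge callable, mirroring the `MF` bound taken by
// `insert_numbers_diff` / `insert_strings_diff` in the hunks above.
fn merge_all<MF>(merge: &MF, key: &[u8], values: &[Cow<'_, [u8]>]) -> Result<Vec<u8>, BoxError>
where
    MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>, BoxError>,
{
    // Call the merge function and detach the result from the borrowed input.
    Ok(merge(key, values)?.into_owned())
}

// A plain `fn` with the `MergeFn` shape satisfies the bound above.
fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>, BoxError> {
    Ok(values[0].clone())
}

fn main() -> Result<(), BoxError> {
    let values = [Cow::Borrowed(&b"del"[..]), Cow::Borrowed(&b"add"[..])];
    let first = merge_all(&keep_first, b"fid:docid", &values)?;
    assert_eq!(first, b"del".to_vec());
    Ok(())
}
```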


@ -4,8 +4,8 @@ use std::io::{self, BufReader};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@ -30,7 +30,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -45,23 +45,19 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let del_add_reader = KvReaderDelAdd::from_slice(value);
let del_add_reader = KvReaderDelAdd::new(value);
let deletion = del_add_reader
// get deleted words
.get(DelAdd::Deletion)
// count deleted words
.map(|deletion| {
KvReaderU16::from_slice(deletion).iter().take(MAX_COUNTED_WORDS + 1).count()
})
.map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count())
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
let addition = del_add_reader
// get added words
.get(DelAdd::Addition)
// count added words
.map(|addition| {
KvReaderU16::from_slice(addition).iter().take(MAX_COUNTED_WORDS + 1).count()
})
.map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count())
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);


@ -29,20 +29,22 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::from_slice(value);
let obkv = obkv::KvReader::new(value);
// since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed
let document_id = || -> Value {
let reader = KvReaderDelAdd::from_slice(obkv.get(primary_key_id).unwrap());
let reader = KvReaderDelAdd::new(obkv.get(primary_key_id).unwrap());
let document_id =
reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
serde_json::from_slice(document_id).unwrap()
};
// extract old version
let del_lat_lng = extract_lat_lng(obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
let del_lat_lng =
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
// extract new version
let add_lat_lng = extract_lat_lng(obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
let add_lat_lng =
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
@ -66,17 +68,15 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
/// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng(
document: &obkv::KvReader<FieldId>,
document: &obkv::KvReader<'_, FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => {
let lat =
document.get(lat_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let lng =
document.get(lng_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => {


@ -313,7 +313,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
debug_assert!(from_utf8(external_id_bytes).is_ok());
let docid = DocumentId::from_be_bytes(docid_bytes);
let obkv = obkv::KvReader::from_slice(value);
let obkv = obkv::KvReader::new(value);
key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes.as_slice());
@ -481,7 +481,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
#[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments
fn extract_vector_document_diff(
docid: DocumentId,
obkv: &obkv::KvReader<FieldId>,
obkv: obkv::KvReader<'_, FieldId>,
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
@ -526,7 +526,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
@ -562,7 +562,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
if embedder_is_manual {
@ -588,7 +588,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// if the new version of documents has the vectors in the DB,
@ -606,7 +606,7 @@ fn extract_vector_document_diff(
}
fn regenerate_if_prompt_changed(
obkv: &obkv::KvReader<FieldId>,
obkv: obkv::KvReader<'_, FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
@ -624,7 +624,7 @@ fn regenerate_if_prompt_changed(
}
fn regenerate_prompt(
obkv: &obkv::KvReader<FieldId>,
obkv: obkv::KvReader<'_, FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMapWithMetadata,
) -> Result<VectorStateDelta> {


@ -7,8 +7,8 @@ use obkv::KvReaderU16;
use roaring::RoaringBitmap;
use super::helpers::{
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters,
};
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
@ -16,6 +16,7 @@ use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::sorter_into_reader;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};
/// Extracts the word and the documents ids where this word appear.
@ -39,7 +40,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -57,17 +58,17 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::from_slice(value);
let del_add_reader = KvReaderDelAdd::new(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::from_slice(deletion).iter() {
for (_pos, word) in KvReaderU16::new(deletion).iter() {
del_words.insert(word.to_vec());
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::from_slice(addition).iter() {
for (_pos, word) in KvReaderU16::new(addition).iter() {
add_words.insert(word.to_vec());
}
}
@ -93,7 +94,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -102,7 +103,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut exact_word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -114,7 +115,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// NOTE: replacing sorters by bitmap merging is less efficient, so, use sorters.
while let Some((key, value)) = iter.next()? {
// only keep the value if their is a change to apply in the DB.
if !is_noop_del_add_obkv(KvReaderDelAdd::from_slice(value)) {
if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) {
word_fid_docids_writer.insert(key, value)?;
}
@ -122,7 +123,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// merge all deletions
let obkv = KvReaderDelAdd::from_slice(value);
let obkv = KvReaderDelAdd::new(value);
if let Some(value) = obkv.get(DelAdd::Deletion) {
let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
buffer.clear();
@ -162,7 +163,7 @@ fn words_into_sorter(
key_buffer: &mut Vec<u8>,
del_words: &BTreeSet<Vec<u8>>,
add_words: &BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
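
The `*_into_sorter` helpers in these hunks (such as `words_into_sorter`) walk a deleted set and an added set in one pass with `itertools::merge_join_by`, classifying each entry as deletion-only, addition-only, or present on both sides so that only real changes get written. A small sketch of that classification step over plain strings (the word lists and printouts are illustrative, not milli's key encoding):

```rust
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

fn main() {
    // Already-sorted word lists, standing in for the `del_words` / `add_words` sets.
    let del_words = vec!["blue", "red"];
    let add_words = vec!["green", "red"];

    // One merge pass over both sorted sequences tells us, for every word, whether it
    // only disappears, only appears, or is present on both sides of the update.
    for entry in merge_join_by(del_words, add_words, |del, add| del.cmp(add)) {
        match entry {
            Left(deleted) => println!("{deleted}: deletion only"),
            Right(added) => println!("{added}: addition only"),
            Both(word, _) => println!("{word}: on both sides, nothing changes"),
        }
    }
}
```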


@ -6,8 +6,8 @@ use std::{cmp, io};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters, MergeFn,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@ -44,7 +44,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
.map(|_| {
create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -92,8 +92,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
// deletions
if let Some(deletion) = KvReaderDelAdd::from_slice(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::from_slice(deletion).iter() {
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
@ -125,8 +125,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
// additions
if let Some(addition) = KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::from_slice(addition).iter() {
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
@ -197,7 +197,7 @@ fn document_word_positions_into_sorter(
document_id: DocumentId,
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeDeladdCboRoaringBitmaps>],
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};


@ -5,13 +5,14 @@ use std::io::{self, BufReader};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::MergeFn;
use crate::{bucketed_position, DocumentId, Result};
/// Extracts the word positions and the documents ids where this word appear.
@ -28,7 +29,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -59,10 +60,10 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::from_slice(value);
let del_add_reader = KvReaderDelAdd::new(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::from_slice(deletion).iter() {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
let position = bucketed_position(position);
del_word_positions.insert((position, word_bytes.to_vec()));
}
@ -70,7 +71,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (position, word_bytes) in KvReaderU16::from_slice(addition).iter() {
for (position, word_bytes) in KvReaderU16::new(addition).iter() {
let position = bucketed_position(position);
add_word_positions.insert((position, word_bytes.to_vec()));
}
@ -99,7 +100,7 @@ fn words_position_into_sorter(
key_buffer: &mut Vec<u8>,
del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
word_position_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};


@ -1,10 +1,11 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Seek};
use grenad::{CompressionType, MergeFunction, Sorter};
use grenad::{CompressionType, Sorter};
use heed::types::Bytes;
use super::ClonableMmap;
use super::{ClonableMmap, MergeFn};
use crate::update::index_documents::valid_lmdb_key;
use crate::Result;
@ -30,14 +31,14 @@ pub fn create_writer<R: io::Write>(
/// A helper function that creates a grenad sorter
/// with the given parameters. The max memory is
/// clamped to something reasonable.
pub fn create_sorter<MF: MergeFunction>(
pub fn create_sorter(
sort_algorithm: grenad::SortAlgorithm,
merge: MF,
merge: MergeFn,
chunk_compression_type: grenad::CompressionType,
chunk_compression_level: Option<u32>,
max_nb_chunks: Option<usize>,
max_memory: Option<usize>,
) -> grenad::Sorter<MF> {
) -> grenad::Sorter<MergeFn> {
let mut builder = grenad::Sorter::builder(merge);
builder.chunk_compression_type(chunk_compression_type);
if let Some(level) = chunk_compression_level {
@ -56,14 +57,10 @@ pub fn create_sorter<MF: MergeFunction>(
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn sorter_into_reader<MF>(
sorter: grenad::Sorter<MF>,
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>>
where
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
) -> Result<grenad::Reader<BufReader<File>>> {
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@ -172,8 +169,8 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
/// Write provided sorter in database using serialize_value function.
/// merge_values function is used if an entry already exist in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn write_sorter_into_database<K, V, FS, FM, MF>(
sorter: Sorter<MF>,
pub fn write_sorter_into_database<K, V, FS, FM>(
sorter: Sorter<MergeFn>,
database: &heed::Database<K, V>,
wtxn: &mut heed::RwTxn<'_>,
index_is_empty: bool,
@ -183,8 +180,6 @@ pub fn write_sorter_into_database<K, V, FS, FM, MF>(
where
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
let mut buffer = Vec::new();
let database = database.remap_types::<Bytes, Bytes>();
@ -212,3 +207,8 @@ where
Ok(())
}
/// Used when trying to merge readers, but you don't actually care about the values.
pub fn merge_ignore_values<'a>(_key: &[u8], _values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(Cow::Owned(Vec::new()))
}
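
`write_sorter_into_database` keeps its `FS`/`FM` parameters, whose bounds encode a buffer-reuse convention: the caller passes a scratch `Vec<u8>` and receives back a slice that borrows from it. A minimal sketch of a serializer with the `FS` shape, using `std::io::Result` in place of milli's `Result` (the `serialize_value` name here is illustrative):

```rust
// Illustrative serializer with the same shape as the `FS` bound:
// for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>.
fn serialize_value<'a>(value: &'a [u8], buffer: &'a mut Vec<u8>) -> std::io::Result<&'a [u8]> {
    buffer.clear();
    // A real serializer would transform `value` here; we simply copy it so the
    // returned slice borrows from the reusable buffer instead of allocating.
    buffer.extend_from_slice(value);
    Ok(buffer.as_slice())
}

fn main() -> std::io::Result<()> {
    let mut buffer = Vec::new();
    for value in [&b"first"[..], &b"second"[..]] {
        let serialized = serialize_value(value, &mut buffer)?;
        println!("{} bytes ready to write", serialized.len());
    }
    Ok(())
}
```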


@ -3,8 +3,6 @@ use std::collections::BTreeSet;
use std::io;
use std::result::Result as StdResult;
use either::Either;
use grenad::MergeFunction;
use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec;
@ -12,8 +10,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type EitherObkvMerge =
Either<ObkvsKeepLastAdditionMergeDeletions, ObkvsMergeAdditionsAndDeletions>;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) -> io::Result<()> {
buffer.clear();
@ -21,53 +18,35 @@ pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) ->
bitmap.serialize_into(buffer)
}
pub struct MergeRoaringBitmaps;
impl MergeFunction for MergeRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let merged = values
.iter()
.map(AsRef::as_ref)
.map(RoaringBitmap::deserialize_from)
.map(StdResult::unwrap)
.reduce(|a, b| a | b)
.unwrap();
let mut buffer = Vec::new();
serialize_roaring_bitmap(&merged, &mut buffer)?;
Ok(Cow::Owned(buffer))
}
pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let merged = values
.iter()
.map(AsRef::as_ref)
.map(RoaringBitmap::deserialize_from)
.map(StdResult::unwrap)
.reduce(|a, b| a | b)
.unwrap();
let mut buffer = Vec::new();
serialize_roaring_bitmap(&merged, &mut buffer)?;
Ok(Cow::Owned(buffer))
}
}
pub struct KeepFirst;
impl MergeFunction for KeepFirst {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(values[0].clone())
}
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(values[0].clone())
}
/// Only the last value associated with an id is kept.
pub struct KeepLatestObkv;
impl MergeFunction for KeepLatestObkv {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs.last().unwrap().clone())
}
pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs.last().unwrap().clone())
}
pub fn merge_two_del_add_obkvs(
base: &obkv::KvReaderU16,
update: &obkv::KvReaderU16,
base: obkv::KvReaderU16<'_>,
update: obkv::KvReaderU16<'_>,
merge_additions: bool,
buffer: &mut Vec<u8>,
) {
@ -87,7 +66,7 @@ pub fn merge_two_del_add_obkvs(
// If merge_additions is false, recreate an obkv keeping the deletions only.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::from_slice(v);
let base_reader = KvReaderDelAdd::new(v);
if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
@ -101,8 +80,8 @@ pub fn merge_two_del_add_obkvs(
// merge deletions and additions.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::from_slice(base);
let update_reader = KvReaderDelAdd::from_slice(update);
let base_reader = KvReaderDelAdd::new(base);
let update_reader = KvReaderDelAdd::new(update);
// keep newest deletion.
if let Some(deletion) = update_reader
@ -152,8 +131,8 @@ fn inner_merge_del_add_obkvs<'a>(
break;
}
let newest = obkv::KvReader::from_slice(&acc);
let oldest = obkv::KvReader::from_slice(&current[1..]);
let newest = obkv::KvReader::new(&acc);
let oldest = obkv::KvReader::new(&current[1..]);
merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
// we want the result of the merge into our accumulator.
@ -166,79 +145,65 @@ fn inner_merge_del_add_obkvs<'a>(
}
/// Merge all the obkvs from the newest to the oldest.
#[derive(Copy, Clone)]
pub struct ObkvsMergeAdditionsAndDeletions;
impl MergeFunction for ObkvsMergeAdditionsAndDeletions {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, true)
}
pub fn obkvs_merge_additions_and_deletions<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, true)
}
/// Merge all the obkvs deletions from the newest to the oldest and keep only the newest additions.
#[derive(Copy, Clone)]
pub struct ObkvsKeepLastAdditionMergeDeletions;
impl MergeFunction for ObkvsKeepLastAdditionMergeDeletions {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, false)
}
pub fn obkvs_keep_last_addition_merge_deletions<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, false)
}
/// Do a union of all the CboRoaringBitmaps in the values.
pub struct MergeCboRoaringBitmaps;
impl MergeFunction for MergeCboRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
}
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
}
}
/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv
/// separately and outputs a new DelAdd with both unions.
pub struct MergeDeladdCboRoaringBitmaps;
impl MergeFunction for MergeDeladdCboRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new();
for value in values {
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes);
}
if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) {
add_bitmaps_bytes.push(bitmap_bytes);
}
pub fn merge_deladd_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes);
}
if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) {
add_bitmaps_bytes.push(bitmap_bytes);
}
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
}
@ -252,7 +217,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::from_slice(deladd_obkv),
KvReaderDelAdd::new(deladd_obkv),
previous,
buffer,
)?)
@ -260,55 +225,37 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
/// Do a union of BtreeSet on both sides of a DelAdd obkv
/// separately and outputs a new DelAdd with both unions.
pub struct MergeDeladdBtreesetString;
impl MergeFunction for MergeDeladdBtreesetString {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_set = BTreeSet::new();
let mut add_set = BTreeSet::new();
for value in values {
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bytes) = obkv.get(DelAdd::Deletion) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
del_set.insert(value);
}
}
if let Some(bytes) = obkv.get(DelAdd::Addition) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
add_set.insert(value);
}
pub fn merge_deladd_btreeset_string<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_set = BTreeSet::new();
let mut add_set = BTreeSet::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
if let Some(bytes) = obkv.get(DelAdd::Deletion) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
del_set.insert(value);
}
}
if let Some(bytes) = obkv.get(DelAdd::Addition) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
add_set.insert(value);
}
}
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let del = serde_json::to_vec(&del_set).unwrap();
output_deladd_obkv.insert(DelAdd::Deletion, &del)?;
let add = serde_json::to_vec(&add_set).unwrap();
output_deladd_obkv.insert(DelAdd::Addition, &add)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
}
}
/// Used when trying to merge readers, but you don't actually care about the values.
pub struct MergeIgnoreValues;
impl MergeFunction for MergeIgnoreValues {
type Error = crate::Error;
fn merge<'a>(
&self,
_key: &[u8],
_values: &[Cow<'a, [u8]>],
) -> std::result::Result<Cow<'a, [u8]>, Self::Error> {
Ok(Cow::Owned(Vec::new()))
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let del = serde_json::to_vec(&del_set).unwrap();
output_deladd_obkv.insert(DelAdd::Deletion, &del)?;
let add = serde_json::to_vec(&add_set).unwrap();
output_deladd_obkv.insert(DelAdd::Addition, &add)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
}
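
Whichever form they take on either side of this compare, the bitmap merge helpers in this file all reduce to the same operation: when a key has a single value it is returned untouched, otherwise every serialized bitmap is deserialized, unioned, and re-serialized. A stand-alone sketch of that union step using the `roaring` crate directly (it skips milli's `CboRoaringBitmapCodec` and the del/add obkv wrapping of the `merge_deladd_*` variants):

```rust
use roaring::RoaringBitmap;

fn main() -> std::io::Result<()> {
    // Two docid bitmaps, as they would be serialized into sorter values for one key.
    let first: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let second: RoaringBitmap = [3u32, 4].into_iter().collect();

    let mut first_bytes = Vec::new();
    first.serialize_into(&mut first_bytes)?;
    let mut second_bytes = Vec::new();
    second.serialize_into(&mut second_bytes)?;

    // The merge deserializes every value, unions them, and re-serializes the result
    // (the single-value case is simply cloned through, as in the functions above).
    let merged = RoaringBitmap::deserialize_from(&first_bytes[..])?
        | RoaringBitmap::deserialize_from(&second_bytes[..])?;
    assert_eq!(merged.len(), 4);

    let mut merged_bytes = Vec::new();
    merged.serialize_into(&mut merged_bytes)?;
    println!("merged {} docids into {} bytes", merged.len(), merged_bytes.len());
    Ok(())
}
```

The `merge_deladd_*` variants perform this union twice, once for the bytes stored under `DelAdd::Deletion` and once for those under `DelAdd::Addition`, before writing both results back into a fresh del/add obkv.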


@ -7,8 +7,17 @@ use std::convert::{TryFrom, TryInto};
pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
use fst::{IntoStreamer, Streamer};
pub use grenad_helpers::*;
pub use merge_functions::*;
pub use grenad_helpers::{
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
merge_ignore_values, sorter_into_reader, write_sorter_into_database, writer_into_reader,
GrenadParameters,
};
pub use merge_functions::{
keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_deladd_btreeset_string,
merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
obkvs_merge_additions_and_deletions, MergeFn,
};
use crate::MAX_WORD_LENGTH;


@ -27,7 +27,13 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
use self::enrich::enrich_documents_batch;
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::helpers::*;
pub use self::helpers::{
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, merge_roaring_bitmaps,
valid_lmdb_key, write_sorter_into_database, writer_into_reader, MergeFn,
};
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::{obkv_to_object, DocumentsBatchBuilder, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@ -599,7 +605,7 @@ where
let cloneable_chunk =
unsafe { as_cloneable_grenad(&word_docids_reader)? };
let word_docids = word_docids.get_or_insert_with(|| {
MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
});
word_docids.push(cloneable_chunk.into_cursor()?);
let cloneable_chunk =
@ -607,14 +613,14 @@ where
let exact_word_docids =
exact_word_docids.get_or_insert_with(|| {
MergerBuilder::new(
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps as MergeFn,
)
});
exact_word_docids.push(cloneable_chunk.into_cursor()?);
let cloneable_chunk =
unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
let word_fid_docids = word_fid_docids.get_or_insert_with(|| {
MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
});
word_fid_docids.push(cloneable_chunk.into_cursor()?);
TypedChunk::WordDocids {
@ -628,7 +634,7 @@ where
let word_position_docids =
word_position_docids.get_or_insert_with(|| {
MergerBuilder::new(
MergeDeladdCboRoaringBitmaps,
merge_deladd_cbo_roaring_bitmaps as MergeFn,
)
});
word_position_docids.push(cloneable_chunk.into_cursor()?);
@ -683,9 +689,8 @@ where
key: None,
},
)?;
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
let reader =
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized);
let dim = reader.dimensions(self.wtxn)?;
dimension.insert(name.to_string(), dim);
}
@ -694,6 +699,7 @@ where
for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn;
let vector_arroy = self.index.vector_arroy;
let cancel = &self.should_abort;
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
@ -707,17 +713,8 @@ where
let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
if is_quantizing {
writer.quantize(wtxn, k, dimension)?;
}
if writer.need_build(wtxn, dimension)? {
writer.build(wtxn, &mut rng, dimension)?;
} else if writer.is_empty(wtxn, dimension)? {
break;
}
}
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?;
Result::Ok(())
})
.map_err(InternalError::from)??;
@ -741,10 +738,10 @@ where
)]
pub fn execute_prefix_databases(
self,
word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_fid_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
word_fid_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
@ -924,7 +921,7 @@ where
)]
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn<'_>,
merger: Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>,
merger: Merger<CursorClonableMmap, MergeFn>,
word_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
indexer_config: &IndexerConfig,

View File

@ -31,14 +31,14 @@ impl<'t> ImmutableObkvs<'t> {
}
/// Returns the OBKVs identified by the given ID.
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<&'t KvReaderU16>> {
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<KvReaderU16<'t>>> {
match self
.ids
.rank(docid)
.checked_sub(1)
.and_then(|offset| self.slices.get(offset as usize))
{
Some(&bytes) => Ok(Some(bytes.into())),
Some(bytes) => Ok(Some(KvReaderU16::new(bytes))),
None => Ok(None),
}
}
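
The `ImmutableObkvs::obkv` hunk above switches between returning a `&'t KvReaderU16` reference and a `KvReaderU16<'t>` value, i.e. a small reader constructed around the borrowed bytes. A toy sketch of that second shape (the `ByteReader` type and `reader_for` helper are purely illustrative):

```rust
/// A toy reader that borrows its backing bytes, like `KvReaderU16<'t>`.
struct ByteReader<'a> {
    bytes: &'a [u8],
}

impl<'a> ByteReader<'a> {
    fn new(bytes: &'a [u8]) -> Self {
        ByteReader { bytes }
    }

    fn first(&self) -> Option<u8> {
        self.bytes.first().copied()
    }
}

/// Returning `ByteReader<'_>` hands the caller a cheap value tied to the storage's
/// lifetime, mirroring the `Option<KvReaderU16<'t>>` return type above.
fn reader_for(storage: &[u8], offset: usize) -> Option<ByteReader<'_>> {
    storage.get(offset..).map(ByteReader::new)
}

fn main() {
    let storage = vec![7u8, 8, 9];
    let reader = reader_for(&storage, 1).unwrap();
    assert_eq!(reader.first(), Some(8));
}
```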


@ -5,7 +5,6 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs::File;
use std::io::{Read, Seek};
use either::Either;
use fxhash::FxHashMap;
use itertools::Itertools;
use obkv::{KvReader, KvReaderU16, KvWriter};
@ -14,10 +13,10 @@ use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{
create_sorter, create_writer, sorter_into_reader, EitherObkvMerge,
ObkvsKeepLastAdditionMergeDeletions, ObkvsMergeAdditionsAndDeletions,
create_sorter, create_writer, keep_first, obkvs_keep_last_addition_merge_deletions,
obkvs_merge_additions_and_deletions, sorter_into_reader, MergeFn,
};
use super::{IndexDocumentsMethod, IndexerConfig, KeepFirst};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
@ -27,7 +26,7 @@ use crate::update::del_add::{
};
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
@ -56,13 +55,13 @@ pub struct Transform<'a, 'i> {
indexer_settings: &'a IndexerConfig,
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableIds,
available_documents_ids: AvailableDocumentsIds,
// Both grenad follows the same format:
// key | value
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
original_sorter: grenad::Sorter<EitherObkvMerge>,
flattened_sorter: grenad::Sorter<EitherObkvMerge>,
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
@ -110,19 +109,17 @@ impl<'a, 'i> Transform<'a, 'i> {
index_documents_method: IndexDocumentsMethod,
_autogenerate_docids: bool,
) -> Result<Self> {
use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};
// We must choose the appropriate merge function for when two or more documents
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
ReplaceDocuments => Either::Left(ObkvsKeepLastAdditionMergeDeletions),
UpdateDocuments => Either::Right(ObkvsMergeAdditionsAndDeletions),
IndexDocumentsMethod::ReplaceDocuments => obkvs_keep_last_addition_merge_deletions,
IndexDocumentsMethod::UpdateDocuments => obkvs_merge_additions_and_deletions,
};
// We initialize the sorter with the user indexing settings.
let original_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_function.clone(),
merge_function,
indexer_settings.chunk_compression_type,
indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks,
@ -144,7 +141,7 @@ impl<'a, 'i> Transform<'a, 'i> {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
indexer_settings,
available_documents_ids: AvailableIds::new(&documents_ids),
available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
original_sorter,
flattened_sorter,
index_documents_method,
@ -282,21 +279,21 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::from_slice(base_obkv),
KvReaderU16::new(base_obkv),
deladd_operation,
&mut document_sorter_value_buffer,
)?;
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let base_obkv = KvReader::from_slice(base_obkv);
let base_obkv = KvReader::new(base_obkv);
if let Some(flattened_obkv) =
Self::flatten_from_fields_ids_map(base_obkv, &mut self.fields_ids_map)?
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)?
{
// we recreate our buffer with the flattened documents
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::from_slice(&flattened_obkv),
KvReaderU16::new(&flattened_obkv),
deladd_operation,
&mut document_sorter_value_buffer,
)?;
@ -315,7 +312,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::from_slice(&obkv_buffer),
KvReaderU16::new(&obkv_buffer),
DelAddOperation::Addition,
&mut document_sorter_value_buffer,
)?;
@ -323,14 +320,14 @@ impl<'a, 'i> Transform<'a, 'i> {
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let flattened_obkv = KvReader::from_slice(&obkv_buffer);
let flattened_obkv = KvReader::new(&obkv_buffer);
if let Some(obkv) =
Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
{
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::from_slice(&obkv),
KvReaderU16::new(&obkv),
DelAddOperation::Addition,
&mut document_sorter_value_buffer,
)?
@ -523,22 +520,22 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::from_slice(base_obkv),
KvReaderU16::new(base_obkv),
DelAddOperation::Deletion,
document_sorter_value_buffer,
)?;
self.original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
// flatten it and push it as to delete in the flattened_sorter
let flattened_obkv = KvReader::from_slice(base_obkv);
let flattened_obkv = KvReader::new(base_obkv);
if let Some(obkv) =
Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
{
// we recreate our buffer with the flattened documents
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::from_slice(&obkv),
KvReaderU16::new(&obkv),
DelAddOperation::Deletion,
document_sorter_value_buffer,
)?;
@ -556,7 +553,7 @@ impl<'a, 'i> Transform<'a, 'i> {
target = "indexing::transform"
)]
fn flatten_from_fields_ids_map(
obkv: &KvReader<FieldId>,
obkv: &KvReader<'_, FieldId>,
fields_ids_map: &mut FieldsIdsMap,
) -> Result<Option<Vec<u8>>> {
if obkv
@ -724,10 +721,10 @@ impl<'a, 'i> Transform<'a, 'i> {
total_documents: self.documents_count,
});
for (key, value) in KvReader::from_slice(val) {
let reader = KvReaderDelAdd::from_slice(value);
for (key, value) in KvReader::new(val) {
let reader = KvReaderDelAdd::new(value);
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => (),
(None, None) => {}
(None, Some(_)) => {
// New field
let name = self.fields_ids_map.name(key).ok_or(
@ -841,7 +838,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// then fill the provided buffers with delta documents using KvWritterDelAdd.
#[allow(clippy::too_many_arguments)] // need the vectors + fid, feel free to create a struct xo xo
fn rebind_existing_document(
old_obkv: &KvReader<FieldId>,
old_obkv: KvReader<'_, FieldId>,
settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
@ -929,7 +926,7 @@ impl<'a, 'i> Transform<'a, 'i> {
}
let data = obkv_writer.into_inner()?;
let obkv = KvReader::<FieldId>::from_slice(&data);
let obkv = KvReader::<FieldId>::new(&data);
if let Some(original_obkv_buffer) = original_obkv_buffer {
original_obkv_buffer.clear();
@ -939,8 +936,8 @@ impl<'a, 'i> Transform<'a, 'i> {
if let Some(flattened_obkv_buffer) = flattened_obkv_buffer {
// take the non-flattened version if flatten_from_fields_ids_map returns None.
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
let flattened = Self::flatten_from_fields_ids_map(obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
flattened_obkv_buffer.clear();
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
@ -983,7 +980,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut original_sorter = if settings_diff.reindex_vectors() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
KeepFirst,
keep_first,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
@ -993,27 +990,24 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
.embedding_config_updates
.iter()
.filter_map(|(name, action)| {
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
action.write_back()
{
let readers: Result<Vec<_>> = self
.index
.arroy_readers(wtxn, *embedder_id, action.was_quantized)
.collect();
match readers {
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
Err(error) => Some(Err(error)),
}
let reader = ArroyWrapper::new(
self.index.vector_arroy,
*embedder_id,
action.was_quantized,
);
Some((name.as_str(), (reader, user_provided)))
} else {
None
}
})
.collect();
let readers = readers?;
let old_vectors_fid = settings_diff
.old
@ -1025,7 +1019,7 @@ impl<'a, 'i> Transform<'a, 'i> {
if settings_diff.reindex_searchable() || settings_diff.reindex_facets() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
KeepFirst,
keep_first,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
@ -1051,34 +1045,24 @@ impl<'a, 'i> Transform<'a, 'i> {
arroy::Error,
> = readers
.iter()
.filter_map(|(name, (readers, user_provided))| {
.filter_map(|(name, (reader, user_provided))| {
if !user_provided.contains(docid) {
return None;
}
let mut vectors = Vec::new();
for reader in readers {
let Some(vector) = reader.item_vector(wtxn, docid).transpose() else {
break;
};
match vector {
Ok(vector) => vectors.push(vector),
Err(error) => return Some(Err(error)),
}
match reader.item_vectors(wtxn, docid) {
Ok(vectors) if vectors.is_empty() => None,
Ok(vectors) => Some(Ok((
name.to_string(),
serde_json::to_value(ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(vectors),
),
regenerate: false,
})
.unwrap(),
))),
Err(e) => Some(Err(e)),
}
if vectors.is_empty() {
return None;
}
Some(Ok((
name.to_string(),
serde_json::to_value(ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
vectors,
)),
regenerate: false,
})
.unwrap(),
)))
})
.collect();
@ -1107,11 +1091,9 @@ impl<'a, 'i> Transform<'a, 'i> {
}
// delete all vectors from the embedders that need removal
for (_, (readers, _)) in readers {
for reader in readers {
let dimensions = reader.dimensions(wtxn)?;
reader.clear(wtxn, dimensions)?;
}
for (_, (reader, _)) in readers {
let dimensions = reader.dimensions(wtxn)?;
reader.clear(wtxn, dimensions)?;
}
let grenad_params = GrenadParameters {
@ -1155,8 +1137,6 @@ fn drop_and_reuse<U, T>(mut vec: Vec<U>) -> Vec<T> {
#[cfg(test)]
mod test {
use grenad::MergeFunction;
use super::*;
#[test]
@ -1168,21 +1148,21 @@ mod test {
kv_writer.insert(0_u8, [0]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::from_slice(&buffer),
KvReaderU16::new(&buffer),
DelAddOperation::Addition,
&mut additive_doc_0,
)
.unwrap();
additive_doc_0.insert(0, Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::from_slice(&buffer),
KvReaderU16::new(&buffer),
DelAddOperation::Deletion,
&mut deletive_doc_0,
)
.unwrap();
deletive_doc_0.insert(0, Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::from_slice(&buffer),
KvReaderU16::new(&buffer),
DelAddOperation::DeletionAndAddition,
&mut del_add_doc_0,
)
@ -1194,7 +1174,7 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::from_slice(&buffer),
KvReaderU16::new(&buffer),
DelAddOperation::Addition,
&mut additive_doc_1,
)
@ -1207,39 +1187,32 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::from_slice(&buffer),
KvReaderU16::new(&buffer),
DelAddOperation::Addition,
&mut additive_doc_0_1,
)
.unwrap();
additive_doc_0_1.insert(0, Operation::Addition as u8);
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
&[],
&[Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
let ret = obkvs_merge_additions_and_deletions(&[], &[Cow::from(additive_doc_0.as_slice())])
.unwrap();
assert_eq!(*ret, additive_doc_0);
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
let ret = obkvs_merge_additions_and_deletions(
&[],
&[Cow::from(deletive_doc_0.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, del_add_doc_0);
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
let ret = obkvs_merge_additions_and_deletions(
&[],
&[Cow::from(additive_doc_0.as_slice()), Cow::from(deletive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, deletive_doc_0);
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
let ret = obkvs_merge_additions_and_deletions(
&[],
&[
Cow::from(additive_doc_1.as_slice()),
@ -1250,24 +1223,21 @@ mod test {
.unwrap();
assert_eq!(*ret, del_add_doc_0);
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
let ret = obkvs_merge_additions_and_deletions(
&[],
&[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0_1);
let ret = MergeFunction::merge(
&ObkvsKeepLastAdditionMergeDeletions,
let ret = obkvs_keep_last_addition_merge_deletions(
&[],
&[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0);
let ret = MergeFunction::merge(
&ObkvsKeepLastAdditionMergeDeletions,
let ret = obkvs_keep_last_addition_merge_deletions(
&[],
&[
Cow::from(deletive_doc_0.as_slice()),


@ -4,17 +4,18 @@ use std::fs::File;
use std::io::{self, BufReader};
use bytemuck::allocation::pod_collect_to_vec;
use grenad::{MergeFunction, Merger, MergerBuilder};
use grenad::{Merger, MergerBuilder};
use heed::types::Bytes;
use heed::{BytesDecode, RwTxn};
use obkv::{KvReader, KvWriter};
use roaring::RoaringBitmap;
use super::helpers::{
self, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
CursorClonableMmap, KeepFirst, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
MergeIgnoreValues,
self, keep_first, merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, merge_ignore_values, valid_lmdb_key,
CursorClonableMmap,
};
use super::MergeFn;
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
use crate::facet::FacetType;
use crate::index::db_name::DOCUMENTS;
@ -23,7 +24,7 @@ use crate::proximity::MAX_DISTANCE;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{
as_cloneable_grenad, try_split_array_at, KeepLatestObkv,
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::ArroyWrapper;
@ -140,7 +141,7 @@ pub(crate) fn write_typed_chunk_into_index(
let vectors_fid =
fields_ids_map.id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
let mut builder = MergerBuilder::new(KeepLatestObkv);
let mut builder = MergerBuilder::new(keep_latest_obkv as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::Documents(chunk) = typed_chunk else {
unreachable!();
@ -162,7 +163,7 @@ pub(crate) fn write_typed_chunk_into_index(
let mut vectors_buffer = Vec::new();
while let Some((key, reader)) = iter.next()? {
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
let reader: &KvReader<FieldId> = reader.into();
let reader: KvReader<'_, FieldId> = KvReader::new(reader);
let (document_id_bytes, external_id_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCUMENTS) })?;
@ -170,7 +171,7 @@ pub(crate) fn write_typed_chunk_into_index(
let external_id = std::str::from_utf8(external_id_bytes)?;
for (field_id, value) in reader.iter() {
let del_add_reader = KvReaderDelAdd::from_slice(value);
let del_add_reader = KvReaderDelAdd::new(value);
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
let addition = if vectors_fid == Some(field_id) {
@ -234,7 +235,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdWordCountDocids(chunk) = typed_chunk else {
unreachable!();
@ -257,10 +258,13 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
let _entered = span.enter();
let mut word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut exact_word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut word_fid_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut fst_merger_builder = MergerBuilder::new(MergeIgnoreValues);
let mut word_docids_builder =
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
let mut exact_word_docids_builder =
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
let mut word_fid_docids_builder =
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
let mut fst_merger_builder = MergerBuilder::new(merge_ignore_values as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::WordDocids {
word_docids_reader,
@ -325,7 +329,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::WordPositionDocids(chunk) = typed_chunk else {
unreachable!();
@ -349,7 +353,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
let mut data_size = 0;
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids) = typed_chunk
@ -371,9 +375,10 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
let _entered = span.enter();
let mut facet_id_string_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut facet_id_string_builder =
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
let mut normalized_facet_id_string_builder =
MergerBuilder::new(MergeDeladdBtreesetString);
MergerBuilder::new(merge_deladd_btreeset_string as MergeFn);
let mut data_size = 0;
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetStringDocids((
@ -407,7 +412,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetExistsDocids(chunk) = typed_chunk else {
unreachable!();
@ -431,7 +436,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetIsNullDocids(chunk) = typed_chunk else {
unreachable!();
@ -454,7 +459,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetIsEmptyDocids(chunk) = typed_chunk else {
unreachable!();
@ -478,7 +483,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::WordPairProximityDocids(chunk) = typed_chunk else {
unreachable!();
@ -511,7 +516,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
let _entered = span.enter();
let mut builder = MergerBuilder::new(KeepFirst);
let mut builder = MergerBuilder::new(keep_first as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdDocidFacetNumbers(chunk) = typed_chunk else {
unreachable!();
@@ -525,7 +530,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
let reader = KvReaderDelAdd::from_slice(value);
let reader = KvReaderDelAdd::new(value);
if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
@@ -545,7 +550,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
let _entered = span.enter();
let mut builder = MergerBuilder::new(KeepFirst);
let mut builder = MergerBuilder::new(keep_first as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdDocidFacetStrings(chunk) = typed_chunk else {
unreachable!();
@@ -559,7 +564,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
let reader = KvReaderDelAdd::from_slice(value);
let reader = KvReaderDelAdd::new(value);
if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
@@ -578,7 +583,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
let _entered = span.enter();
let mut builder = MergerBuilder::new(KeepFirst);
let mut builder = MergerBuilder::new(keep_first as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::GeoPoints(chunk) = typed_chunk else {
unreachable!();
@@ -596,7 +601,7 @@ pub(crate) fn write_typed_chunk_into_index(
// convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
let deladd_obkv = KvReaderDelAdd::from_slice(value);
let deladd_obkv = KvReaderDelAdd::new(value);
if let Some(value) = deladd_obkv.get(DelAdd::Deletion) {
let geopoint = extract_geo_point(value, docid);
rtree.remove(&geopoint);
@@ -615,9 +620,9 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
let _entered = span.enter();
let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
let mut embeddings_builder = MergerBuilder::new(KeepFirst);
let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn);
let mut add_to_user_provided = RoaringBitmap::new();
let mut remove_from_user_provided = RoaringBitmap::new();
let mut params = None;
@@ -668,22 +673,14 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name)
.map_or(false, |conf| conf.2);
// FIXME: allow customizing distance
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
.map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
.collect();
let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized);
// remove vectors for the docids we want removed
let merger = remove_vectors_builder.build();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, _)) = iter.next()? {
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
for writer in &writers {
// Uses invariant: vectors are packed in the first writers.
if !writer.del_item(wtxn, expected_dimension, docid)? {
break;
}
}
writer.del_items(wtxn, expected_dimension, docid)?;
}
// add generated embeddings
@@ -711,9 +708,7 @@ pub(crate) fn write_typed_chunk_into_index(
embeddings.embedding_count(),
)));
}
for (embedding, writer) in embeddings.iter().zip(&writers) {
writer.add_item(wtxn, expected_dimension, docid, embedding)?;
}
writer.add_items(wtxn, docid, &embeddings)?;
}
// perform the manual diff
@@ -724,55 +719,18 @@ pub(crate) fn write_typed_chunk_into_index(
let (left, _index) = try_split_array_at(key).unwrap();
let docid = DocumentId::from_be_bytes(left);
let vector_deladd_obkv = KvReaderDelAdd::from_slice(value);
let vector_deladd_obkv = KvReaderDelAdd::new(value);
if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) {
let vector: Vec<f32> = pod_collect_to_vec(value);
let mut deleted_index = None;
for (index, writer) in writers.iter().enumerate() {
let Some(candidate) = writer.item_vector(wtxn, docid)? else {
// uses invariant: vectors are packed in the first writers.
break;
};
if candidate == vector {
writer.del_item(wtxn, expected_dimension, docid)?;
deleted_index = Some(index);
}
}
// 🥲 enforce invariant: vectors are packed in the first writers.
if let Some(deleted_index) = deleted_index {
let mut last_index_with_a_vector = None;
for (index, writer) in writers.iter().enumerate().skip(deleted_index) {
let Some(candidate) = writer.item_vector(wtxn, docid)? else {
break;
};
last_index_with_a_vector = Some((index, candidate));
}
if let Some((last_index, vector)) = last_index_with_a_vector {
// unwrap: computed the index from the list of writers
let writer = writers.get(last_index).unwrap();
writer.del_item(wtxn, expected_dimension, docid)?;
writers.get(deleted_index).unwrap().add_item(
wtxn,
expected_dimension,
docid,
&vector,
)?;
}
}
writer.del_item(wtxn, docid, &vector)?;
}
if let Some(value) = vector_deladd_obkv.get(DelAdd::Addition) {
let vector = pod_collect_to_vec(value);
// overflow was detected during vector extraction.
for writer in &writers {
if !writer.contains_item(wtxn, expected_dimension, docid)? {
writer.add_item(wtxn, expected_dimension, docid, &vector)?;
break;
}
}
writer.add_item(wtxn, docid, &vector)?;
}
}
@@ -792,13 +750,9 @@ fn extract_geo_point(value: &[u8], docid: DocumentId) -> GeoPoint {
GeoPoint::new(xyz_point, (docid, point))
}
fn merge_word_docids_reader_into_fst<MF>(
merger: Merger<CursorClonableMmap, MF>,
) -> Result<fst::Set<Vec<u8>>>
where
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
fn merge_word_docids_reader_into_fst(
merger: Merger<CursorClonableMmap, MergeFn>,
) -> Result<fst::Set<Vec<u8>>> {
let mut iter = merger.into_stream_merger_iter()?;
let mut builder = fst::SetBuilder::memory();
@@ -812,8 +766,8 @@ where
/// Writes the provided entries into the database using the serialize_value function.
/// The merge_values function is used if an entry already exists in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_entries_into_database<R, K, V, FS, FM, MF>(
merger: Merger<R, MF>,
fn write_entries_into_database<R, K, V, FS, FM>(
merger: Merger<R, MergeFn>,
database: &heed::Database<K, V>,
wtxn: &mut RwTxn<'_>,
serialize_value: FS,
@@ -823,8 +777,6 @@ where
R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
let mut buffer = Vec::new();
let database = database.remap_types::<Bytes, Bytes>();
@@ -851,22 +803,20 @@ where
/// Akin to the `write_entries_into_database` function but specialized
/// for the case where we only index additional searchable fields.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_proximity_entries_into_database_additional_searchables<R, MF>(
merger: Merger<R, MF>,
fn write_proximity_entries_into_database_additional_searchables<R>(
merger: Merger<R, MergeFn>,
database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
wtxn: &mut RwTxn<'_>,
) -> Result<()>
where
R: io::Read + io::Seek,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
if valid_lmdb_key(key) {
let (proximity_to_insert, word1, word2) =
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) {
Some(value) => {
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}

View File

@@ -1,9 +1,11 @@
pub use self::available_ids::AvailableIds;
pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::concurrent_available_ids::ConcurrentAvailableIds;
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::*;
pub use self::index_documents::{
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
};
pub use self::indexer_config::IndexerConfig;
pub use self::settings::{validate_embedding_settings, Setting, Settings};
pub use self::update_step::UpdateIndexingStep;
@@ -11,14 +13,12 @@ pub use self::word_prefix_docids::WordPrefixDocids;
pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
pub use self::words_prefixes_fst::WordsPrefixesFst;
mod available_ids;
mod available_documents_ids;
mod clear_documents;
mod concurrent_available_ids;
pub(crate) mod del_add;
pub(crate) mod facet;
mod index_documents;
mod indexer_config;
pub mod new;
mod settings;
mod update_step;
mod word_prefix_docids;

View File

@@ -1,522 +0,0 @@
use std::marker::PhantomData;
use std::sync::atomic::Ordering;
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
use heed::types::Bytes;
use memmap2::Mmap;
use super::extract::{FacetKind, HashMapMerger};
use super::StdResult;
use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY};
use crate::update::new::KvReaderFieldId;
use crate::{DocumentId, Index};
/// The capacity of the channel is currently in number of messages.
pub fn merger_writer_channel(cap: usize) -> (MergerSender, WriterReceiver) {
let (sender, receiver) = crossbeam_channel::bounded(cap);
(
MergerSender {
sender,
send_count: Default::default(),
writer_contentious_count: Default::default(),
merger_contentious_count: Default::default(),
},
WriterReceiver(receiver),
)
}
/// The capacity of the channel is currently in number of messages.
pub fn extractors_merger_channels(cap: usize) -> (ExtractorSender, MergerReceiver) {
let (sender, receiver) = crossbeam_channel::bounded(cap);
(ExtractorSender(sender), MergerReceiver(receiver))
}
pub enum KeyValueEntry {
SmallInMemory { key_length: usize, data: Box<[u8]> },
LargeOnDisk { key: Box<[u8]>, value: Mmap },
}
impl KeyValueEntry {
pub fn from_small_key_value(key: &[u8], value: &[u8]) -> Self {
let mut data = Vec::with_capacity(key.len() + value.len());
data.extend_from_slice(key);
data.extend_from_slice(value);
KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() }
}
pub fn from_large_key_value(key: &[u8], value: Mmap) -> Self {
KeyValueEntry::LargeOnDisk { key: key.to_vec().into_boxed_slice(), value }
}
pub fn key(&self) -> &[u8] {
match self {
KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[..*key_length],
KeyValueEntry::LargeOnDisk { key, value: _ } => key.as_ref(),
}
}
pub fn value(&self) -> &[u8] {
match self {
KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[*key_length..],
KeyValueEntry::LargeOnDisk { key: _, value } => value.as_ref(),
}
}
}
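// A minimal illustrative sketch (not part of the original file): it shows how the
// two `KeyValueEntry` variants round-trip a key/value pair. `from_small_key_value`
// copies both slices into a single boxed buffer and remembers the key length, so
// `key()` and `value()` are just views into that buffer, while `from_large_key_value`
// keeps the value memory-mapped on disk. The helper name below is hypothetical.
fn _key_value_entry_round_trip() {
    let entry = KeyValueEntry::from_small_key_value(b"word", b"docids");
    assert_eq!(entry.key(), b"word");
    assert_eq!(entry.value(), b"docids");
}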
pub struct KeyEntry {
data: Box<[u8]>,
}
impl KeyEntry {
pub fn from_key(key: &[u8]) -> Self {
KeyEntry { data: key.to_vec().into_boxed_slice() }
}
pub fn entry(&self) -> &[u8] {
self.data.as_ref()
}
}
pub enum EntryOperation {
Delete(KeyEntry),
Write(KeyValueEntry),
}
pub struct DocumentEntry {
docid: DocumentId,
content: Box<[u8]>,
}
impl DocumentEntry {
pub fn new_uncompressed(docid: DocumentId, content: Box<KvReaderFieldId>) -> Self {
DocumentEntry { docid, content: content.into() }
}
pub fn new_compressed(docid: DocumentId, content: Box<[u8]>) -> Self {
DocumentEntry { docid, content }
}
pub fn key(&self) -> [u8; 4] {
self.docid.to_be_bytes()
}
pub fn content(&self) -> &[u8] {
&self.content
}
}
pub struct DocumentDeletionEntry(DocumentId);
impl DocumentDeletionEntry {
pub fn key(&self) -> [u8; 4] {
self.0.to_be_bytes()
}
}
pub struct WriterOperation {
database: Database,
entry: EntryOperation,
}
pub enum Database {
Documents,
ExactWordDocids,
FidWordCountDocids,
Main,
WordDocids,
WordFidDocids,
WordPairProximityDocids,
WordPositionDocids,
FacetIdIsNullDocids,
FacetIdIsEmptyDocids,
FacetIdExistsDocids,
FacetIdF64NumberDocids,
FacetIdStringDocids,
}
impl Database {
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
match self {
Database::Documents => index.documents.remap_types(),
Database::ExactWordDocids => index.exact_word_docids.remap_types(),
Database::Main => index.main.remap_types(),
Database::WordDocids => index.word_docids.remap_types(),
Database::WordFidDocids => index.word_fid_docids.remap_types(),
Database::WordPositionDocids => index.word_position_docids.remap_types(),
Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(),
Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(),
Database::FacetIdIsNullDocids => index.facet_id_is_null_docids.remap_types(),
Database::FacetIdIsEmptyDocids => index.facet_id_is_empty_docids.remap_types(),
Database::FacetIdExistsDocids => index.facet_id_exists_docids.remap_types(),
Database::FacetIdF64NumberDocids => index.facet_id_f64_docids.remap_types(),
Database::FacetIdStringDocids => index.facet_id_string_docids.remap_types(),
}
}
}
impl WriterOperation {
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
self.database.database(index)
}
pub fn entry(self) -> EntryOperation {
self.entry
}
}
pub struct WriterReceiver(Receiver<WriterOperation>);
impl IntoIterator for WriterReceiver {
type Item = WriterOperation;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
pub struct MergerSender {
sender: Sender<WriterOperation>,
/// The total number of messages we send in the channel.
send_count: std::sync::atomic::AtomicUsize,
/// The number of times we sent something in a channel that was full.
writer_contentious_count: std::sync::atomic::AtomicUsize,
/// The number of times we sent something in a channel that was empty.
merger_contentious_count: std::sync::atomic::AtomicUsize,
}
impl Drop for MergerSender {
fn drop(&mut self) {
eprintln!(
"Merger channel stats: {} sends, {} writer contentions ({}%), {} merger contentions ({}%)",
self.send_count.load(Ordering::SeqCst),
self.writer_contentious_count.load(Ordering::SeqCst),
(self.writer_contentious_count.load(Ordering::SeqCst) as f32 / self.send_count.load(Ordering::SeqCst) as f32) * 100.0,
self.merger_contentious_count.load(Ordering::SeqCst),
(self.merger_contentious_count.load(Ordering::SeqCst) as f32 / self.send_count.load(Ordering::SeqCst) as f32) * 100.0
)
}
}
impl MergerSender {
pub fn main(&self) -> MainSender<'_> {
MainSender(self)
}
pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> {
WordDocidsSender { sender: self, _marker: PhantomData }
}
pub fn facet_docids(&self) -> FacetDocidsSender<'_> {
FacetDocidsSender { sender: self }
}
pub fn documents(&self) -> DocumentsSender<'_> {
DocumentsSender(self)
}
pub fn send_documents_ids(&self, bitmap: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
DOCUMENTS_IDS_KEY.as_bytes(),
bitmap,
));
match self.send(WriterOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn send(&self, op: WriterOperation) -> StdResult<(), SendError<()>> {
if self.sender.is_full() {
self.writer_contentious_count.fetch_add(1, Ordering::SeqCst);
}
if self.sender.is_empty() {
self.merger_contentious_count.fetch_add(1, Ordering::SeqCst);
}
self.send_count.fetch_add(1, Ordering::SeqCst);
match self.sender.send(op) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct MainSender<'a>(&'a MergerSender);
impl MainSender<'_> {
pub fn write_words_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value(
WORDS_FST_KEY.as_bytes(),
value,
));
match self.0.send(WriterOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.0.send(WriterOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub enum ExactWordDocids {}
pub enum FidWordCountDocids {}
pub enum WordDocids {}
pub enum WordFidDocids {}
pub enum WordPairProximityDocids {}
pub enum WordPositionDocids {}
pub enum FacetDocids {}
pub trait DatabaseType {
const DATABASE: Database;
}
pub trait MergerOperationType {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation;
}
impl DatabaseType for ExactWordDocids {
const DATABASE: Database = Database::ExactWordDocids;
}
impl MergerOperationType for ExactWordDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::ExactWordDocidsMerger(merger)
}
}
impl DatabaseType for FidWordCountDocids {
const DATABASE: Database = Database::FidWordCountDocids;
}
impl MergerOperationType for FidWordCountDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::FidWordCountDocidsMerger(merger)
}
}
impl DatabaseType for WordDocids {
const DATABASE: Database = Database::WordDocids;
}
impl MergerOperationType for WordDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordDocidsMerger(merger)
}
}
impl DatabaseType for WordFidDocids {
const DATABASE: Database = Database::WordFidDocids;
}
impl MergerOperationType for WordFidDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordFidDocidsMerger(merger)
}
}
impl DatabaseType for WordPairProximityDocids {
const DATABASE: Database = Database::WordPairProximityDocids;
}
impl MergerOperationType for WordPairProximityDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordPairProximityDocidsMerger(merger)
}
}
impl DatabaseType for WordPositionDocids {
const DATABASE: Database = Database::WordPositionDocids;
}
impl MergerOperationType for WordPositionDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordPositionDocidsMerger(merger)
}
}
impl MergerOperationType for FacetDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::FacetDocidsMerger(merger)
}
}
pub trait DocidsSender {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>;
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>;
}
pub struct WordDocidsSender<'a, D> {
sender: &'a MergerSender,
_marker: PhantomData<D>,
}
impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
match self.sender.send(WriterOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.sender.send(WriterOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct FacetDocidsSender<'a> {
sender: &'a MergerSender,
}
impl DocidsSender for FacetDocidsSender<'_> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let (database, key) = self.extract_database(key);
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
match self.sender.send(WriterOperation { database, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let (database, key) = self.extract_database(key);
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.sender.send(WriterOperation { database, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
impl FacetDocidsSender<'_> {
fn extract_database<'a>(&self, key: &'a [u8]) -> (Database, &'a [u8]) {
let database = match FacetKind::from(key[0]) {
FacetKind::Number => Database::FacetIdF64NumberDocids,
FacetKind::String => Database::FacetIdStringDocids,
FacetKind::Null => Database::FacetIdIsNullDocids,
FacetKind::Empty => Database::FacetIdIsEmptyDocids,
FacetKind::Exists => Database::FacetIdExistsDocids,
};
(database, &key[1..])
}
}
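// A minimal illustrative sketch (not part of the original file): facet keys sent
// through `FacetDocidsSender` carry their target database in the first byte. The
// tag is a `FacetKind` discriminant and `extract_database` strips it before routing
// the write, so a number facet entry would be addressed as shown below. The helper
// name is hypothetical.
fn _tagged_facet_key(real_key: &[u8]) -> Vec<u8> {
    let mut tagged = Vec::with_capacity(1 + real_key.len());
    tagged.push(FacetKind::Number as u8); // routed to Database::FacetIdF64NumberDocids
    tagged.extend_from_slice(real_key);
    tagged
}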
pub struct DocumentsSender<'a>(&'a MergerSender);
impl DocumentsSender<'_> {
/// TODO do that efficiently
pub fn uncompressed(
&self,
docid: DocumentId,
document: &KvReaderFieldId,
) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
&docid.to_be_bytes(),
document.as_bytes(),
));
match self.0.send(WriterOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, docid: DocumentId) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(&docid.to_be_bytes()));
match self.0.send(WriterOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub enum MergerOperation {
ExactWordDocidsMerger(HashMapMerger),
FidWordCountDocidsMerger(HashMapMerger),
WordDocidsMerger(HashMapMerger),
WordFidDocidsMerger(HashMapMerger),
WordPairProximityDocidsMerger(HashMapMerger),
WordPositionDocidsMerger(HashMapMerger),
FacetDocidsMerger(HashMapMerger),
DeleteDocument { docid: DocumentId },
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
FinishedDocument,
}
pub struct MergerReceiver(Receiver<MergerOperation>);
impl IntoIterator for MergerReceiver {
type Item = MergerOperation;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
pub struct ExtractorSender(Sender<MergerOperation>);
impl ExtractorSender {
pub fn document_sender(&self) -> DocumentSender<'_> {
DocumentSender(Some(&self.0))
}
pub fn send_searchable<D: MergerOperationType>(
&self,
merger: HashMapMerger,
) -> StdResult<(), SendError<()>> {
match self.0.send(D::new_merger_operation(merger)) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct DocumentSender<'a>(Option<&'a Sender<MergerOperation>>);
impl DocumentSender<'_> {
pub fn insert(
&self,
docid: DocumentId,
document: Box<KvReaderFieldId>,
) -> StdResult<(), SendError<()>> {
let sender = self.0.unwrap();
match sender.send(MergerOperation::InsertDocument { docid, document }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, docid: DocumentId) -> StdResult<(), SendError<()>> {
let sender = self.0.unwrap();
match sender.send(MergerOperation::DeleteDocument { docid }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn finish(mut self) -> StdResult<(), SendError<()>> {
let sender = self.0.take().unwrap();
match sender.send(MergerOperation::FinishedDocument) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
impl Drop for DocumentSender<'_> {
fn drop(&mut self) {
if let Some(sender) = self.0.take() {
sender.send(MergerOperation::FinishedDocument);
}
}
}

View File

@@ -1,96 +0,0 @@
use heed::RoTxn;
use obkv::KvReader;
use crate::update::new::KvReaderFieldId;
use crate::{DocumentId, FieldId, Index, Result};
pub enum DocumentChange {
Deletion(Deletion),
Update(Update),
Insertion(Insertion),
}
pub struct Deletion {
docid: DocumentId,
current: Box<KvReaderFieldId>,
}
pub struct Update {
docid: DocumentId,
current: Box<KvReaderFieldId>,
new: Box<KvReaderFieldId>,
}
pub struct Insertion {
docid: DocumentId,
new: Box<KvReaderFieldId>,
}
impl DocumentChange {
pub fn docid(&self) -> DocumentId {
match &self {
Self::Deletion(inner) => inner.docid(),
Self::Update(inner) => inner.docid(),
Self::Insertion(inner) => inner.docid(),
}
}
}
impl Deletion {
pub fn create(docid: DocumentId, current: Box<KvReaderFieldId>) -> Self {
Self { docid, current }
}
pub fn docid(&self) -> DocumentId {
self.docid
}
// TODO shouldn't we use the one in self?
pub fn current<'a>(
&self,
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<&'a KvReader<FieldId>>> {
index.documents.get(rtxn, &self.docid).map_err(crate::Error::from)
}
}
impl Insertion {
pub fn create(docid: DocumentId, new: Box<KvReaderFieldId>) -> Self {
Insertion { docid, new }
}
pub fn docid(&self) -> DocumentId {
self.docid
}
pub fn new(&self) -> &KvReader<FieldId> {
self.new.as_ref()
}
}
impl Update {
pub fn create(
docid: DocumentId,
current: Box<KvReaderFieldId>,
new: Box<KvReaderFieldId>,
) -> Self {
Update { docid, current, new }
}
pub fn docid(&self) -> DocumentId {
self.docid
}
pub fn current<'a>(
&self,
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<&'a KvReader<FieldId>>> {
index.documents.get(rtxn, &self.docid).map_err(crate::Error::from)
}
pub fn new(&self) -> &KvReader<FieldId> {
self.new.as_ref()
}
}

View File

@@ -1,149 +0,0 @@
use std::collections::HashMap;
use roaring::RoaringBitmap;
use smallvec::SmallVec;
pub const KEY_SIZE: usize = 12;
#[derive(Debug)]
pub struct CboCachedSorter {
cache: HashMap<SmallVec<[u8; KEY_SIZE]>, DelAddRoaringBitmap>,
total_insertions: usize,
fitted_in_key: usize,
}
impl CboCachedSorter {
pub fn new() -> Self {
CboCachedSorter { cache: HashMap::new(), total_insertions: 0, fitted_in_key: 0 }
}
}
impl CboCachedSorter {
pub fn insert_del_u32(&mut self, key: &[u8], n: u32) {
match self.cache.get_mut(key) {
Some(DelAddRoaringBitmap { del, add: _ }) => {
del.get_or_insert_with(RoaringBitmap::default).insert(n);
}
None => {
self.total_insertions += 1;
self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
let value = DelAddRoaringBitmap::new_del_u32(n);
assert!(self.cache.insert(key.into(), value).is_none());
}
}
}
pub fn insert_del(&mut self, key: &[u8], bitmap: RoaringBitmap) {
match self.cache.get_mut(key) {
Some(DelAddRoaringBitmap { del, add: _ }) => {
*del.get_or_insert_with(RoaringBitmap::default) |= bitmap;
}
None => {
self.total_insertions += 1;
self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
let value = DelAddRoaringBitmap::new_del(bitmap);
assert!(self.cache.insert(key.into(), value).is_none());
}
}
}
pub fn insert_add_u32(&mut self, key: &[u8], n: u32) {
match self.cache.get_mut(key) {
Some(DelAddRoaringBitmap { del: _, add }) => {
add.get_or_insert_with(RoaringBitmap::default).insert(n);
}
None => {
self.total_insertions += 1;
self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
let value = DelAddRoaringBitmap::new_add_u32(n);
assert!(self.cache.insert(key.into(), value).is_none());
}
}
}
pub fn insert_add(&mut self, key: &[u8], bitmap: RoaringBitmap) {
match self.cache.get_mut(key) {
Some(DelAddRoaringBitmap { del: _, add }) => {
*add.get_or_insert_with(RoaringBitmap::default) |= bitmap;
}
None => {
self.total_insertions += 1;
self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
let value = DelAddRoaringBitmap::new_add(bitmap);
assert!(self.cache.insert(key.into(), value).is_none());
}
}
}
pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) {
match self.cache.get_mut(key) {
Some(DelAddRoaringBitmap { del, add }) => {
del.get_or_insert_with(RoaringBitmap::default).insert(n);
add.get_or_insert_with(RoaringBitmap::default).insert(n);
}
None => {
self.total_insertions += 1;
self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
let value = DelAddRoaringBitmap::new_del_add_u32(n);
assert!(self.cache.insert(key.into(), value).is_none());
}
}
}
pub fn into_sorter(self) -> HashMap<SmallVec<[u8; KEY_SIZE]>, DelAddRoaringBitmap> {
eprintln!(
"LruCache stats: {} <= {KEY_SIZE} bytes ({}%) on a total of {} insertions",
self.fitted_in_key,
(self.fitted_in_key as f32 / self.total_insertions as f32) * 100.0,
self.total_insertions,
);
self.cache
}
}
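// A minimal usage sketch (not part of the original file): the cache keeps one
// `DelAddRoaringBitmap` per key, so deletions and additions that target the same
// key are accumulated in place before `into_sorter` hands the map to the merger.
// The helper name is hypothetical.
fn _cached_sorter_usage() {
    let mut cache = CboCachedSorter::new();
    cache.insert_del_u32(b"doggo", 2); // docid 2 no longer contains "doggo"
    cache.insert_add_u32(b"doggo", 3); // docid 3 now contains "doggo"
    let map = cache.into_sorter();
    assert_eq!(map.len(), 1); // both operations share a single key entry
}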
#[derive(Debug, Clone, Default)]
pub struct DelAddRoaringBitmap {
pub(crate) del: Option<RoaringBitmap>,
pub(crate) add: Option<RoaringBitmap>,
}
impl DelAddRoaringBitmap {
fn new_del_add_u32(n: u32) -> Self {
DelAddRoaringBitmap {
del: Some(RoaringBitmap::from([n])),
add: Some(RoaringBitmap::from([n])),
}
}
fn new_del(bitmap: RoaringBitmap) -> Self {
DelAddRoaringBitmap { del: Some(bitmap), add: None }
}
fn new_del_u32(n: u32) -> Self {
DelAddRoaringBitmap { del: Some(RoaringBitmap::from([n])), add: None }
}
fn new_add(bitmap: RoaringBitmap) -> Self {
DelAddRoaringBitmap { del: None, add: Some(bitmap) }
}
fn new_add_u32(n: u32) -> Self {
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
}
pub fn merge_with(&mut self, other: DelAddRoaringBitmap) {
self.del = match (self.del.take(), other.del) {
(None, None) => None,
(None, Some(other)) => Some(other),
(Some(this), None) => Some(this),
(Some(this), Some(other)) => Some(this | other),
};
self.add = match (self.add.take(), other.add) {
(None, None) => None,
(None, Some(other)) => Some(other),
(Some(this), None) => Some(this),
(Some(this), Some(other)) => Some(this | other),
};
}
}
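// A minimal illustrative sketch (not part of the original file): `merge_with`
// unions the deletion sides together and the addition sides together, keeping
// `None` only when both operands had no bitmap on that side. The helper name is
// hypothetical.
fn _merge_del_add() {
    let mut left = DelAddRoaringBitmap::new_del_u32(1);
    let right = DelAddRoaringBitmap::new_add_u32(2);
    left.merge_with(right);
    assert_eq!(left.del.map(|bitmap| bitmap.len()), Some(1));
    assert_eq!(left.add.map(|bitmap| bitmap.len()), Some(1));
}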

View File

@@ -1,240 +0,0 @@
use std::collections::HashSet;
use heed::RoTxn;
use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
use serde_json::Value;
use super::super::cache::CboCachedSorter;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::new::extract::{DocidsExtractor, HashMapMerger};
use crate::update::new::{DocumentChange, ItemsPool};
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result, MAX_FACET_VALUE_LENGTH};
pub struct FacetedDocidsExtractor;
impl FacetedDocidsExtractor {
fn extract_document_change(
rtxn: &RoTxn,
index: &Index,
buffer: &mut Vec<u8>,
fields_ids_map: &mut GlobalFieldsIdsMap,
attributes_to_extract: &[&str],
cached_sorter: &mut CboCachedSorter,
document_change: DocumentChange,
) -> Result<()> {
match document_change {
DocumentChange::Deletion(inner) => extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut |fid, value| {
Self::facet_fn_with_options(
buffer,
cached_sorter,
CboCachedSorter::insert_del_u32,
inner.docid(),
fid,
value,
)
},
),
DocumentChange::Update(inner) => {
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut |fid, value| {
Self::facet_fn_with_options(
buffer,
cached_sorter,
CboCachedSorter::insert_del_u32,
inner.docid(),
fid,
value,
)
},
)?;
extract_document_facets(
attributes_to_extract,
inner.new(),
fields_ids_map,
&mut |fid, value| {
Self::facet_fn_with_options(
buffer,
cached_sorter,
CboCachedSorter::insert_add_u32,
inner.docid(),
fid,
value,
)
},
)
}
DocumentChange::Insertion(inner) => extract_document_facets(
attributes_to_extract,
inner.new(),
fields_ids_map,
&mut |fid, value| {
Self::facet_fn_with_options(
buffer,
cached_sorter,
CboCachedSorter::insert_add_u32,
inner.docid(),
fid,
value,
)
},
),
}
}
fn facet_fn_with_options(
buffer: &mut Vec<u8>,
cached_sorter: &mut CboCachedSorter,
cache_fn: impl Fn(&mut CboCachedSorter, &[u8], u32),
docid: DocumentId,
fid: FieldId,
value: &Value,
) -> Result<()> {
// Exists
// key: fid
buffer.clear();
buffer.push(FacetKind::Exists as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &*buffer, docid);
match value {
// Number
// key: fid - level - orderedf64 - originalf64
Value::Number(number) => {
if let Some((n, ordered)) =
number.as_f64().and_then(|n| f64_into_bytes(n).map(|ordered| (n, ordered)))
{
buffer.clear();
buffer.push(FacetKind::Number as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(1); // level 0
buffer.extend_from_slice(&ordered);
buffer.extend_from_slice(&n.to_be_bytes());
Ok(cache_fn(cached_sorter, &*buffer, docid))
} else {
Ok(())
}
}
// String
// key: fid - level - truncated_string
Value::String(s) => {
let truncated = truncate_str(s);
buffer.clear();
buffer.push(FacetKind::String as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(1); // level 0
buffer.extend_from_slice(truncated.as_bytes());
Ok(cache_fn(cached_sorter, &*buffer, docid))
}
// Null
// key: fid
Value::Null => {
buffer.clear();
buffer.push(FacetKind::Null as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
Ok(cache_fn(cached_sorter, &*buffer, docid))
}
// Empty
// key: fid
Value::Array(a) if a.is_empty() => {
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
Ok(cache_fn(cached_sorter, &*buffer, docid))
}
Value::Object(o) if o.is_empty() => {
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
Ok(cache_fn(cached_sorter, &*buffer, docid))
}
// Otherwise, do nothing
// TODO: What about Value::Bool?
_ => Ok(()),
}
}
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
index.user_defined_faceted_fields(rtxn)
}
}
/// Truncates a string to the biggest valid LMDB key size.
fn truncate_str(s: &str) -> &str {
let index = s
.char_indices()
.map(|(idx, _)| idx)
.chain(std::iter::once(s.len()))
.take_while(|idx| idx <= &MAX_FACET_VALUE_LENGTH)
.last();
&s[..index.unwrap_or(0)]
}
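// A minimal illustrative sketch (not part of the original file): `truncate_str`
// cuts on a character boundary, so multi-byte UTF-8 facet values never produce an
// invalid slice even when they exceed MAX_FACET_VALUE_LENGTH bytes. The helper
// name is hypothetical.
fn _truncate_on_char_boundary() {
    let long = "é".repeat(MAX_FACET_VALUE_LENGTH); // 2 bytes per char, always too long
    let truncated = truncate_str(&long);
    assert!(truncated.len() <= MAX_FACET_VALUE_LENGTH);
    assert!(truncated.chars().all(|c| c == 'é'));
}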
impl DocidsExtractor for FacetedDocidsExtractor {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
fn run_extraction(
index: &Index,
fields_ids_map: &GlobalFieldsIdsMap,
indexer: GrenadParameters,
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
) -> Result<HashMapMerger> {
let max_memory = indexer.max_memory_by_thread();
let rtxn = index.read_txn()?;
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_extract: Vec<_> =
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
let context_pool = ItemsPool::new(|| {
Ok((index.read_txn()?, fields_ids_map.clone(), Vec::new(), CboCachedSorter::new()))
});
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
document_changes.into_par_iter().try_for_each(|document_change| {
context_pool.with(|(rtxn, fields_ids_map, buffer, cached_sorter)| {
Self::extract_document_change(
&*rtxn,
index,
buffer,
fields_ids_map,
&attributes_to_extract,
cached_sorter,
document_change?,
)
})
})?;
}
{
let mut builder = HashMapMerger::new();
let span =
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
let _entered = span.enter();
let readers: Vec<_> = context_pool
.into_items()
.par_bridge()
.map(|(_rtxn, _tokenizer, _fields_ids_map, cached_sorter)| {
cached_sorter.into_sorter()
})
.collect();
builder.extend(readers);
Ok(builder)
}
}
}

View File

@@ -1,52 +0,0 @@
use serde_json::Value;
use crate::update::new::extract::perm_json_p;
use crate::update::new::KvReaderFieldId;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
pub fn extract_document_facets(
attributes_to_extract: &[&str],
obkv: &KvReaderFieldId,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
) -> Result<()> {
let mut field_name = String::new();
for (field_id, field_bytes) in obkv {
let Some(field_name) = field_id_map.name(field_id).map(|s| {
field_name.clear();
field_name.push_str(s);
&field_name
}) else {
unreachable!("field id not found in field id map");
};
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
Some(field_id) => facet_fn(field_id, value),
None => Err(UserError::AttributeLimitReached.into()),
};
// if the current field is faceted or contains a faceted attribute
if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) {
// parse json.
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
Value::Object(object) => perm_json_p::seek_leaf_values_in_object(
&object,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
&mut tokenize_field,
)?,
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
&array,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
&mut tokenize_field,
)?,
value => tokenize_field(field_name, &value)?,
}
}
}
Ok(())
}

View File

@@ -1,26 +0,0 @@
mod extract_facets;
mod facet_document;
pub use extract_facets::FacetedDocidsExtractor;
#[repr(u8)]
pub enum FacetKind {
Number = 0,
String = 1,
Null = 2,
Empty = 3,
Exists,
}
impl From<u8> for FacetKind {
fn from(value: u8) -> Self {
match value {
0 => Self::Number,
1 => Self::String,
2 => Self::Null,
3 => Self::Empty,
4 => Self::Exists,
_ => unreachable!(),
}
}
}

View File

@@ -1,234 +0,0 @@
use std::borrow::Borrow;
use std::hash::{BuildHasher, Hash};
use std::iter::repeat_with;
use std::mem;
use std::num::NonZeroUsize;
use hashbrown::hash_map::{DefaultHashBuilder, Entry};
use hashbrown::HashMap;
#[derive(Debug)]
pub struct Lru<K, V, S = DefaultHashBuilder> {
lookup: HashMap<K, usize, S>,
storage: FixedSizeList<LruNode<K, V>>,
}
impl<K: Eq + Hash, V> Lru<K, V> {
/// Creates a new LRU cache that holds at most `capacity` elements.
pub fn new(capacity: NonZeroUsize) -> Self {
Self { lookup: HashMap::new(), storage: FixedSizeList::new(capacity.get()) }
}
}
impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
/// Creates a new LRU cache that holds at most `capacity` elements
/// and uses the provided hash builder to hash keys.
pub fn with_hasher(capacity: NonZeroUsize, hash_builder: S) -> Lru<K, V, S> {
Self {
lookup: HashMap::with_hasher(hash_builder),
storage: FixedSizeList::new(capacity.get()),
}
}
}
impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
/// Returns a mutable reference to the value of the key in the cache or `None` if it is not present in the cache.
///
/// Moves the key to the head of the LRU list if it exists.
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
let idx = *self.lookup.get(key)?;
self.storage.move_front(idx).map(|node| &mut node.value)
}
}
impl<K: Clone + Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
pub fn push(&mut self, key: K, value: V) -> Option<(K, V)> {
match self.lookup.entry(key) {
Entry::Occupied(occ) => {
// It's fine to unwrap here because:
// * the entry already exists
let node = self.storage.move_front(*occ.get()).unwrap();
let old_value = mem::replace(&mut node.value, value);
let old_key = occ.replace_key();
Some((old_key, old_value))
}
Entry::Vacant(vac) => {
let key = vac.key().clone();
if self.storage.is_full() {
// It's fine to unwrap here because:
// * the cache capacity is non zero
// * the cache is full
let idx = self.storage.back_idx();
let node = self.storage.move_front(idx).unwrap();
let LruNode { key, value } = mem::replace(node, LruNode { key, value });
vac.insert(idx);
self.lookup.remove(&key);
Some((key, value))
} else {
// It's fine to unwrap here because:
// * the cache capacity is non zero
// * the cache is not full
let (idx, _) = self.storage.push_front(LruNode { key, value }).unwrap();
vac.insert(idx);
None
}
}
}
}
}
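// A minimal usage sketch (not part of the original file): once the fixed-size
// storage is full, `push` evicts the least recently used entry and returns it,
// while `get_mut` refreshes an entry so it survives the next eviction. The helper
// name is hypothetical.
fn _lru_eviction() {
    let mut lru = Lru::new(NonZeroUsize::new(2).unwrap());
    assert!(lru.push("a", 1).is_none());
    assert!(lru.push("b", 2).is_none());
    let _ = lru.get_mut("a"); // "a" becomes the most recently used entry
    let evicted = lru.push("c", 3); // the cache is full: the LRU entry ("b") is evicted
    assert_eq!(evicted, Some(("b", 2)));
}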
impl<K, V, S> IntoIterator for Lru<K, V, S> {
type Item = (K, V);
type IntoIter = IntoIter<K, V>;
fn into_iter(self) -> Self::IntoIter {
IntoIter { lookup_iter: self.lookup.into_iter(), nodes: self.storage.nodes }
}
}
pub struct IntoIter<K, V> {
lookup_iter: hashbrown::hash_map::IntoIter<K, usize>,
nodes: Box<[Option<FixedSizeListNode<LruNode<K, V>>>]>,
}
impl<K, V> Iterator for IntoIter<K, V> {
type Item = (K, V);
fn next(&mut self) -> Option<Self::Item> {
let (_key, idx) = self.lookup_iter.next()?;
let LruNode { key, value } = self.nodes.get_mut(idx)?.take()?.data;
Some((key, value))
}
}
#[derive(Debug)]
struct LruNode<K, V> {
key: K,
value: V,
}
#[derive(Debug)]
struct FixedSizeListNode<T> {
prev: usize,
next: usize,
data: T,
}
#[derive(Debug)]
struct FixedSizeList<T> {
nodes: Box<[Option<FixedSizeListNode<T>>]>,
/// Also corresponds to the first `None` in the nodes.
length: usize,
// TODO Also, we probably do not need one of the front and back cursors.
front: usize,
back: usize,
}
impl<T> FixedSizeList<T> {
fn new(capacity: usize) -> Self {
Self {
nodes: repeat_with(|| None).take(capacity).collect::<Vec<_>>().into_boxed_slice(),
length: 0,
front: usize::MAX,
back: usize::MAX,
}
}
#[inline]
fn capacity(&self) -> usize {
self.nodes.len()
}
#[inline]
fn len(&self) -> usize {
self.length
}
#[inline]
fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
fn is_full(&self) -> bool {
self.len() == self.capacity()
}
#[inline]
fn back_idx(&self) -> usize {
self.back
}
#[inline]
fn next(&mut self) -> Option<usize> {
if self.is_full() {
None
} else {
let current_free = self.length;
self.length += 1;
Some(current_free)
}
}
#[inline]
fn node_mut(&mut self, idx: usize) -> Option<&mut FixedSizeListNode<T>> {
self.nodes.get_mut(idx).and_then(|node| node.as_mut())
}
#[inline]
fn node_ref(&self, idx: usize) -> Option<&FixedSizeListNode<T>> {
self.nodes.get(idx).and_then(|node| node.as_ref())
}
#[inline]
fn move_front(&mut self, idx: usize) -> Option<&mut T> {
let node = self.nodes.get_mut(idx)?.take()?;
if let Some(prev) = self.node_mut(node.prev) {
prev.next = node.next;
} else {
self.front = node.next;
}
if let Some(next) = self.node_mut(node.next) {
next.prev = node.prev;
} else {
self.back = node.prev;
}
if let Some(front) = self.node_mut(self.front) {
front.prev = idx;
}
if self.node_ref(self.back).is_none() {
self.back = idx;
}
let node = self.nodes.get_mut(idx).unwrap().insert(FixedSizeListNode {
prev: usize::MAX,
next: self.front,
data: node.data,
});
self.front = idx;
Some(&mut node.data)
}
#[inline]
fn push_front(&mut self, data: T) -> Option<(usize, &mut T)> {
let idx = self.next()?;
if let Some(front) = self.node_mut(self.front) {
front.prev = idx;
}
if self.node_ref(self.back).is_none() {
self.back = idx;
}
let node = self.nodes.get_mut(idx).unwrap().insert(FixedSizeListNode {
prev: usize::MAX,
next: self.front,
data,
});
self.front = idx;
Some((idx, &mut node.data))
}
}

View File

@@ -1,218 +0,0 @@
mod cache;
mod faceted;
mod lru;
mod searchable;
use std::collections::HashMap;
use std::mem;
pub use faceted::*;
use grenad::MergeFunction;
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
use rayon::slice::ParallelSliceMut as _;
pub use searchable::*;
use smallvec::SmallVec;
use super::DocumentChange;
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
use crate::{GlobalFieldsIdsMap, Index, Result};
pub trait DocidsExtractor {
fn run_extraction(
index: &Index,
fields_ids_map: &GlobalFieldsIdsMap,
indexer: GrenadParameters,
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
) -> Result<HashMapMerger>;
}
pub struct HashMapMerger {
maps: Vec<HashMap<SmallVec<[u8; cache::KEY_SIZE]>, cache::DelAddRoaringBitmap>>,
}
impl HashMapMerger {
pub fn new() -> HashMapMerger {
HashMapMerger { maps: Vec::new() }
}
pub fn extend<I>(&mut self, iter: I)
where
I: IntoIterator<
Item = HashMap<SmallVec<[u8; cache::KEY_SIZE]>, cache::DelAddRoaringBitmap>,
>,
{
self.maps.extend(iter);
}
}
impl IntoIterator for HashMapMerger {
type Item = (SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap);
type IntoIter = IntoIter;
fn into_iter(self) -> Self::IntoIter {
let mut entries = {
let span = tracing::trace_span!(target: "indexing::documents::merge", "into_par_iter");
let _entered = span.enter();
let entries: Vec<_> =
self.maps.into_par_iter().flat_map(|m| m.into_par_iter()).collect();
eprintln!("There are {} entries in the HashMapMerger", entries.len());
entries
};
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "par_sort_unstable_by");
let _entered = span.enter();
entries.par_sort_unstable_by(|(ka, _), (kb, _)| ka.cmp(kb));
IntoIter {
sorted_entries: entries.into_iter(),
current_key: None,
current_deladd: cache::DelAddRoaringBitmap::default(),
}
}
}
}
pub struct IntoIter {
sorted_entries: std::vec::IntoIter<(SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap)>,
current_key: Option<SmallVec<[u8; 12]>>,
current_deladd: cache::DelAddRoaringBitmap,
}
impl Iterator for IntoIter {
type Item = (SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap);
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.sorted_entries.next() {
Some((k, deladd)) => {
if self.current_key.as_deref() == Some(k.as_slice()) {
self.current_deladd.merge_with(deladd);
} else {
let previous_key = self.current_key.replace(k);
let previous_deladd = mem::replace(&mut self.current_deladd, deladd);
if let Some(previous_key) = previous_key {
return Some((previous_key, previous_deladd));
}
}
}
None => {
let current_deladd = mem::take(&mut self.current_deladd);
return self.current_key.take().map(|ck| (ck, current_deladd));
}
}
}
}
}
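// A minimal illustrative sketch (not part of the original file): after the
// per-thread maps are flattened and sorted by key, the iterator folds consecutive
// entries that share a key into a single `DelAddRoaringBitmap`, so every key is
// yielded exactly once. The helper name is hypothetical.
fn _merger_deduplicates_keys() {
    let mut first = HashMap::new();
    first.insert(SmallVec::from_slice(b"doggo"), cache::DelAddRoaringBitmap::default());
    let mut second = HashMap::new();
    second.insert(SmallVec::from_slice(b"doggo"), cache::DelAddRoaringBitmap::default());
    let mut merger = HashMapMerger::new();
    merger.extend([first, second]);
    assert_eq!(merger.into_iter().count(), 1);
}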
/// TODO move in permissive json pointer
pub mod perm_json_p {
use serde_json::{Map, Value};
use crate::Result;
const SPLIT_SYMBOL: char = '.';
/// Returns `true` if the `selector` matches the `key`.
///
/// ```text
/// Example:
/// `animaux` match `animaux`
/// `animaux.chien` match `animaux`
/// `animaux.chien` match `animaux.chien`
/// `animaux.chien.nom` match `animaux`
/// `animaux.chien.nom` match `animaux.chien`
/// -----------------------------------------
/// `animaux` doesn't match `animaux.chien`
/// `animaux.` doesn't match `animaux`
/// `animaux.ch` doesn't match `animaux.chien`
/// `animau` doesn't match `animaux`
/// ```
pub fn contained_in(selector: &str, key: &str) -> bool {
selector.starts_with(key)
&& selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true)
}
pub fn seek_leaf_values_in_object(
value: &Map<String, Value>,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
) -> Result<()> {
if value.is_empty() {
seeker(&base_key, &Value::Object(Map::with_capacity(0)))?;
}
for (key, value) in value.iter() {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};
// here if the user only specified `doggo` we need to iterate over all the fields of `doggo`
// so we check contained_in on both sides
let should_continue = select_field(&base_key, selectors, skip_selectors);
if should_continue {
match value {
Value::Object(object) => seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
&base_key,
seeker,
),
Value::Array(array) => seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
&base_key,
seeker,
),
value => seeker(&base_key, value),
}?;
}
}
Ok(())
}
pub fn seek_leaf_values_in_array(
values: &[Value],
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
) -> Result<()> {
if values.is_empty() {
seeker(&base_key, &Value::Array(vec![]))?;
}
for value in values {
match value {
Value::Object(object) => {
seek_leaf_values_in_object(object, selectors, skip_selectors, base_key, seeker)
}
Value::Array(array) => {
seek_leaf_values_in_array(array, selectors, skip_selectors, base_key, seeker)
}
value => seeker(base_key, value),
}?;
}
Ok(())
}
pub fn select_field(
field_name: &str,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
) -> bool {
selectors.map_or(true, |selectors| {
selectors.iter().any(|selector| {
contained_in(selector, &field_name) || contained_in(&field_name, selector)
})
}) && !skip_selectors.iter().any(|skip_selector| {
contained_in(skip_selector, &field_name) || contained_in(&field_name, skip_selector)
})
}
}
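// A minimal illustrative sketch (not part of the original file): `select_field`
// keeps a field when it is equal to, nested under, or an ancestor of one of the
// selectors, unless a skip selector covers it (which is how exact attributes are
// excluded from the regular word docids extraction). The helper name is hypothetical.
fn _select_field_examples() {
    assert!(perm_json_p::select_field("doggo.name", Some(&["doggo"][..]), &[]));
    assert!(perm_json_p::select_field("doggo", Some(&["doggo.name"][..]), &[]));
    assert!(!perm_json_p::select_field("doggo.name", Some(&["doggo"][..]), &["doggo.name"]));
    assert!(!perm_json_p::select_field("catto", Some(&["doggo"][..]), &[]));
}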

View File

@@ -1,124 +0,0 @@
use std::collections::HashMap;
use heed::RoTxn;
use super::tokenize_document::DocumentTokenizer;
use super::SearchableExtractor;
use crate::update::new::extract::cache::CboCachedSorter;
use crate::update::new::DocumentChange;
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
const MAX_COUNTED_WORDS: usize = 30;
pub struct FidWordCountDocidsExtractor;
impl SearchableExtractor for FidWordCountDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
}
// This method is reimplemented to count the number of words in the document in each field
// and to store the docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS.
fn extract_document_change(
rtxn: &RoTxn,
index: &Index,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
cached_sorter: &mut CboCachedSorter,
document_change: DocumentChange,
) -> Result<()> {
let mut key_buffer = Vec::new();
match document_change {
DocumentChange::Deletion(inner) => {
let mut fid_word_count = HashMap::new();
let mut token_fn = |_fname: &str, fid: FieldId, _pos: u16, _word: &str| {
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
Ok(())
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut token_fn,
)?;
// The docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS are deleted.
for (fid, count) in fid_word_count.iter() {
if *count <= MAX_COUNTED_WORDS {
let key = build_key(*fid, *count as u8, &mut key_buffer);
cached_sorter.insert_del_u32(key, inner.docid());
}
}
}
DocumentChange::Update(inner) => {
let mut fid_word_count = HashMap::new();
let mut token_fn = |_fname: &str, fid: FieldId, _pos: u16, _word: &str| {
fid_word_count
.entry(fid)
.and_modify(|(current_count, _new_count)| *current_count += 1)
.or_insert((1, 0));
Ok(())
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut token_fn,
)?;
let mut token_fn = |_fname: &str, fid: FieldId, _pos: u16, _word: &str| {
fid_word_count
.entry(fid)
.and_modify(|(_current_count, new_count)| *new_count += 1)
.or_insert((0, 1));
Ok(())
};
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
// Only the fields that have a change in the number of words are updated.
for (fid, (current_count, new_count)) in fid_word_count.iter() {
if *current_count != *new_count {
if *current_count <= MAX_COUNTED_WORDS {
let key = build_key(*fid, *current_count as u8, &mut key_buffer);
cached_sorter.insert_del_u32(key, inner.docid());
}
if *new_count <= MAX_COUNTED_WORDS {
let key = build_key(*fid, *new_count as u8, &mut key_buffer);
cached_sorter.insert_add_u32(key, inner.docid());
}
}
}
}
DocumentChange::Insertion(inner) => {
let mut fid_word_count = HashMap::new();
let mut token_fn = |_fname: &str, fid: FieldId, _pos: u16, _word: &str| {
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
Ok(())
};
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
// The docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS are stored.
for (fid, count) in fid_word_count.iter() {
if *count <= MAX_COUNTED_WORDS {
let key = build_key(*fid, *count as u8, &mut key_buffer);
cached_sorter.insert_add_u32(key, inner.docid());
}
}
}
}
Ok(())
}
}
fn build_key(fid: FieldId, count: u8, key_buffer: &mut Vec<u8>) -> &[u8] {
key_buffer.clear();
key_buffer.extend_from_slice(&fid.to_be_bytes());
key_buffer.push(count);
key_buffer.as_slice()
}
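// A minimal illustrative sketch (not part of the original file): the key is the
// big-endian field id followed by the word count as a single byte, which is enough
// because counts above MAX_COUNTED_WORDS (30) are never indexed. The helper name is
// hypothetical.
fn _fid_word_count_key_layout() {
    let mut buffer = Vec::new();
    // field id 1 with 3 counted words becomes [0, 1, 3]
    assert_eq!(build_key(1, 3, &mut buffer), &[0, 1, 3]);
}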

View File

@@ -1,594 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::fs::File;
use std::num::NonZero;
use grenad::{Merger, MergerBuilder};
use heed::RoTxn;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::SearchableExtractor;
use crate::update::new::extract::cache::CboCachedSorter;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::extract::HashMapMerger;
use crate::update::new::{DocumentChange, ItemsPool};
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
use crate::{
bucketed_position, DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result,
MAX_POSITION_PER_ATTRIBUTE,
};
const MAX_COUNTED_WORDS: usize = 30;
trait ProtoWordDocidsExtractor {
fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>;
fn attributes_to_extract<'a>(
_rtxn: &'a RoTxn,
_index: &'a Index,
) -> Result<Option<Vec<&'a str>>>;
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
}
impl<T> SearchableExtractor for T
where
T: ProtoWordDocidsExtractor,
{
fn extract_document_change(
rtxn: &RoTxn,
index: &Index,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
cached_sorter: &mut CboCachedSorter,
document_change: DocumentChange,
) -> Result<()> {
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |_fname: &str, fid, pos, word: &str| {
let key = Self::build_key(fid, pos, word);
Ok(cached_sorter.insert_del_u32(&key, inner.docid()))
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut token_fn,
)?;
}
DocumentChange::Update(inner) => {
let mut token_fn = |_fname: &str, fid, pos, word: &str| {
let key = Self::build_key(fid, pos, word);
Ok(cached_sorter.insert_del_u32(&key, inner.docid()))
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut token_fn,
)?;
let mut token_fn = |_fname: &str, fid, pos, word: &str| {
let key = Self::build_key(fid, pos, word);
Ok(cached_sorter.insert_add_u32(&key, inner.docid()))
};
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
}
DocumentChange::Insertion(inner) => {
let mut token_fn = |_fname: &str, fid, pos, word: &str| {
let key = Self::build_key(fid, pos, word);
Ok(cached_sorter.insert_add_u32(&key, inner.docid()))
};
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
}
}
Ok(())
}
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
Self::attributes_to_extract(rtxn, index)
}
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
Self::attributes_to_skip(rtxn, index)
}
}
pub struct WordDocidsExtractor;
impl ProtoWordDocidsExtractor for WordDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
// exact attributes must be skipped and stored in a separate DB, see `ExactWordDocidsExtractor`.
index.exact_attributes(rtxn).map_err(Into::into)
}
/// TODO write in an external Vec buffer
fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
Cow::Borrowed(word.as_bytes())
}
}
pub struct ExactWordDocidsExtractor;
impl ProtoWordDocidsExtractor for ExactWordDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
let exact_attributes = index.exact_attributes(rtxn)?;
// If there are no user-defined searchable fields, we return all exact attributes.
// Otherwise, we return the intersection of exact attributes and user-defined searchable fields.
if let Some(searchable_attributes) = index.user_defined_searchable_fields(rtxn)? {
let attributes = exact_attributes
.into_iter()
.filter(|attr| searchable_attributes.contains(attr))
.collect();
Ok(Some(attributes))
} else {
Ok(Some(exact_attributes))
}
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
}
fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
Cow::Borrowed(word.as_bytes())
}
}
pub struct WordFidDocidsExtractor;
impl ProtoWordDocidsExtractor for WordFidDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
}
fn build_key(field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
let mut key = Vec::new();
key.extend_from_slice(word.as_bytes());
key.push(0);
key.extend_from_slice(&field_id.to_be_bytes());
Cow::Owned(key)
}
}
pub struct WordPositionDocidsExtractor;
impl ProtoWordDocidsExtractor for WordPositionDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
}
fn build_key(_field_id: FieldId, position: u16, word: &str) -> Cow<[u8]> {
// position must be bucketed to reduce the number of keys in the DB.
let position = bucketed_position(position);
let mut key = Vec::new();
key.extend_from_slice(word.as_bytes());
key.push(0);
key.extend_from_slice(&position.to_be_bytes());
Cow::Owned(key)
}
}
// V2
struct WordDocidsCachedSorters {
word_fid_docids: CboCachedSorter,
word_docids: CboCachedSorter,
exact_word_docids: CboCachedSorter,
word_position_docids: CboCachedSorter,
fid_word_count_docids: CboCachedSorter,
fid_word_count: HashMap<FieldId, (usize, usize)>,
current_docid: Option<DocumentId>,
}
impl WordDocidsCachedSorters {
pub fn new(
indexer: GrenadParameters,
max_memory: Option<usize>,
capacity: NonZero<usize>,
) -> Self {
let max_memory = max_memory.map(|max_memory| max_memory / 4);
let word_fid_docids = CboCachedSorter::new();
let word_docids = CboCachedSorter::new();
let exact_word_docids = CboCachedSorter::new();
let word_position_docids = CboCachedSorter::new();
let fid_word_count_docids = CboCachedSorter::new();
Self {
word_fid_docids,
word_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
fid_word_count: HashMap::new(),
current_docid: None,
}
}
fn insert_add_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
docid: u32,
buffer: &mut Vec<u8>,
) -> Result<()> {
let key = word.as_bytes();
if exact {
self.exact_word_docids.insert_add_u32(key, docid);
} else {
self.word_docids.insert_add_u32(key, docid);
}
buffer.clear();
buffer.extend_from_slice(word.as_bytes());
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(buffer, docid);
buffer.clear();
buffer.extend_from_slice(word.as_bytes());
buffer.push(0);
buffer.extend_from_slice(&bucketed_position(position).to_be_bytes());
self.word_position_docids.insert_add_u32(buffer, docid);
if self.current_docid.map_or(false, |id| docid != id) {
self.flush_fid_word_count(buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count += 1)
.or_insert((0, 1));
self.current_docid = Some(docid);
Ok(())
}
fn insert_del_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
docid: u32,
buffer: &mut Vec<u8>,
) -> Result<()> {
let key = word.as_bytes();
if exact {
self.exact_word_docids.insert_del_u32(key, docid);
} else {
self.word_docids.insert_del_u32(key, docid);
}
buffer.clear();
buffer.extend_from_slice(word.as_bytes());
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(buffer, docid);
buffer.clear();
buffer.extend_from_slice(word.as_bytes());
buffer.push(0);
buffer.extend_from_slice(&bucketed_position(position).to_be_bytes());
self.word_position_docids.insert_del_u32(buffer, docid);
if self.current_docid.map_or(false, |id| docid != id) {
self.flush_fid_word_count(buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count += 1)
.or_insert((1, 0));
self.current_docid = Some(docid);
Ok(())
}
fn flush_fid_word_count(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
if current_count != new_count {
if current_count <= MAX_COUNTED_WORDS {
buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(current_count as u8);
self.fid_word_count_docids.insert_del_u32(buffer, self.current_docid.unwrap());
}
if new_count <= MAX_COUNTED_WORDS {
buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(new_count as u8);
self.fid_word_count_docids.insert_add_u32(buffer, self.current_docid.unwrap());
}
}
}
Ok(())
}
}
struct WordDocidsMergerBuilders {
word_fid_docids: HashMapMerger,
word_docids: HashMapMerger,
exact_word_docids: HashMapMerger,
word_position_docids: HashMapMerger,
fid_word_count_docids: HashMapMerger,
}
pub struct WordDocidsMergers {
pub word_fid_docids: HashMapMerger,
pub word_docids: HashMapMerger,
pub exact_word_docids: HashMapMerger,
pub word_position_docids: HashMapMerger,
pub fid_word_count_docids: HashMapMerger,
}
impl WordDocidsMergerBuilders {
fn new() -> Self {
Self {
word_fid_docids: HashMapMerger::new(),
word_docids: HashMapMerger::new(),
exact_word_docids: HashMapMerger::new(),
word_position_docids: HashMapMerger::new(),
fid_word_count_docids: HashMapMerger::new(),
}
}
fn add_sorters(&mut self, other: WordDocidsCachedSorters) -> Result<()> {
let WordDocidsCachedSorters {
word_fid_docids,
word_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
fid_word_count: _,
current_docid: _,
} = other;
let mut word_fid_docids_readers = HashMap::new();
let mut word_docids_readers = HashMap::new();
let mut exact_word_docids_readers = HashMap::new();
let mut word_position_docids_readers = HashMap::new();
let mut fid_word_count_docids_readers = HashMap::new();
rayon::scope(|s| {
s.spawn(|_| {
word_fid_docids_readers = word_fid_docids.into_sorter();
});
s.spawn(|_| {
word_docids_readers = word_docids.into_sorter();
});
s.spawn(|_| {
exact_word_docids_readers = exact_word_docids.into_sorter();
});
s.spawn(|_| {
word_position_docids_readers = word_position_docids.into_sorter();
});
s.spawn(|_| {
fid_word_count_docids_readers = fid_word_count_docids.into_sorter();
});
});
self.word_fid_docids.extend([word_fid_docids_readers]);
self.word_docids.extend([word_docids_readers]);
self.exact_word_docids.extend([exact_word_docids_readers]);
self.word_position_docids.extend([word_position_docids_readers]);
self.fid_word_count_docids.extend([fid_word_count_docids_readers]);
Ok(())
}
fn build(self) -> WordDocidsMergers {
WordDocidsMergers {
word_fid_docids: self.word_fid_docids,
word_docids: self.word_docids,
exact_word_docids: self.exact_word_docids,
word_position_docids: self.word_position_docids,
fid_word_count_docids: self.fid_word_count_docids,
}
}
}
pub struct WordDocidsExtractors;
impl WordDocidsExtractors {
pub fn run_extraction(
index: &Index,
fields_ids_map: &GlobalFieldsIdsMap,
indexer: GrenadParameters,
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
) -> Result<WordDocidsMergers> {
let max_memory = indexer.max_memory_by_thread();
let rtxn = index.read_txn()?;
let stop_words = index.stop_words(&rtxn)?;
let allowed_separators = index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.into_tokenizer();
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?;
let localized_attributes_rules =
index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let context_pool = ItemsPool::new(|| {
Ok((
index.read_txn()?,
&document_tokenizer,
fields_ids_map.clone(),
WordDocidsCachedSorters::new(
indexer,
max_memory,
// TODO use a better value
200_000.try_into().unwrap(),
),
))
});
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
document_changes.into_par_iter().try_for_each(|document_change| {
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
Self::extract_document_change(
&*rtxn,
index,
document_tokenizer,
fields_ids_map,
cached_sorter,
document_change?,
)
})
})?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
let _entered = span.enter();
let mut builder = WordDocidsMergerBuilders::new();
for (_rtxn, _tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
builder.add_sorters(cache)?;
}
Ok(builder.build())
}
}
fn extract_document_change(
rtxn: &RoTxn,
index: &Index,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
cached_sorter: &mut WordDocidsCachedSorters,
document_change: DocumentChange,
) -> Result<()> {
let exact_attributes = index.exact_attributes(rtxn)?;
let is_exact_attribute =
|fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
let mut buffer = Vec::new();
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter
.insert_del_u32(
fid,
pos,
word,
is_exact_attribute(fname),
inner.docid(),
&mut buffer,
)
.map_err(crate::Error::from)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut token_fn,
)?;
}
DocumentChange::Update(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter
.insert_del_u32(
fid,
pos,
word,
is_exact_attribute(fname),
inner.docid(),
&mut buffer,
)
.map_err(crate::Error::from)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index)?.unwrap(),
fields_ids_map,
&mut token_fn,
)?;
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter
.insert_add_u32(
fid,
pos,
word,
is_exact_attribute(fname),
inner.docid(),
&mut buffer,
)
.map_err(crate::Error::from)
};
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
}
DocumentChange::Insertion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter
.insert_add_u32(
fid,
pos,
word,
is_exact_attribute(fname),
inner.docid(),
&mut buffer,
)
.map_err(crate::Error::from)
};
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
}
}
cached_sorter.flush_fid_word_count(&mut buffer)
}
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
}
}
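Note on the key layout used by the cached sorters above: `word_fid_docids` entries are keyed by the word bytes, a zero byte, then the big-endian field id, while `word_position_docids` uses the word, a zero byte, then the big-endian bucketed position. A minimal standalone sketch of that layout, with `FieldId` narrowed to `u16` and a hypothetical `bucketed_position` standing in for the milli helper:

// Standalone sketch of the composite keys built by the cached sorters above.
// `FieldId` and `bucketed_position` are simplified assumptions, not the milli types.
type FieldId = u16;

// Hypothetical bucketing: keep early positions exact, then coarsen later ones.
fn bucketed_position(position: u16) -> u16 {
    if position < 16 { position } else { position - (position % 8) }
}

fn word_fid_key(word: &str, fid: FieldId, buffer: &mut Vec<u8>) {
    buffer.clear();
    buffer.extend_from_slice(word.as_bytes());
    buffer.push(0);
    buffer.extend_from_slice(&fid.to_be_bytes());
}

fn word_position_key(word: &str, position: u16, buffer: &mut Vec<u8>) {
    buffer.clear();
    buffer.extend_from_slice(word.as_bytes());
    buffer.push(0);
    buffer.extend_from_slice(&bucketed_position(position).to_be_bytes());
}

fn main() {
    let mut buffer = Vec::new();
    word_fid_key("doggo", 2, &mut buffer);
    println!("word_fid_docids key:      {buffer:?}");
    word_position_key("doggo", 35, &mut buffer);
    println!("word_position_docids key: {buffer:?}");
}

Because the word comes first in the key, all entries for a given word stay adjacent once the sorters are merged in byte order.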


@@ -1,160 +0,0 @@
use std::collections::VecDeque;
use std::rc::Rc;
use heed::RoTxn;
use obkv::KvReader;
use super::tokenize_document::DocumentTokenizer;
use super::SearchableExtractor;
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::new::extract::cache::CboCachedSorter;
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
pub struct WordPairProximityDocidsExtractor;
impl SearchableExtractor for WordPairProximityDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
}
// This method is reimplemented to extract the word pairs that appear within MAX_DISTANCE of each other
// in the document, accumulating deleted and added pairs separately before flushing them to the sorter.
fn extract_document_change(
rtxn: &RoTxn,
index: &Index,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
cached_sorter: &mut CboCachedSorter,
document_change: DocumentChange,
) -> Result<()> {
let mut key_buffer = Vec::new();
let mut del_word_pair_proximity = Vec::new();
let mut add_word_pair_proximity = Vec::new();
let mut word_positions: VecDeque<(Rc<str>, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
let docid = document_change.docid();
match document_change {
DocumentChange::Deletion(inner) => {
let document = inner.current(rtxn, index)?.unwrap();
process_document_tokens(
document,
document_tokenizer,
fields_ids_map,
&mut word_positions,
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
)?;
}
DocumentChange::Update(inner) => {
let document = inner.current(rtxn, index)?.unwrap();
process_document_tokens(
document,
document_tokenizer,
fields_ids_map,
&mut word_positions,
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
)?;
let document = inner.new();
process_document_tokens(
document,
document_tokenizer,
fields_ids_map,
&mut word_positions,
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
)?;
}
DocumentChange::Insertion(inner) => {
let document = inner.new();
process_document_tokens(
document,
document_tokenizer,
fields_ids_map,
&mut word_positions,
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
)?;
}
}
del_word_pair_proximity.sort_unstable();
del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
for ((w1, w2), prox) in del_word_pair_proximity.iter() {
let key = build_key(*prox, w1, w2, &mut key_buffer);
cached_sorter.insert_del_u32(key, docid);
}
add_word_pair_proximity.sort_unstable();
add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
for ((w1, w2), prox) in add_word_pair_proximity.iter() {
let key = build_key(*prox, w1, w2, &mut key_buffer);
cached_sorter.insert_add_u32(key, docid);
}
Ok(())
}
}
fn build_key<'a>(prox: u8, w1: &str, w2: &str, key_buffer: &'a mut Vec<u8>) -> &'a [u8] {
key_buffer.clear();
key_buffer.push(prox);
key_buffer.extend_from_slice(w1.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(w2.as_bytes());
key_buffer.as_slice()
}
fn word_positions_into_word_pair_proximity(
word_positions: &mut VecDeque<(Rc<str>, u16)>,
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
) -> Result<()> {
let (head_word, head_position) = word_positions.pop_front().unwrap();
for (word, position) in word_positions.iter() {
let prox = index_proximity(head_position as u32, *position as u32) as u8;
if prox > 0 && prox < MAX_DISTANCE as u8 {
word_pair_proximity((head_word.clone(), word.clone()), prox);
}
}
Ok(())
}
fn process_document_tokens(
document: &KvReader<FieldId>,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
word_positions: &mut VecDeque<(Rc<str>, u16)>,
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
) -> Result<()> {
let mut token_fn = |_fname: &str, _fid: FieldId, pos: u16, word: &str| {
// drain the proximity window until the head word is considered close to the word we are inserting.
while word_positions
.front()
.map_or(false, |(_w, p)| index_proximity(*p as u32, pos as u32) >= MAX_DISTANCE)
{
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?;
}
// insert the new word.
word_positions.push_back((Rc::from(word), pos));
Ok(())
};
document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
while !word_positions.is_empty() {
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?;
}
Ok(())
}
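The extractor above keeps a sliding window of recent word positions and, whenever the head word falls out of range, pairs it with every word still inside the window. A standalone sketch of that window, using a simplified capped-distance function in place of milli's `index_proximity` and assuming MAX_DISTANCE is 8:

use std::collections::VecDeque;

// Assumption mirroring the MAX_DISTANCE bound used by the extractor above.
const MAX_DISTANCE: u32 = 8;

// Simplified stand-in for milli's `index_proximity`: forward distance, capped.
fn proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs { (rhs - lhs).min(MAX_DISTANCE) } else { MAX_DISTANCE }
}

fn main() {
    // (word, position) pairs as the document tokenizer would emit them.
    let tokens = [("the", 0u16), ("quick", 1), ("brown", 2), ("fox", 3), ("jumps", 12)];
    let mut window: VecDeque<(&str, u16)> = VecDeque::new();
    let mut pairs = Vec::new();

    for &(word, pos) in &tokens {
        // Drain the head while it is too far from the incoming word,
        // pairing it with everything still inside the window.
        while window
            .front()
            .map_or(false, |&(_, p)| proximity(p as u32, pos as u32) >= MAX_DISTANCE)
        {
            let (head, head_pos) = window.pop_front().unwrap();
            for &(other, other_pos) in window.iter() {
                let prox = proximity(head_pos as u32, other_pos as u32) as u8;
                if prox > 0 && prox < MAX_DISTANCE as u8 {
                    pairs.push(((head, other), prox));
                }
            }
        }
        window.push_back((word, pos));
    }
    // Flush whatever remains in the window at the end of the document.
    while let Some((head, head_pos)) = window.pop_front() {
        for &(other, other_pos) in window.iter() {
            let prox = proximity(head_pos as u32, other_pos as u32) as u8;
            if prox > 0 && prox < MAX_DISTANCE as u8 {
                pairs.push(((head, other), prox));
            }
        }
    }
    println!("{pairs:?}");
}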


@@ -1,126 +0,0 @@
mod extract_fid_word_count_docids;
mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;
use std::fs::File;
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
use grenad::Merger;
use heed::RoTxn;
use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::CboCachedSorter;
use super::{DocidsExtractor, HashMapMerger};
use crate::update::new::{DocumentChange, ItemsPool};
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
use crate::{GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
pub trait SearchableExtractor {
fn run_extraction(
index: &Index,
fields_ids_map: &GlobalFieldsIdsMap,
indexer: GrenadParameters,
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
) -> Result<HashMapMerger> {
let max_memory = indexer.max_memory_by_thread();
let rtxn = index.read_txn()?;
let stop_words = index.stop_words(&rtxn)?;
let allowed_separators = index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.into_tokenizer();
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?;
let localized_attributes_rules =
index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let context_pool = ItemsPool::new(|| {
Ok((
index.read_txn()?,
&document_tokenizer,
fields_ids_map.clone(),
CboCachedSorter::new(),
))
});
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
document_changes.into_par_iter().try_for_each(|document_change| {
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
Self::extract_document_change(
&*rtxn,
index,
document_tokenizer,
fields_ids_map,
cached_sorter,
document_change?,
)
})
})?;
}
{
let mut builder = HashMapMerger::new();
let span =
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
let _entered = span.enter();
let readers: Vec<_> = context_pool
.into_items()
.par_bridge()
.map(|(_rtxn, _tokenizer, _fields_ids_map, cached_sorter)| {
cached_sorter.into_sorter()
})
.collect();
builder.extend(readers);
Ok(builder)
}
}
fn extract_document_change(
rtxn: &RoTxn,
index: &Index,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
cached_sorter: &mut CboCachedSorter,
document_change: DocumentChange,
) -> Result<()>;
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
-> Result<Option<Vec<&'a str>>>;
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
}
impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction(
index: &Index,
fields_ids_map: &GlobalFieldsIdsMap,
indexer: GrenadParameters,
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
) -> Result<HashMapMerger> {
Self::run_extraction(index, fields_ids_map, indexer, document_changes)
}
}
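The blanket impl at the end of this module is what lets any `SearchableExtractor` be passed wherever a `DocidsExtractor` is expected. A minimal standalone sketch of that delegation pattern, with illustrative trait and type names rather than the milli ones:

// Every type implementing the narrower trait automatically implements the wider
// one through a blanket impl, so callers only need the `DocidsExtractor` bound.
trait SearchableExtractor {
    fn run_extraction(input: &str) -> Vec<String>;
}

trait DocidsExtractor {
    fn run_extraction(input: &str) -> Vec<String>;
}

impl<T: SearchableExtractor> DocidsExtractor for T {
    fn run_extraction(input: &str) -> Vec<String> {
        <T as SearchableExtractor>::run_extraction(input)
    }
}

struct WordExtractor;

impl SearchableExtractor for WordExtractor {
    fn run_extraction(input: &str) -> Vec<String> {
        input.split_whitespace().map(str::to_owned).collect()
    }
}

fn main() {
    // The caller goes through the generic `DocidsExtractor` entry point.
    let words = <WordExtractor as DocidsExtractor>::run_extraction("hello brave world");
    println!("{words:?}");
}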


@@ -1,273 +0,0 @@
use std::collections::HashMap;
use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use serde_json::Value;
use crate::proximity::MAX_DISTANCE;
use crate::update::new::extract::perm_json_p::{
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field,
};
use crate::update::new::KvReaderFieldId;
use crate::{
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
MAX_WORD_LENGTH,
};
pub struct DocumentTokenizer<'a> {
pub tokenizer: &'a Tokenizer<'a>,
pub attribute_to_extract: Option<&'a [&'a str]>,
pub attribute_to_skip: &'a [&'a str],
pub localized_attributes_rules: &'a [LocalizedAttributesRule],
pub max_positions_per_attributes: u32,
}
impl<'a> DocumentTokenizer<'a> {
pub fn tokenize_document(
&self,
obkv: &KvReaderFieldId,
field_id_map: &mut GlobalFieldsIdsMap,
token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
) -> Result<()> {
let mut field_position = HashMap::new();
let mut field_name = String::new();
for (field_id, field_bytes) in obkv {
let Some(field_name) = field_id_map.name(field_id).map(|s| {
field_name.clear();
field_name.push_str(s);
&field_name
}) else {
unreachable!("field id not found in field id map");
};
let mut tokenize_field = |name: &str, value: &Value| {
let Some(field_id) = field_id_map.id_or_insert(name) else {
return Err(UserError::AttributeLimitReached.into());
};
let position = field_position
.entry(field_id)
.and_modify(|counter| *counter += MAX_DISTANCE)
.or_insert(0);
if *position as u32 >= self.max_positions_per_attributes {
return Ok(());
}
match value {
Value::Number(n) => {
let token = n.to_string();
if let Ok(position) = (*position).try_into() {
token_fn(name, field_id, position, token.as_str())?;
}
Ok(())
}
Value::String(text) => {
// create an iterator of token with their positions.
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name))
.map(|rule| rule.locales());
let tokens = process_tokens(
*position,
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales),
)
.take_while(|(p, _)| (*p as u32) < self.max_positions_per_attributes);
for (index, token) in tokens {
// keep a word only if it is not empty and fit in a LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
*position = index;
if let Ok(position) = (*position).try_into() {
token_fn(name, field_id, position, token)?;
}
}
}
Ok(())
}
_ => Ok(()),
}
};
// if the current field is searchable or contains a searchable attribute
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip) {
// parse json.
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
Value::Object(object) => seek_leaf_values_in_object(
&object,
self.attribute_to_extract,
self.attribute_to_skip,
field_name,
&mut tokenize_field,
)?,
Value::Array(array) => seek_leaf_values_in_array(
&array,
self.attribute_to_extract,
self.attribute_to_skip,
field_name,
&mut tokenize_field,
)?,
value => tokenize_field(field_name, &value)?,
}
}
}
Ok(())
}
}
/// Takes an iterator over tokens and computes their relative positions depending on separator kinds:
/// if it's a `Hard` separator we add an additional relative proximity of MAX_DISTANCE between words,
/// otherwise we keep the standard proximity of 1 between words.
fn process_tokens<'a>(
start_offset: u32,
tokens: impl Iterator<Item = Token<'a>>,
) -> impl Iterator<Item = (u32, Token<'a>)> {
tokens
.skip_while(|token| token.is_separator())
.scan((start_offset, None), |(offset, prev_kind), mut token| {
match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
*offset += match *prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => MAX_DISTANCE,
Some(_) => 1,
None => 0,
};
*prev_kind = Some(token.kind)
}
TokenKind::Separator(SeparatorKind::Hard) => {
*prev_kind = Some(token.kind);
}
TokenKind::Separator(SeparatorKind::Soft)
if *prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) =>
{
*prev_kind = Some(token.kind);
}
_ => token.kind = TokenKind::Unknown,
}
Some((*offset, token))
})
.filter(|(_, t)| t.is_word())
}
/// Factorize tokenizer building.
pub fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<&'a [u8]>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
) -> TokenizerBuilder<'a, &'a [u8]> {
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
if let Some(dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}
tokenizer_builder
}
#[cfg(test)]
mod test {
use charabia::TokenizerBuilder;
use meili_snap::snapshot;
use obkv::KvReader;
use serde_json::json;
use super::*;
use crate::FieldsIdsMap;
#[test]
fn test_tokenize_document() {
let mut fields_ids_map = FieldsIdsMap::new();
let field_1 = json!({
"name": "doggo",
"age": 10,
});
let field_2 = json!({
"catto": {
"name": "pesti",
"age": 23,
}
});
let field_3 = json!(["doggo", "catto"]);
let field_4 = json!("UNSEARCHABLE");
let field_5 = json!({"nope": "unsearchable"});
let mut obkv = obkv::KvWriter::memory();
let field_1_id = fields_ids_map.insert("doggo").unwrap();
let field_1 = serde_json::to_string(&field_1).unwrap();
obkv.insert(field_1_id, field_1.as_bytes()).unwrap();
let field_2_id = fields_ids_map.insert("catto").unwrap();
let field_2 = serde_json::to_string(&field_2).unwrap();
obkv.insert(field_2_id, field_2.as_bytes()).unwrap();
let field_3_id = fields_ids_map.insert("doggo.name").unwrap();
let field_3 = serde_json::to_string(&field_3).unwrap();
obkv.insert(field_3_id, field_3.as_bytes()).unwrap();
let field_4_id = fields_ids_map.insert("not-me").unwrap();
let field_4 = serde_json::to_string(&field_4).unwrap();
obkv.insert(field_4_id, field_4.as_bytes()).unwrap();
let field_5_id = fields_ids_map.insert("me-nether").unwrap();
let field_5 = serde_json::to_string(&field_5).unwrap();
obkv.insert(field_5_id, field_5.as_bytes()).unwrap();
let value = obkv.into_inner().unwrap();
let obkv = KvReader::from_slice(value.as_slice());
let mut tb = TokenizerBuilder::default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tb.build(),
attribute_to_extract: None,
attribute_to_skip: &["not-me", "me-nether.nope"],
localized_attributes_rules: &[],
max_positions_per_attributes: 1000,
};
let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
let mut global_fields_ids_map = GlobalFieldsIdsMap::new(&fields_ids_map_lock);
let mut words = std::collections::BTreeMap::new();
document_tokenizer
.tokenize_document(obkv, &mut global_fields_ids_map, &mut |_fname, fid, pos, word| {
words.insert([fid, pos], word.to_string());
Ok(())
})
.unwrap();
snapshot!(format!("{:#?}", words), @r###"
{
[
2,
0,
]: "doggo",
[
2,
MAX_DISTANCE,
]: "doggo",
[
2,
16,
]: "catto",
[
3,
0,
]: "10",
[
4,
0,
]: "pesti",
[
5,
0,
]: "23",
}
"###);
}
}
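`process_tokens` above turns separator kinds into position gaps: consecutive words sit one position apart, while a hard separator pushes the following word MAX_DISTANCE positions further. A standalone sketch of that rule with simplified token kinds (not the charabia types), assuming MAX_DISTANCE is 8:

// Sketch of the position-assignment rule; the enum and constant are assumptions.
const MAX_DISTANCE: u32 = 8;

#[derive(Clone, Copy, PartialEq)]
enum Kind { Word, SoftSep, HardSep }

fn assign_positions(tokens: &[(&str, Kind)]) -> Vec<(u32, String)> {
    let mut out = Vec::new();
    let mut offset = 0u32;
    let mut prev: Option<Kind> = None;
    for &(lemma, kind) in tokens {
        match kind {
            Kind::Word => {
                // A hard separator before this word widens the gap to MAX_DISTANCE.
                offset += match prev {
                    Some(Kind::HardSep) => MAX_DISTANCE,
                    Some(_) => 1,
                    None => 0,
                };
                prev = Some(Kind::Word);
                out.push((offset, lemma.to_string()));
            }
            Kind::HardSep => prev = Some(Kind::HardSep),
            Kind::SoftSep => {
                // A soft separator never downgrades a hard one.
                if prev != Some(Kind::HardSep) {
                    prev = Some(Kind::SoftSep);
                }
            }
        }
    }
    out
}

fn main() {
    let tokens = [
        ("doggo", Kind::Word), (" ", Kind::SoftSep), ("is", Kind::Word),
        (".", Kind::HardSep), ("happy", Kind::Word),
    ];
    // -> [(0, "doggo"), (1, "is"), (9, "happy")]
    println!("{:?}", assign_positions(&tokens));
}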


@@ -1,42 +0,0 @@
use std::sync::Arc;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
use super::DocumentChanges;
use crate::update::new::{Deletion, DocumentChange, ItemsPool};
use crate::{FieldsIdsMap, Index, Result};
pub struct DocumentDeletion {
pub to_delete: RoaringBitmap,
}
impl DocumentDeletion {
pub fn new() -> Self {
Self { to_delete: Default::default() }
}
pub fn delete_documents_by_docids(&mut self, docids: RoaringBitmap) {
self.to_delete |= docids;
}
}
impl<'p> DocumentChanges<'p> for DocumentDeletion {
type Parameter = &'p Index;
fn document_changes(
self,
_fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter,
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
let index = param;
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
let to_delete: Vec<_> = self.to_delete.into_iter().collect();
Ok(to_delete.into_par_iter().map_with(items, |items, docid| {
items.with(|rtxn| {
let current = index.document(rtxn, docid)?;
Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))
})
}))
}
}
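`DocumentDeletion` above hands each rayon worker a read transaction through `ItemsPool`, so a transaction is opened once per worker rather than once per document. A std-only sketch of that pooling idea, with a plain `String` standing in for the transaction and illustrative method names:

use std::sync::Mutex;

// Each caller borrows a reusable item, uses it, and returns it to the pool,
// so the expensive initializer runs at most once per concurrent user.
struct ItemsPool<T, F: Fn() -> T> {
    init: F,
    items: Mutex<Vec<T>>,
}

impl<T, F: Fn() -> T> ItemsPool<T, F> {
    fn new(init: F) -> Self {
        ItemsPool { init, items: Mutex::new(Vec::new()) }
    }

    fn with<R>(&self, f: impl FnOnce(&mut T) -> R) -> R {
        // Reuse an existing item if one is available, otherwise create one.
        let mut item = self.items.lock().unwrap().pop().unwrap_or_else(|| (self.init)());
        let result = f(&mut item);
        self.items.lock().unwrap().push(item);
        result
    }
}

fn main() {
    let pool = ItemsPool::new(|| {
        println!("opening a read transaction");
        String::from("rtxn")
    });
    for docid in [1u32, 2, 3] {
        // The "transaction" is created on the first call and reused afterwards.
        pool.with(|rtxn| println!("deleting document {docid} with {rtxn}"));
    }
}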


@@ -1,392 +0,0 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use heed::types::Bytes;
use heed::RoTxn;
use memmap2::Mmap;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use IndexDocumentsMethod as Idm;
use super::super::document_change::DocumentChange;
use super::super::items_pool::ItemsPool;
use super::super::{CowStr, TopLevelMap};
use super::DocumentChanges;
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update};
use crate::update::{AvailableIds, IndexDocumentsMethod};
use crate::{DocumentId, Error, FieldsIdsMap, Index, Result, UserError};
pub struct DocumentOperation<'pl> {
operations: Vec<Payload<'pl>>,
index_documents_method: IndexDocumentsMethod,
}
pub enum Payload<'pl> {
Addition(&'pl [u8]),
Deletion(Vec<String>),
}
pub struct PayloadStats {
pub document_count: usize,
pub bytes: u64,
}
#[derive(Clone)]
enum InnerDocOp<'pl> {
Addition(DocumentOffset<'pl>),
Deletion,
}
/// Represents an offset where a document lives
/// in an mmapped payload file.
#[derive(Clone)]
pub struct DocumentOffset<'pl> {
/// The mmapped payload files.
pub content: &'pl [u8],
}
impl<'pl> DocumentOperation<'pl> {
pub fn new(method: IndexDocumentsMethod) -> Self {
Self { operations: Default::default(), index_documents_method: method }
}
/// TODO please give me a type
/// The payload is expected to be a memory-mapped stream of JSON documents
pub fn add_documents(&mut self, payload: &'pl Mmap) -> Result<PayloadStats> {
payload.advise(memmap2::Advice::Sequential)?;
let document_count =
memchr::memmem::find_iter(&payload[..], "}{").count().saturating_add(1);
self.operations.push(Payload::Addition(&payload[..]));
Ok(PayloadStats { bytes: payload.len() as u64, document_count })
}
pub fn delete_documents(&mut self, to_delete: Vec<String>) {
self.operations.push(Payload::Deletion(to_delete))
}
}
impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
type Parameter = (&'p Index, &'p RoTxn<'p>, &'p PrimaryKey<'p>);
fn document_changes(
self,
fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter,
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
let (index, rtxn, primary_key) = param;
let documents_ids = index.documents_ids(rtxn)?;
let mut available_docids = AvailableIds::new(&documents_ids);
let mut docids_version_offsets = HashMap::<CowStr<'pl>, _>::new();
for operation in self.operations {
match operation {
Payload::Addition(payload) => {
let mut iter =
serde_json::Deserializer::from_slice(payload).into_iter::<TopLevelMap>();
/// TODO manage the error
let mut previous_offset = 0;
while let Some(document) = iter.next().transpose().unwrap() {
// TODO Fetch all document fields to fill the fields ids map
document.0.keys().for_each(|key| {
fields_ids_map.insert(key.as_ref());
});
// TODO we must manage the TooManyDocumentIds,InvalidDocumentId
// we must manage the unwrap
let external_document_id =
match primary_key.document_id_from_top_level_map(&document)? {
Ok(document_id) => Ok(document_id),
Err(DocumentIdExtractionError::InvalidDocumentId(e)) => Err(e),
Err(DocumentIdExtractionError::MissingDocumentId) => {
Err(UserError::MissingDocumentId {
primary_key: primary_key.name().to_string(),
document: document.try_into().unwrap(),
})
}
Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
Err(UserError::TooManyDocumentIds {
primary_key: primary_key.name().to_string(),
document: document.try_into().unwrap(),
})
}
}?;
let current_offset = iter.byte_offset();
let document_operation = InnerDocOp::Addition(DocumentOffset {
content: &payload[previous_offset..current_offset],
});
match docids_version_offsets.get_mut(external_document_id.as_ref()) {
None => {
let docid = match index
.external_documents_ids()
.get(rtxn, &external_document_id)?
{
Some(docid) => docid,
None => available_docids
.next()
.ok_or(Error::UserError(UserError::DocumentLimitReached))?,
};
docids_version_offsets.insert(
external_document_id,
(docid, vec![document_operation]),
);
}
Some((_, offsets)) => {
let useless_previous_addition = match self.index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => {
MergeDocumentForReplacement::USELESS_PREVIOUS_CHANGES
}
IndexDocumentsMethod::UpdateDocuments => {
MergeDocumentForUpdates::USELESS_PREVIOUS_CHANGES
}
};
if useless_previous_addition {
offsets.clear();
}
offsets.push(document_operation);
}
}
previous_offset = iter.byte_offset();
}
}
Payload::Deletion(to_delete) => {
for external_document_id in to_delete {
match docids_version_offsets.get_mut(external_document_id.as_str()) {
None => {
let docid = match index
.external_documents_ids()
.get(rtxn, &external_document_id)?
{
Some(docid) => docid,
None => available_docids
.next()
.ok_or(Error::UserError(UserError::DocumentLimitReached))?,
};
docids_version_offsets.insert(
CowStr(external_document_id.into()),
(docid, vec![InnerDocOp::Deletion]),
);
}
Some((_, offsets)) => {
offsets.clear();
offsets.push(InnerDocOp::Deletion);
}
}
}
}
}
}
/// TODO is it the best way to provide FieldsIdsMap to the parallel iterator?
let fields_ids_map = fields_ids_map.clone();
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
let mut docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect();
// Reorder the offsets to make sure we iterate on the file sequentially
let sort_function_key = match self.index_documents_method {
Idm::ReplaceDocuments => MergeDocumentForReplacement::sort_key,
Idm::UpdateDocuments => MergeDocumentForUpdates::sort_key,
};
// And finally sort them
docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops));
Ok(docids_version_offsets.into_par_iter().map_with(
Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))),
move |context_pool, (external_docid, (internal_docid, operations))| {
context_pool.with(|rtxn| {
let document_merge_function = match self.index_documents_method {
Idm::ReplaceDocuments => MergeDocumentForReplacement::merge,
Idm::UpdateDocuments => MergeDocumentForUpdates::merge,
};
document_merge_function(
rtxn,
index,
&fields_ids_map,
internal_docid,
external_docid.to_string(), // TODO do not clone
&operations,
)
})
},
))
}
}
trait MergeChanges {
/// Whether the payloads in the list of operations are useless or not.
const USELESS_PREVIOUS_CHANGES: bool;
/// Returns a key that is used to order the payloads the right way.
fn sort_key(docops: &[InnerDocOp]) -> usize;
fn merge(
rtxn: &RoTxn,
index: &Index,
fields_ids_map: &FieldsIdsMap,
docid: DocumentId,
external_docid: String,
operations: &[InnerDocOp],
) -> Result<DocumentChange>;
}
struct MergeDocumentForReplacement;
impl MergeChanges for MergeDocumentForReplacement {
const USELESS_PREVIOUS_CHANGES: bool = true;
/// Reorders to read only the last change.
fn sort_key(docops: &[InnerDocOp]) -> usize {
let f = |ido: &_| match ido {
InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize),
InnerDocOp::Deletion => None,
};
docops.iter().rev().find_map(f).unwrap_or(0)
}
/// Returns only the most recent version of a document based on the updates from the payloads.
///
/// This function is only meant to be used when doing a replacement and not an update.
fn merge(
rtxn: &RoTxn,
index: &Index,
fields_ids_map: &FieldsIdsMap,
docid: DocumentId,
external_docid: String,
operations: &[InnerDocOp],
) -> Result<DocumentChange> {
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
let current: Option<&KvReaderFieldId> = current.map(Into::into);
match operations.last() {
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
let map: TopLevelMap = serde_json::from_slice(content).unwrap();
let mut document_entries = Vec::new();
for (key, v) in map.0 {
let id = fields_ids_map.id(key.as_ref()).unwrap();
document_entries.push((id, v));
}
document_entries.sort_unstable_by_key(|(id, _)| *id);
let mut writer = KvWriterFieldId::memory();
document_entries
.into_iter()
.for_each(|(id, value)| writer.insert(id, value.get()).unwrap());
let new = writer.into_boxed();
match current {
Some(current) => {
let update = Update::create(docid, current.boxed(), new);
Ok(DocumentChange::Update(update))
}
None => Ok(DocumentChange::Insertion(Insertion::create(docid, new))),
}
}
Some(InnerDocOp::Deletion) => {
let deletion = match current {
Some(current) => Deletion::create(docid, current.boxed()),
None => todo!("Do that with Louis"),
};
Ok(DocumentChange::Deletion(deletion))
}
None => unreachable!("We must not have empty set of operations on a document"),
}
}
}
struct MergeDocumentForUpdates;
impl MergeChanges for MergeDocumentForUpdates {
const USELESS_PREVIOUS_CHANGES: bool = false;
/// Reorders the operations so the earliest changes are read first, which makes it faster to read the first one and then the rest.
fn sort_key(docops: &[InnerDocOp]) -> usize {
let f = |ido: &_| match ido {
InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize),
InnerDocOp::Deletion => None,
};
docops.iter().find_map(f).unwrap_or(0)
}
/// Reads the previous version of a document from the database, the new versions
/// in the grenad update files and merges them to generate a new boxed obkv.
///
/// This function is only meant to be used when doing an update and not a replacement.
fn merge(
rtxn: &RoTxn,
index: &Index,
fields_ids_map: &FieldsIdsMap,
docid: DocumentId,
external_docid: String,
operations: &[InnerDocOp],
) -> Result<DocumentChange> {
let mut document = BTreeMap::<_, Cow<_>>::new();
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
let current: Option<&KvReaderFieldId> = current.map(Into::into);
if operations.is_empty() {
unreachable!("We must not have empty set of operations on a document");
}
let last_deletion = operations.iter().rposition(|op| matches!(op, InnerDocOp::Deletion));
let operations = &operations[last_deletion.map_or(0, |i| i + 1)..];
// If there was a deletion we must not start
// from the original document but from scratch.
if last_deletion.is_none() {
if let Some(current) = current {
current.into_iter().for_each(|(k, v)| {
document.insert(k, v.into());
});
}
}
if operations.is_empty() {
let deletion = match current {
Some(current) => Deletion::create(docid, current.boxed()),
None => todo!("Do that with Louis"),
};
return Ok(DocumentChange::Deletion(deletion));
}
for operation in operations {
let DocumentOffset { content } = match operation {
InnerDocOp::Addition(offset) => offset,
InnerDocOp::Deletion => {
unreachable!("Deletion in document operations")
}
};
let map: TopLevelMap = serde_json::from_slice(content).unwrap();
for (key, v) in map.0 {
let id = fields_ids_map.id(key.as_ref()).unwrap();
document.insert(id, v.get().as_bytes().to_vec().into());
}
}
let mut writer = KvWriterFieldId::memory();
document.into_iter().for_each(|(id, value)| writer.insert(id, value).unwrap());
let new = writer.into_boxed();
match current {
Some(current) => {
let update = Update::create(docid, current.boxed(), new);
Ok(DocumentChange::Update(update))
}
None => {
let insertion = Insertion::create(docid, new);
Ok(DocumentChange::Insertion(insertion))
}
}
}
}
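`MergeDocumentForUpdates` folds a document's queued operations into a single change: everything up to the last deletion is discarded, and the remaining additions are merged field by field with later values winning. A standalone sketch of that rule over simplified string maps; unlike the real code it does not seed the merge with the version already stored in the index:

use std::collections::BTreeMap;

#[derive(Clone, Debug)]
enum DocOp {
    Addition(BTreeMap<String, String>),
    Deletion,
}

fn merge_for_update(ops: &[DocOp]) -> Option<BTreeMap<String, String>> {
    // Everything before the last deletion is irrelevant.
    let last_deletion = ops.iter().rposition(|op| matches!(op, DocOp::Deletion));
    let ops = &ops[last_deletion.map_or(0, |i| i + 1)..];
    if ops.is_empty() {
        // Only deletions remain: the document disappears.
        return None;
    }
    let mut document = BTreeMap::new();
    for op in ops {
        if let DocOp::Addition(fields) = op {
            // Later additions overwrite earlier fields.
            document.extend(fields.clone());
        }
    }
    Some(document)
}

fn main() {
    let add = |pairs: &[(&str, &str)]| {
        DocOp::Addition(pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect())
    };
    let ops = [
        add(&[("id", "1"), ("name", "kefir")]),
        DocOp::Deletion,
        add(&[("id", "1"), ("name", "echo")]),
        add(&[("breed", "patou")]),
    ];
    // Everything before the deletion is ignored; the two later additions merge.
    println!("{:?}", merge_for_update(&ops));
}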


@@ -1,316 +0,0 @@
use std::sync::RwLock;
use std::thread::{self, Builder};
use big_s::S;
pub use document_deletion::DocumentDeletion;
pub use document_operation::DocumentOperation;
use heed::{RoTxn, RwTxn};
pub use partial_dump::PartialDump;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use rayon::ThreadPool;
pub use update_by_function::UpdateByFunction;
use super::channel::*;
use super::document_change::DocumentChange;
use super::extract::*;
use super::merger::merge_grenad_entries;
use super::{ItemsPool, StdResult, TopLevelMap};
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
use crate::update::new::channel::ExtractorSender;
use crate::update::GrenadParameters;
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
mod document_deletion;
mod document_operation;
mod partial_dump;
mod update_by_function;
pub trait DocumentChanges<'p> {
type Parameter: 'p;
fn document_changes(
self,
fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter,
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>;
}
/// This is the main function of this crate.
///
/// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`].
///
/// TODO return stats
pub fn index<PI>(
wtxn: &mut RwTxn,
index: &Index,
fields_ids_map: FieldsIdsMap,
pool: &ThreadPool,
document_changes: PI,
) -> Result<()>
where
PI: IndexedParallelIterator<Item = Result<DocumentChange>> + Send + Clone,
{
let (merger_sender, writer_receiver) = merger_writer_channel(10_000);
// This channel acts as a rendezvous point to ensure that we are one task ahead
let (extractor_sender, merger_receiver) = extractors_merger_channels(4);
let fields_ids_map_lock = RwLock::new(fields_ids_map);
let global_fields_ids_map = GlobalFieldsIdsMap::new(&fields_ids_map_lock);
let global_fields_ids_map_clone = global_fields_ids_map.clone();
thread::scope(|s| {
// TODO manage the errors correctly
let current_span = tracing::Span::current();
let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
pool.in_place_scope(|_s| {
let span = tracing::trace_span!(target: "indexing::documents", parent: &current_span, "extract");
let _entered = span.enter();
let document_changes = document_changes.into_par_iter();
// TODO create a function that collects and compresses documents before sending them.
let document_sender = extractor_sender.document_sender();
document_changes.clone().into_par_iter().try_for_each(|result| {
match result? {
DocumentChange::Deletion(deletion) => {
let docid = deletion.docid();
document_sender.delete(docid).unwrap();
}
DocumentChange::Update(update) => {
let docid = update.docid();
let content = update.new();
document_sender.insert(docid, content.boxed()).unwrap();
}
DocumentChange::Insertion(insertion) => {
let docid = insertion.docid();
let content = insertion.new();
document_sender.insert(docid, content.boxed()).unwrap();
// extracted_dictionary_sender.send(self, dictionary: &[u8]);
}
}
Ok(()) as Result<_>
})?;
document_sender.finish().unwrap();
const TEN_GIB: usize = 10 * 1024 * 1024 * 1024;
let max_memory = TEN_GIB / dbg!(rayon::current_num_threads());
let grenad_parameters = GrenadParameters {
max_memory: Some(max_memory),
..GrenadParameters::default()
};
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted");
let _entered = span.enter();
extract_and_send_docids::<
FacetedDocidsExtractor,
FacetDocids,
>(
index,
&global_fields_ids_map,
grenad_parameters,
document_changes.clone(),
&extractor_sender,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
let WordDocidsMergers {
word_fid_docids,
word_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = WordDocidsExtractors::run_extraction(index, &global_fields_ids_map, grenad_parameters, document_changes.clone())?;
extractor_sender.send_searchable::<WordDocids>(word_docids).unwrap();
extractor_sender.send_searchable::<WordFidDocids>(word_fid_docids).unwrap();
extractor_sender.send_searchable::<ExactWordDocids>(exact_word_docids).unwrap();
extractor_sender.send_searchable::<WordPositionDocids>(word_position_docids).unwrap();
extractor_sender.send_searchable::<FidWordCountDocids>(fid_word_count_docids).unwrap();
}
// {
// let span = tracing::trace_span!(target: "indexing::documents::extract", "exact_word_docids");
// let _entered = span.enter();
// extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
// index,
// &global_fields_ids_map,
// grenad_parameters,
// document_changes.clone(),
// &extractor_sender,
// )?;
// }
// {
// let span = tracing::trace_span!(target: "indexing::documents::extract", "word_position_docids");
// let _entered = span.enter();
// extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
// index,
// &global_fields_ids_map,
// grenad_parameters,
// document_changes.clone(),
// &extractor_sender,
// )?;
// }
// {
// let span = tracing::trace_span!(target: "indexing::documents::extract", "fid_word_count_docids");
// let _entered = span.enter();
// extract_and_send_docids::<FidWordCountDocidsExtractor, FidWordCountDocids>(
// index,
// &global_fields_ids_map,
// GrenadParameters::default(),
// document_changes.clone(),
// &extractor_sender,
// )?;
// }
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
extract_and_send_docids::<
WordPairProximityDocidsExtractor,
WordPairProximityDocids,
>(
index,
&global_fields_ids_map,
grenad_parameters,
document_changes.clone(),
&extractor_sender,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH");
let _entered = span.enter();
}
// TODO THIS IS TOO MUCH
// - [ ] Extract fieldid docid facet number
// - [ ] Extract fieldid docid facet string
// - [ ] Extract facetid string fst
// - [ ] Extract facetid normalized string strings
// TODO Inverted Indexes again
// - [x] Extract fieldid facet isempty docids
// - [x] Extract fieldid facet isnull docids
// - [x] Extract fieldid facet exists docids
// TODO This is the normal system
// - [x] Extract fieldid facet number docids
// - [x] Extract fieldid facet string docids
Ok(()) as Result<_>
})
})?;
// TODO manage the errors correctly
let current_span = tracing::Span::current();
let handle2 = Builder::new().name(S("indexer-merger")).spawn_scoped(s, move || {
let span =
tracing::trace_span!(target: "indexing::documents", parent: &current_span, "merge");
let _entered = span.enter();
let rtxn_pool = ItemsPool::new(|| index.read_txn().map_err(Into::into));
merge_grenad_entries(
merger_receiver,
merger_sender,
&rtxn_pool,
index,
global_fields_ids_map_clone,
)
})?;
let mut entries_count = 0;
for operation in writer_receiver {
let database = operation.database(index);
match operation.entry() {
EntryOperation::Delete(e) => {
if !database.delete(wtxn, e.entry())? {
unreachable!("We tried to delete an unknown key")
}
}
EntryOperation::Write(e) => {
entries_count += 1;
database.put(wtxn, e.key(), e.value())?
}
}
}
eprintln!("We saw {entries_count}");
/// TODO handle the panicking threads
handle.join().unwrap()?;
handle2.join().unwrap()?;
Ok(()) as Result<_>
})?;
let fields_ids_map = fields_ids_map_lock.into_inner().unwrap();
index.put_fields_ids_map(wtxn, &fields_ids_map)?;
Ok(())
}
/// TODO: GrenadParameters::default() should be removed in favor a passed parameter
/// TODO: manage the errors correctly
/// TODO: we must have a single trait that also gives the extractor type
fn extract_and_send_docids<E: DocidsExtractor, D: MergerOperationType>(
index: &Index,
fields_ids_map: &GlobalFieldsIdsMap,
indexer: GrenadParameters,
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
sender: &ExtractorSender,
) -> Result<()> {
let merger = E::run_extraction(index, fields_ids_map, indexer, document_changes)?;
Ok(sender.send_searchable::<D>(merger).unwrap())
}
/// Returns the primary key that has already been set for this index or the
/// one we will guess by keeping the top-level keys whose lowercased name ends with "id".
/// TODO move this elsewhere
pub fn retrieve_or_guess_primary_key<'a>(
rtxn: &'a RoTxn<'a>,
index: &Index,
fields_ids_map: &mut FieldsIdsMap,
first_document: Option<&'a TopLevelMap<'_>>,
) -> Result<StdResult<PrimaryKey<'a>, UserError>> {
match index.primary_key(rtxn)? {
Some(primary_key) => match PrimaryKey::new(primary_key, fields_ids_map) {
Some(primary_key) => Ok(Ok(primary_key)),
None => unreachable!("Why is the primary key not in the fidmap?"),
},
None => {
let first_document = match first_document {
Some(document) => document,
None => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
};
let mut guesses: Vec<&str> = first_document
.keys()
.map(AsRef::as_ref)
.filter(|name| name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY))
.collect();
// sort the keys in lexicographical order, so that fields are always in the same order.
guesses.sort_unstable();
match guesses.as_slice() {
[] => Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
[name] => {
tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
match fields_ids_map.insert(name) {
Some(field_id) => Ok(Ok(PrimaryKey::Flat { name, field_id })),
None => Ok(Err(UserError::AttributeLimitReached)),
}
}
multiple => Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound {
candidates: multiple.iter().map(|candidate| candidate.to_string()).collect(),
})),
}
}
}
}
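`retrieve_or_guess_primary_key` above falls back to guessing when no primary key is set: it keeps the top-level keys whose lowercased name ends with "id", sorts them, and only accepts an unambiguous candidate. A standalone sketch of that guessing rule, with simplified error variants in place of the milli `UserError`s:

#[derive(Debug)]
enum GuessError {
    NoCandidate,
    MultipleCandidates(Vec<String>),
}

fn guess_primary_key<'a>(keys: &[&'a str]) -> Result<&'a str, GuessError> {
    // Keep the keys that look like an id, then sort for a deterministic pick.
    let mut guesses: Vec<&str> =
        keys.iter().copied().filter(|name| name.to_lowercase().ends_with("id")).collect();
    guesses.sort_unstable();
    match guesses.as_slice() {
        [] => Err(GuessError::NoCandidate),
        [name] => Ok(*name),
        multiple => Err(GuessError::MultipleCandidates(
            multiple.iter().map(|s| s.to_string()).collect(),
        )),
    }
}

fn main() {
    println!("{:?}", guess_primary_key(&["title", "overview", "movieId"])); // Ok("movieId")
    println!("{:?}", guess_primary_key(&["uid", "id"])); // ambiguous, rejected
}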


@@ -1,73 +0,0 @@
use rayon::iter::{IndexedParallelIterator, ParallelBridge, ParallelIterator};
use super::DocumentChanges;
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
use crate::update::concurrent_available_ids::ConcurrentAvailableIds;
use crate::update::new::{DocumentChange, Insertion, KvWriterFieldId};
use crate::{all_obkv_to_json, Error, FieldsIdsMap, Object, Result, UserError};
pub struct PartialDump<I> {
iter: I,
}
impl<I> PartialDump<I> {
pub fn new_from_jsonlines(iter: I) -> Self {
PartialDump { iter }
}
}
impl<'p, I> DocumentChanges<'p> for PartialDump<I>
where
I: IndexedParallelIterator<Item = Object> + Clone + 'p,
{
type Parameter = (&'p FieldsIdsMap, &'p ConcurrentAvailableIds, &'p PrimaryKey<'p>);
/// Note for future self:
/// - the fields ids map must already be valid, so you have to generate it beforehand.
/// - We should probably expose another method that generates the fields ids map from an iterator of JSON objects.
/// - We recommend sending chunks of documents in this `PartialDumpIndexer`; we therefore need to create a custom take_while_size method (that doesn't drop items).
fn document_changes(
self,
_fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter,
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
Ok(self.iter.map(|object| {
let docid = match concurrent_available_ids.next() {
Some(id) => id,
None => return Err(Error::UserError(UserError::DocumentLimitReached)),
};
let mut writer = KvWriterFieldId::memory();
object.iter().for_each(|(key, value)| {
let key = fields_ids_map.id(key).unwrap();
/// TODO better error management
let value = serde_json::to_vec(&value).unwrap();
/// TODO it is not ordered
writer.insert(key, value).unwrap();
});
let document = writer.into_boxed();
let external_docid = match primary_key.document_id(&document, fields_ids_map)? {
Ok(document_id) => Ok(document_id),
Err(DocumentIdExtractionError::InvalidDocumentId(user_error)) => Err(user_error),
Err(DocumentIdExtractionError::MissingDocumentId) => {
Err(UserError::MissingDocumentId {
primary_key: primary_key.name().to_string(),
document: all_obkv_to_json(&document, fields_ids_map)?,
})
}
Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
Err(UserError::TooManyDocumentIds {
primary_key: primary_key.name().to_string(),
document: all_obkv_to_json(&document, fields_ids_map)?,
})
}
}?;
let insertion = Insertion::create(docid, document);
Ok(DocumentChange::Insertion(insertion))
}))
}
}
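`PartialDump` asks a shared `ConcurrentAvailableIds` for a fresh internal docid for every incoming document, so parallel workers never hand out the same id twice. A minimal sketch of that kind of atomic id handout; it is illustrative only, since the real type also has to avoid ids already assigned in the index:

use std::sync::atomic::{AtomicU64, Ordering};

// An atomic counter that each worker bumps to claim the next docid,
// returning `None` once the u32 space is exhausted.
struct AvailableIds {
    next: AtomicU64,
}

impl AvailableIds {
    fn new() -> Self {
        AvailableIds { next: AtomicU64::new(0) }
    }

    fn next(&self) -> Option<u32> {
        let id = self.next.fetch_add(1, Ordering::Relaxed);
        u32::try_from(id).ok()
    }
}

fn main() {
    let ids = AvailableIds::new();
    std::thread::scope(|s| {
        for worker in 0..4 {
            let ids = &ids;
            s.spawn(move || {
                for _ in 0..3 {
                    // Each "document" gets a unique docid even across threads.
                    let docid = ids.next().expect("no more document ids");
                    println!("worker {worker} assigned docid {docid}");
                }
            });
        }
    });
}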

Some files were not shown because too many files have changed in this diff.