Compare commits


179 Commits

Author SHA1 Message Date
Clément Renault
bdd363dd94 Add spans 2024-09-26 17:20:32 +02:00
Clément Renault
d6b3aae8a6 WIP add more logs 2024-09-26 16:37:38 +02:00
Clément Renault
ac2d54b27c Make the merger multithreaded 2024-09-26 11:09:06 +02:00
Clément Renault
7d61697f19 Fix another iteration bug on hashmap entries 2024-09-25 22:42:41 +02:00
Clément Renault
97d2860998 Fix iterating on hashmap entries 2024-09-25 22:15:15 +02:00
Clément Renault
15bf556291 Write the inverted indexes in memory and never on disk 2024-09-25 18:13:19 +02:00
Clément Renault
3d244451df Reduce the lru key size from 8 to 12 bytes 2024-09-25 16:14:13 +02:00
Clément Renault
5f53935c8a Fix a bug in the Lru 2024-09-25 16:09:34 +02:00
Clément Renault
29a7623c3f Fix some logs 2024-09-25 15:57:50 +02:00
Clément Renault
e97041f7d0 Replace the Lru free list by a simple increment 2024-09-25 15:55:52 +02:00
Clément Renault
52d7f3ed1c Reduce the lru key size from 20 to 8 bytes 2024-09-25 15:37:13 +02:00
Clément Renault
86d5e6d9ff Use the new Lru 2024-09-25 14:54:56 +02:00
Clément Renault
759b9b1546 Introduce a new custom Lru 2024-09-25 14:49:12 +02:00
ManyTheFish
3f7a500f3b Build prefix fst 2024-09-25 14:36:06 +02:00
ManyTheFish
974272f2e9 Merge branch 'main' into indexer-edition-2024 2024-09-25 07:41:16 +02:00
Clément Renault
7ad037841f Move the tracing info to eprintln 2024-09-24 18:21:58 +02:00
Clément Renault
e0c7067355 Expose an IndexedParallelIterator to the index function 2024-09-24 17:24:59 +02:00
ManyTheFish
6e87332410 Change the way the FST is built 2024-09-24 16:28:31 +02:00
Clément Renault
2d1caf27df Use eprintln to log 2024-09-24 15:59:50 +02:00
Clément Renault
92678383d6 Update charabia 2024-09-24 15:37:56 +02:00
Clément Renault
7f148c127c Measure the SmallVec efficacy 2024-09-24 15:32:15 +02:00
Clément Renault
4ce5d3d66d Do not check before pushing in bitmaps 2024-09-24 09:43:16 +02:00
Clément Renault
ff931edb55 Update roaring to inline max calls 2024-09-23 16:53:42 +02:00
Clément Renault
42b093687d Introduce the new PushOptimizedBitmap 2024-09-23 16:38:21 +02:00
Clément Renault
835c5f98f9 Remove the debug symbols 2024-09-23 15:49:24 +02:00
Clément Renault
f00664247d Add more stats about the channel message sent 2024-09-23 15:13:52 +02:00
Clément Renault
3c63d4a1e5 Fix charabia Zho 2024-09-23 14:50:17 +02:00
Clément Renault
4551abf6d4 Update roaring to the latest version 2024-09-23 14:35:33 +02:00
Clément Renault
193d7f5d34 Add the mutualized charabia normalization 2024-09-23 14:24:25 +02:00
Clément Renault
013acb3d93 Measure merger writer channel contention 2024-09-23 11:07:59 +02:00
meili-bors[bot]
7f20c13f3f Merge #4943
4943: Correct broken links in README r=curquiza a=iornstein

# Pull Request

## Related issue
Fixes #4942

## What does this PR do?
- Corrects some broken links in the README. My suspicion is that some of these documentation articles were moved around without someone updating links in the README.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? _(well the contributing guidelines led me to create an issue first)_
- [x] Have you read the contributing guidelines? _yes_
- [x] Have you made sure that the title is accurate and descriptive of the changes? _yes_

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ian Ornstein <ian.ornstein@gmail.com>
2024-09-19 19:22:04 +00:00
meili-bors[bot]
462a2329f1 Merge #4941
4941: Implement the binary quantization in meilisearch r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4873

## What does this PR do?
- Add a settings for the binary quantization
- Once enabled, the bq cannot be disabled
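
For illustration, a minimal sketch of what enabling it might look like through the embedder settings (the `binaryQuantized` field name is an assumption, it is not shown in this diff):

```json
{
  "embedders": {
    "default": {
      "source": "huggingFace",
      "model": "BAAI/bge-base-en-v1.5",
      "binaryQuantized": true
    }
  }
}
```

Per the bullet above, switching `binaryQuantized` back to `false` afterwards would be rejected with an error.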

TODO:
- [ ] Missing a bunch of tests

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-19 15:50:24 +00:00
Tamo
f6483cf15d apply review comment 2024-09-19 16:47:06 +02:00
meili-bors[bot]
bd34ed01d9 Merge #4945
4945: Add swedish in default pipelines r=dureuill a=ManyTheFish

# Summary
## Fix Swedish support

In Swedish, the characters `å`/`ä`/`ö` are completely different from `a` and `o` and should not be normalized to the same characters.
Because the Swedish specialized pipeline was not activated by default, these characters were normalized anyway, even with the settings:
```json
{
  "localizedAttributes": [ { "locales": ["swe"], "attributePatterns": ["*"] } ]
}
```

## Update Charabia adding German support

German segmentation will now be activated using the setting:
```json
{
  "localizedAttributes": [ { "locales": ["deu"], "attributePatterns": ["*"] } ]
}
```

# TODO

- [x] Activate Swedish Pipeline
- [x] Add a test to avoid future regressions
- [x] Update Charabia


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-19 14:42:03 +00:00
Tamo
74199f328d Make clippy happy 2024-09-19 16:27:34 +02:00
Tamo
1113c42de0 fix broken comments 2024-09-19 16:18:36 +02:00
ManyTheFish
465afe01b2 Add test for German 2024-09-19 16:09:01 +02:00
ManyTheFish
7d6768e4c4 Add german tokenization pipeline 2024-09-19 16:09:01 +02:00
ManyTheFish
f77661ec44 Update Charabia v0.9.1 2024-09-19 16:08:59 +02:00
Tamo
b8fd85a46d Get rid of useless collect before an iteration on the readers 2024-09-19 15:57:38 +02:00
Tamo
fd43c6c404 Improve the error message explaining you can't un-bq an embedder 2024-09-19 15:51:29 +02:00
Tamo
2564ec1496 Update milli/src/index.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 15:41:44 +02:00
Tamo
b6b73fe41c Update milli/src/update/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 15:41:14 +02:00
Tamo
6dde41cc46 stop using a local version of arroy and instead point to the git repo with the rev 2024-09-19 15:25:38 +02:00
Tamo
163f8023a1 remove debug println 2024-09-19 12:13:25 +02:00
Tamo
2b120b89e4 update the test now that the embedder must be specified 2024-09-19 12:08:59 +02:00
Tamo
84f842233d snapshots the embedder settings in the dump import with vector test 2024-09-19 12:00:58 +02:00
Tamo
633537ccd7 fix updating documents without updating the settings 2024-09-19 12:00:58 +02:00
Tamo
e8d7c00d30 add a test on the settings value 2024-09-19 12:00:58 +02:00
Tamo
3f6301dbc9 fix the missing embedder name in the error message when trying to disable the binary quantization 2024-09-19 12:00:58 +02:00
Tamo
ca71b63ed1 adds integration tests 2024-09-19 12:00:58 +02:00
Tamo
2b6952eda1 rename the ArroyReader to an ArroyWrapper since it can read and write 2024-09-19 12:00:58 +02:00
Tamo
79f29eed3c fix the tests and the arroy_readers method 2024-09-19 12:00:58 +02:00
Tamo
cc45e264ca implement the binary quantization in meilisearch 2024-09-19 12:00:56 +02:00
meili-bors[bot]
5f474a640d Merge #4938
4938: Remove default embedder r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4738 

## What does this PR do?

[See public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#1044b06b651f80edb9d4ef6dc367bad0)

- Remove `hybrid.embedder` boolean from analytics because embedder is now mandatory and so the boolean would always be `true`
- Rework search kind so that a search without query but with vector is a vector search regardless of (non-zero) semantic ratio
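
To make the second point concrete, here is a hedged sketch of a hybrid request that now has to name its embedder explicitly (the payload shape is assumed; only `hybrid.embedder` and the semantic ratio are referenced by this PR):

```json
{
  "q": "kitchen utensils",
  "hybrid": {
    "embedder": "default",
    "semanticRatio": 0.9
  }
}
```

A request with a `vector` but no `q` would be treated as a pure vector search whatever the (non-zero) `semanticRatio`.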


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 09:17:14 +00:00
ManyTheFish
bbaee3dbc6 Add Swedish pipeline in all-tokenization feature 2024-09-19 08:34:51 +02:00
ManyTheFish
877717cb26 Add a test using Swedish documents 2024-09-19 08:34:04 +02:00
Ian Ornstein
716817122a Correct broken links in README 2024-09-18 16:30:29 -05:00
meili-bors[bot]
ff523a2357 Merge #4939
4939: Introduce the `STARTS WITH` filter operator r=irevoire a=Kerollmops

This PR fixes #4872 by introducing the `STARTS WITH` filter operator and gating it under the _contains filter_ experimental feature along with the `CONTAINS` one. I also updated [the experimental feature discussion page](https://github.com/orgs/meilisearch/discussions/763).
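
As a quick hedged example of the new syntax (the request shape is assumed; only the `STARTS WITH` operator itself comes from this PR), a filtered search could look like:

```json
{
  "q": "sneakers",
  "filter": "brand STARTS WITH 'adi'"
}
```

Like `CONTAINS`, it is rejected unless the contains-filter experimental feature is enabled.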

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-09-18 10:19:48 +00:00
meili-bors[bot]
29c3aca72a Merge #4929
4929: Add facets support to federated r=Kerollmops a=dureuill

# Pull Request

## Related issue 

- Fixes #4932 (sprint issue)
- Fixes  #4913 (user-opened issue)

## What does this PR do?

See [public usage](https://meilisearch.notion.site/v1-11-Federated-search-59b30e03383c40729d7541a3dffb0069)

> [!CAUTION]
> This PR introduces a 🚨**breaking change**🚨: `queries.facets` when `federation` is present and non-`null` is now **an error**

### Implementation standpoint:

- Facet distribution: fix issue where truncated facet distribution would have a wrong order
- facet distribution: implement Display for OrderBy
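
Because `queries.facets` now errors when `federation` is present, facets are requested at the federation level instead; a hedged sketch of the intended payload (the `facetsByIndex` field name is assumed from the linked usage page and is not visible in this diff):

```json
{
  "federation": {
    "limit": 20,
    "facetsByIndex": {
      "movies": ["genres"],
      "books": ["author"]
    }
  },
  "queries": [
    { "indexUid": "movies", "q": "wonder" },
    { "indexUid": "books", "q": "wonder" }
  ]
}
```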


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-18 09:47:20 +00:00
Louis Dureuil
00f8d03f43 Use f32::min and f32::max 2024-09-18 11:46:10 +02:00
Clément Renault
50981ea778 Update the error messages 2024-09-18 11:44:29 +02:00
Louis Dureuil
c2caff1716 Remove obsolete enum 2024-09-18 11:26:43 +02:00
meili-bors[bot]
4c355bede7 Merge #4937
4937: Support iso 639 1 r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #4827

## What does this PR do?
- Add iso-639-1 variants to the Locales enum
- Convert iso-639-1 into iso-639-3
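
In practice this means two-letter codes should be accepted wherever locales are configured and mapped to their three-letter equivalents, e.g. (a sketch reusing the `localizedAttributes` setting shown in #4945):

```json
{
  "localizedAttributes": [
    { "locales": ["fr", "de"], "attributePatterns": ["*"] }
  ]
}
```

Here `fr` and `de` would be converted internally to `fra` and `deu`.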


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-18 05:29:32 +00:00
Louis Dureuil
174d69ff72 Don't override max value in indexes 2024-09-17 18:16:14 +02:00
Louis Dureuil
52a52f97cf Update tests 2024-09-17 17:49:12 +02:00
Louis Dureuil
5de4b48552 Fixup error messages 2024-09-17 17:49:00 +02:00
Louis Dureuil
df648ce7a6 Update tests 2024-09-17 17:40:14 +02:00
Louis Dureuil
af8edab21d Remove mention of sort order and recommend changing index settings on inconsistent order error 2024-09-17 17:39:51 +02:00
Louis Dureuil
c42746c4cd Update tests 2024-09-17 17:22:14 +02:00
Louis Dureuil
98b77aec66 Remove runtime sortFacetValuesBy 2024-09-17 17:22:03 +02:00
Clément Renault
54d3ba3357 Fix tests that check error message content 2024-09-17 17:14:39 +02:00
ManyTheFish
6e058709f2 Rustfmt 2024-09-17 17:02:06 +02:00
ManyTheFish
0fbf9ea5b1 Factorize using macro 2024-09-17 17:00:03 +02:00
Clément Renault
9f1fb4b425 Introduce the STARTS WITH filter operator gated under an experimental feature 2024-09-17 16:44:11 +02:00
Louis Dureuil
1120a5296c Update tests 2024-09-17 16:30:43 +02:00
Louis Dureuil
a35a339c3d Touchup error message 2024-09-17 16:30:43 +02:00
Louis Dureuil
cac5836f6f Remove hybrid.embedder boolean from analytics because embedder is now mandatory 2024-09-17 16:30:43 +02:00
Louis Dureuil
5239ae0297 Rework search kind so that a search without query but with vector is a vector search regardless of semantic ratio 2024-09-17 16:30:43 +02:00
Louis Dureuil
2fdb1d8018 SearchQueryGet can fail 2024-09-17 16:30:43 +02:00
Louis Dureuil
3c5e363554 Remove default embedders 2024-09-17 16:30:43 +02:00
Louis Dureuil
da0dd6febf Make embedder mandatory 2024-09-17 16:30:43 +02:00
ManyTheFish
a197d63ab6 simplify tests 2024-09-17 15:30:12 +02:00
ManyTheFish
390eadb733 Support iso-639-1 2024-09-17 15:01:01 +02:00
meili-bors[bot]
93f0317b94 Merge #4936
4936: Update version for the next release (v1.11.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-09-17 11:47:08 +00:00
curquiza
29ff02f3ff Update version for the next release (v1.11.0) in Cargo.toml 2024-09-17 11:45:48 +00:00
Louis Dureuil
d9e0df74ea update test 2024-09-17 10:39:48 +02:00
Louis Dureuil
dc8a662209 federated queries: adjust error message 2024-09-17 10:39:48 +02:00
Louis Dureuil
6732dd95d7 Update tests 2024-09-17 10:39:48 +02:00
Louis Dureuil
95da428dc8 Use route in federated 2024-09-17 10:39:48 +02:00
Louis Dureuil
38c4be1c8e compute_facets accepts Route argument to fixup error code 2024-09-17 10:39:48 +02:00
Louis Dureuil
91dfab317f New error 2024-09-17 10:39:48 +02:00
Louis Dureuil
47e3c4b5c3 Add new tests 2024-09-17 10:39:48 +02:00
Louis Dureuil
533f1d4345 Federated search: support facets 2024-09-17 10:39:48 +02:00
Louis Dureuil
7b55462610 BREAKING CHANGE: errors if queries.facets in federated search 2024-09-17 10:39:48 +02:00
Louis Dureuil
f6114a1ff2 Introduce ComputedFacets and compute_facet_distribution_stats 2024-09-17 10:39:48 +02:00
Louis Dureuil
7c084b1286 SearchQueriesWithIndex changes 2024-09-17 10:39:47 +02:00
Louis Dureuil
57f9517a98 Required changes to IndexUid 2024-09-17 10:39:47 +02:00
Louis Dureuil
72cc573e0a Add new error types 2024-09-17 10:39:47 +02:00
Louis Dureuil
a48b1d5a79 Update existing tests following error message changes 2024-09-17 10:39:47 +02:00
Louis Dureuil
a94a87ee54 Slightly changes existing error messages 2024-09-17 10:39:47 +02:00
Clément Renault
f4ab1f168e Prefer using Rc<str> than String when cloning a lot 2024-09-16 15:41:29 +02:00
meili-bors[bot]
4b55ba68bc Merge #4911
4911: Bump quinn-proto from 0.11.3 to 0.11.8 r=Kerollmops a=dependabot[bot]

Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 0.11.8.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/quinn-rs/quinn/releases">quinn-proto's releases</a>.</em></p>
<blockquote>
<h2>quinn-proto 0.11.5</h2>
<h2>What's Changed</h2>
<ul>
<li>No workspace lints by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1955">quinn-rs/quinn#1955</a></li>
</ul>
<h2>quinn-proto 0.11.4</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix panic in example due to unset default crypto provider by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1882">quinn-rs/quinn#1882</a></li>
<li>Fix zero-length connection IDs by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1883">quinn-rs/quinn#1883</a></li>
<li>Add support for NetBSD, fix OpenBSD by <a href="https://github.com/flub"><code>`@​flub</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1884">quinn-rs/quinn#1884</a></li>
<li>docs(udp): replace AsRawFd and AsRawSocket with AsFd and AsSocket by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1890">quinn-rs/quinn#1890</a></li>
<li>Resolve stopped/received_reset futures on lost connections by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1886">quinn-rs/quinn#1886</a></li>
<li>Bump version numbers (quinn 0.11.2, -proto 0.11.3) by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1891">quinn-rs/quinn#1891</a></li>
<li>udp: bump version to 0.5.2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1892">quinn-rs/quinn#1892</a></li>
<li>docs(quinn): Clarify effects of setting AckFrequencyConfig by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1894">quinn-rs/quinn#1894</a></li>
<li>Apply clippy suggestions from Rust 1.79 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1895">quinn-rs/quinn#1895</a></li>
<li>Only send MAX_STREAMS when &gt;1/8 of flow control window is consumed  by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1898">quinn-rs/quinn#1898</a></li>
<li>fix: remove unused dependency tracing-attributes by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1903">quinn-rs/quinn#1903</a></li>
<li>proto: make initial destination cid configurable  by <a href="https://github.com/thynson"><code>`@​thynson</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1897">quinn-rs/quinn#1897</a></li>
<li>Allow configuring rng seed through <code>EndpointConfig</code> by <a href="https://github.com/aochagavia"><code>`@​aochagavia</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1901">quinn-rs/quinn#1901</a></li>
<li>quinn: introduce waking helpers by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1908">quinn-rs/quinn#1908</a></li>
<li>Wake blocked streams on 0-RTT rejection by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1905">quinn-rs/quinn#1905</a></li>
<li>Upgrade to rustc-hash 2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1909">quinn-rs/quinn#1909</a></li>
<li>Fix unnecessary Incoming warning on Endpoint drop by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1907">quinn-rs/quinn#1907</a></li>
<li>Revise and add additional 0-rtt doc comments by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1826">quinn-rs/quinn#1826</a></li>
<li>docs: remove reference to sendmmsg by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1914">quinn-rs/quinn#1914</a></li>
<li>Fix debug assert with reordered ACKs by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1893">quinn-rs/quinn#1893</a></li>
<li>quinn: Make <code>Endpoint::client</code> dual-stack V6 by default by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1913">quinn-rs/quinn#1913</a></li>
<li>bench(udp): measure non-GSO &amp; GSO on localhost by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1915">quinn-rs/quinn#1915</a></li>
<li>proto: avoid overflow in handshake done statistic by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1918">quinn-rs/quinn#1918</a></li>
<li>Use workspace dependencies for all external dependencies by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1919">quinn-rs/quinn#1919</a></li>
<li>Fix lack of reexport of ConnectionStats and ResetError by <a href="https://github.com/TirushOne"><code>`@​TirushOne</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1920">quinn-rs/quinn#1920</a></li>
<li>[non-breaking] deps(udp): make tracing optional and add optional log by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1923">quinn-rs/quinn#1923</a></li>
<li>fix(udp): feature flag tracing in windows.rs by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1932">quinn-rs/quinn#1932</a></li>
<li>Bump MSRV to 1.70 following tokio 1.39 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1939">quinn-rs/quinn#1939</a></li>
<li>Raise default idle timeout to 30 seconds by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1938">quinn-rs/quinn#1938</a></li>
<li>Discard pre-handshake packets after the handshake by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1937">quinn-rs/quinn#1937</a></li>
<li>Apply suggestions from Clippy 1.80 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1941">quinn-rs/quinn#1941</a></li>
<li>chore(quinn): feature flag socket2 imports by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1933">quinn-rs/quinn#1933</a></li>
<li>refactor: move rust-version to workspace Cargo.toml by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1940">quinn-rs/quinn#1940</a></li>
<li>chore: move common package data to workspace Cargo.toml by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1943">quinn-rs/quinn#1943</a></li>
<li>Endpoint stats interface by <a href="https://github.com/ryleung-solana"><code>`@​ryleung-solana</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1900">quinn-rs/quinn#1900</a></li>
<li>Expose the Handshake Confirmed state by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1944">quinn-rs/quinn#1944</a></li>
<li>Exclude metrics with freestanding getters from EndpointStats by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1945">quinn-rs/quinn#1945</a></li>
<li>Fix incorrect initial DCID indexing on retried connections by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1946">quinn-rs/quinn#1946</a></li>
<li>Add expect message to unwrap in PacketBuilder by <a href="https://github.com/casey"><code>`@​casey</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1951">quinn-rs/quinn#1951</a></li>
<li>Revert &quot;proto: yield transport error for Initial packets with no CRYPTO&quot; by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1952">quinn-rs/quinn#1952</a></li>
<li>refactor(udp): introduce log facade by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1935">quinn-rs/quinn#1935</a></li>
<li>Update cargo-deny-action to v2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1953">quinn-rs/quinn#1953</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="7c09b02073"><code>7c09b02</code></a> proto: bump version to 0.11.8 for release (<a href="https://redirect.github.com/quinn-rs/quinn/issues/1981">#1981</a>)</li>
<li><a href="59bccd2e7e"><code>59bccd2</code></a> Version bump <code>quinn</code> to enforce patched <code>quinn-proto</code></li>
<li><a href="a8ec510fd1"><code>a8ec510</code></a> proto: avoid panicking on rustls server config errors</li>
<li><a href="c26e8cd2f7"><code>c26e8cd</code></a> Bump versions</li>
<li><a href="e01609ccd8"><code>e01609c</code></a> Merge commit from fork</li>
<li><a href="c292a3c6a6"><code>c292a3c</code></a> Fix and test validation of IDCID length</li>
<li><a href="bb02a12a84"><code>bb02a12</code></a> fix(.github/android): use API level 26</li>
<li><a href="5e5cc93645"><code>5e5cc93</code></a> fix(.github/android): pass matrix.target and increase api to v26</li>
<li><a href="cef42cccef"><code>cef42cc</code></a> fix(udp): typo in sendmsg error log</li>
<li><a href="edf16a6f10"><code>edf16a6</code></a> ci(rust.yml): add workflow testing feature permutations</li>
<li>Additional commits viewable in <a href="https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=quinn-proto&package-manager=cargo&previous-version=0.11.3&new-version=0.11.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 13:32:32 +00:00
ManyTheFish
1a0e962299 Replace hashmap by vectors in wpp 2024-09-16 15:01:20 +02:00
ManyTheFish
f13e076b8a Use hashmap instead of Btree in wpp extractor 2024-09-16 14:40:40 +02:00
ManyTheFish
7ba49b849e Extract and write facet databases 2024-09-16 09:35:16 +02:00
Clément Renault
f7652186e1 WIP geo fields 2024-09-12 18:01:02 +02:00
Louis Dureuil
23e14138bb facet distribution: implement Display for OrderBy 2024-09-12 17:43:50 +02:00
Louis Dureuil
e44325683a Facet distribution: fix issue where truncated facet distribution would have a wrong order 2024-09-12 17:43:49 +02:00
Clément Renault
b2f4e67c9a Do not store useless updates 2024-09-12 15:38:31 +02:00
Clément Renault
ff5d3b59f5 Move the document id extraction to the primary key code 2024-09-12 12:01:42 +02:00
ManyTheFish
aa69308e45 Use a bufWriter to build word FSTs 2024-09-12 11:48:00 +02:00
ManyTheFish
eb9a20ff0b Fix fid_word_docids extraction 2024-09-12 11:08:18 +02:00
Clément Renault
0d868f36d7 Make sure we always use a BufWriter to write the update files 2024-09-11 18:38:04 +02:00
Clément Renault
e7d9db078f Use the right key name when converting from CSV to NDJSON 2024-09-11 18:27:00 +02:00
Clément Renault
3e9198ebaa Support guessing primary key again 2024-09-11 17:25:40 +02:00
Clément Renault
2a0ad0982f Fix the document counter 2024-09-11 15:59:36 +02:00
ManyTheFish
2b317c681b Build mergers in parallel 2024-09-11 11:49:26 +02:00
ManyTheFish
39b5990f64 Mutualize tokenization 2024-09-11 10:22:38 +02:00
Clément Renault
3848adf5a2 Improve error management and simplify JSON read 2024-09-11 10:10:51 +02:00
Clément Renault
b4de06259e Better CSV support 2024-09-11 10:02:00 +02:00
meili-bors[bot]
02c2b660f8 Merge #4920
4920: Change OpenAI default model r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4856

See also [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#b4685a48c4784262a149ec307ec58671)

## What does this PR do?
- Make `text-embedding-3-small` the default model for OpenAI instead of `text-embedding-ada-002`. Existing embedders are not impacted.
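
For example, an OpenAI embedder declared without an explicit `model` would now resolve to `text-embedding-3-small` (a sketch; the surrounding field names follow the usual embedder settings shape and are not part of this diff):

```json
{
  "embedders": {
    "products": {
      "source": "openAi",
      "apiKey": "<OPENAI_API_KEY>"
    }
  }
}
```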


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-11 07:08:39 +00:00
Clément Renault
8287c2644f Support CSV again 2024-09-10 21:10:28 +01:00
Clément Renault
c1c44a0b81 Impl serialize on TopLevelMap 2024-09-10 19:32:03 +01:00
Clément Renault
04596f3616 Move the TopLevelMap into a dedicated module 2024-09-10 18:01:17 +01:00
Clément Renault
24cb5839ad Move the document changes sorting logic to a new trait 2024-09-10 17:37:52 +01:00
Clément Renault
8d97b7b28c Support JSON payloads again (not perfectly though) 2024-09-10 17:09:49 +01:00
ManyTheFish
f69688e8f7 Fix several warnings in extractors and remove unreachable macros 2024-09-09 14:52:50 +02:00
Louis Dureuil
f18e9cb7b3 Change openai default model 2024-09-09 13:09:35 +02:00
Clément Renault
8fd0afaaaa Make sure we iterate over the payload documents in order 2024-09-06 08:09:08 +02:00
Clément Renault
72c6a21a30 Use raw JSON to read the payloads 2024-09-05 20:08:23 +02:00
Clément Renault
8412be4a7d Cleanup CowStr and TopLevelMap struct 2024-09-05 18:32:55 +02:00
Louis Dureuil
10f09c531f add some commented code to read from json with raw values 2024-09-05 18:22:16 +02:00
ManyTheFish
8fd99b111b Add tracing timers logs 2024-09-05 18:00:22 +02:00
Clément Renault
f6b3d1f9a5 Increase some channel sizes 2024-09-05 15:12:07 +02:00
Clément Renault
73ce67862d Use the word pair proximity and fid word count docids extractors
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-05 10:56:22 +02:00
Clément Renault
0fc02f7351 Move the facet extraction to dedicated modules 2024-09-05 10:32:27 +02:00
ManyTheFish
34f11e3380 Implement word count and word pair proximity extractors 2024-09-05 10:30:39 +02:00
Clément Renault
27308eaab1 Import the facet extractors 2024-09-04 17:58:15 +02:00
Clément Renault
b33ec9ba3f Introduce the FieldIdFacetIsNullDocidsExtractor 2024-09-04 17:50:08 +02:00
Clément Renault
9c0a1cd9fd Introduce the FieldIdFacetExistsDocidsExtractor 2024-09-04 17:48:49 +02:00
Clément Renault
0b061f1e70 Introduce the FieldIdFacetIsEmptyDocidsExtractor 2024-09-04 17:40:24 +02:00
Clément Renault
19d937ab21 Introduce the facet extractors 2024-09-04 17:03:54 +02:00
Clément Renault
1d59c19cd2 Send the WordsFst by using an Mmap 2024-09-04 14:30:09 +02:00
Clément Renault
98e48371c3 Factorize some stuff 2024-09-04 12:17:13 +02:00
Clément Renault
6d74fb0229 Introduce the WordFidWordDocids database 2024-09-04 11:40:55 +02:00
ManyTheFish
1eb75a1040 remove milli/src/update/new/extract/tokenize_document.rs 2024-09-04 11:40:26 +02:00
Clément Renault
3b82d8b5b9 Fix the cache to serialize entries correctly 2024-09-04 10:55:36 +02:00
ManyTheFish
781a186f75 remove milli/src/update/new/extract/extract_word_docids.rs 2024-09-04 10:28:31 +02:00
ManyTheFish
6a399556b5 Implement more searchable extractor 2024-09-04 10:20:18 +02:00
Clément Renault
27b4cab857 Extract and write the documents and words fst in the database 2024-09-04 09:59:19 +02:00
dependabot[bot]
3f3cebf5f9 Bump quinn-proto from 0.11.3 to 0.11.8
Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 0.11.8.
- [Release notes](https://github.com/quinn-rs/quinn/releases)
- [Commits](https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8)

---
updated-dependencies:
- dependency-name: quinn-proto
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-03 20:50:30 +00:00
Clément Renault
52d32b4ee9 Move the channel sender in the closure to stop the merger thread 2024-09-03 16:08:33 +02:00
ManyTheFish
da61408e52 Remove unimplemented from document changes 2024-09-03 15:14:16 +02:00
ManyTheFish
fe69385bd7 Fix tokenizer test 2024-09-03 14:24:37 +02:00
Clément Renault
c1557734dc Use the GlobalFieldsIdsMap everywhere and write it to disk
Co-authored-by: Dureuill <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-03 12:01:01 +02:00
ManyTheFish
c50d3edc4a Integrate first searchable extractor 2024-09-03 11:02:39 +02:00
Clément Renault
5369bf4a62 Change some lifetimes 2024-09-02 19:51:22 +02:00
Clément Renault
bcb1aa3d22 Find a temporary solution to par into iter on a HashMap
Spoiler: Do not use a HashMap but drain it into a Vec
2024-09-02 19:39:48 +02:00
Clément Renault
9b7858fb90 Expose the new indexer 2024-09-02 15:21:59 +02:00
Clément Renault
ab01679a8f Remove the useless option from the document changes 2024-09-02 15:21:00 +02:00
Clément Renault
521775f788 I push for Many 2024-09-02 15:10:21 +02:00
Clément Renault
72e7b7846e Renaming the indexers 2024-09-02 14:42:27 +02:00
Clément Renault
6526ce1208 Fix the merging of documents 2024-09-02 14:41:20 +02:00
Clément Renault
e639ec79d1 Move the indexers into their own modules 2024-09-02 10:42:19 +02:00
Clément Renault
bb885a5810 Fix the merge for roaring bitmap 2024-09-01 23:20:19 +02:00
Clément Renault
b625d31c7d Introduce the PartialDumpIndexer indexer that generates document ids in parallel 2024-08-30 15:07:21 +02:00
Clément Renault
6487a67f2b Introduce the ConcurrentAvailableIds struct and rename the other to AvailableIds 2024-08-30 15:06:50 +02:00
Clément Renault
271ce91b3b Add the rayon Threadpool to the index function parameter 2024-08-30 14:34:24 +02:00
Clément Renault
54f2eb4507 Remove duplication of grenad merger 2024-08-30 14:34:05 +02:00
Clément Renault
794ebcd582 Replace grenad with the new grenad various-improvement branch 2024-08-30 11:53:59 +02:00
Clément Renault
b7c77c7a39 Use the latest version of the obkv crate 2024-08-30 11:53:59 +02:00
Clément Renault
0c57cf7565 Replace obkv with the temporary new version of it 2024-08-30 11:53:58 +02:00
Clément Renault
27df9e6c73 Introduce the indexer::index function that runs the indexation 2024-08-30 11:53:58 +02:00
Clément Renault
45c060831e Introduce typed channels and the merger loop 2024-08-30 11:53:58 +02:00
Clément Renault
874c1ac538 First channels types 2024-08-30 11:53:58 +02:00
Clément Renault
e6ffa4d454 Implement the document merge function for the replace method 2024-08-30 11:53:58 +02:00
Clément Renault
637a9c8bdd Implement the document merge function for the update method 2024-08-30 11:53:58 +02:00
Louis Dureuil
c683fa98e6 WIP
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-30 11:53:57 +02:00
130 changed files with 12182 additions and 4975 deletions

Cargo.lock generated
View File

@@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "arroy"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
dependencies = [
"bytemuck",
"byteorder",
"heed",
"log",
"memmap2",
"nohash",
"ordered-float",
"rand",
"rayon",
@@ -471,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "benchmarks"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"anyhow",
"bytes",
@@ -652,7 +652,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"anyhow",
"time",
@@ -933,9 +933,8 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d"
version = "0.9.1"
source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
dependencies = [
"aho-corasick",
"csv",
@@ -1622,7 +1621,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"anyhow",
"big_s",
@@ -1834,7 +1833,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "file-store"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"tempfile",
"thiserror",
@@ -1856,7 +1855,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"insta",
"nom",
@@ -1876,7 +1875,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"criterion",
"serde_json",
@@ -2000,7 +1999,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"arbitrary",
"clap",
@@ -2221,11 +2220,11 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grenad"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "350d89047298d3b1b40050acd11ab76e487b854a104b760ebc5a7f375093de77"
source = "git+https://github.com/meilisearch/grenad?branch=various-improvements#58ac87d852413571102f44c5e55ca13509a3f1a0"
dependencies = [
"bytemuck",
"byteorder",
"either",
"rayon",
"tempfile",
]
@@ -2308,9 +2307,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.14.3"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash 0.8.11",
"allocator-api2",
@@ -2552,7 +2551,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"anyhow",
"arroy",
@@ -2570,6 +2569,7 @@ dependencies = [
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"memmap2",
"page_size",
"rayon",
"roaring",
@@ -2591,7 +2591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown 0.14.3",
"hashbrown 0.14.5",
"serde",
]
@@ -2650,8 +2650,7 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
[[package]]
name = "irg-kvariants"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26"
source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
dependencies = [
"csv",
"once_cell",
@@ -2746,7 +2745,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"criterion",
"serde_json",
@@ -3365,7 +3364,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"insta",
"md5",
@@ -3374,7 +3373,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -3463,7 +3462,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -3482,7 +3481,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"actix-web",
"anyhow",
@@ -3512,7 +3511,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"anyhow",
"clap",
@@ -3543,7 +3542,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"arroy",
"big_s",
@@ -3567,6 +3566,7 @@ dependencies = [
"fxhash",
"geoutils",
"grenad",
"hashbrown 0.14.5",
"heed",
"hf-hub",
"indexmap",
@@ -3686,6 +3686,12 @@ version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
[[package]]
name = "nohash"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
[[package]]
name = "nom"
version = "7.1.3"
@@ -3830,9 +3836,8 @@ dependencies = [
[[package]]
name = "obkv"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2e27bcfe835a379d32352112f6b8dbae2d99d16a5fff42abe6e5ba5386c1e5a"
version = "0.3.0"
source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#ce535874008ecac554f02e0c670e6caf62134d6b"
[[package]]
name = "once_cell"
@@ -3977,7 +3982,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"big_s",
"serde_json",
@@ -4649,8 +4654,7 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1"
source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#6bba84b1a47da1d6e52d5c4dc0ce8593ae4646a5"
dependencies = [
"bytemuck",
"byteorder",
@@ -4835,9 +4839,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
version = "1.0.209"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
dependencies = [
"serde_derive",
]
@@ -4853,9 +4857,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.209"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",
@@ -6036,7 +6040,7 @@ version = "0.16.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
dependencies = [
"hashbrown 0.14.3",
"hashbrown 0.14.5",
"once_cell",
]
@@ -6362,7 +6366,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.10.1"
version = "1.11.0"
dependencies = [
"anyhow",
"build-info",

View File

@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.10.1"
version = "1.11.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
@@ -44,23 +44,5 @@ opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3
[profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3
[patch.crates-io]
roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }

View File

@@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co
## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**

View File

@@ -255,6 +255,8 @@ pub(crate) mod test {
}
"###);
insta::assert_json_snapshot!(vector_index.settings().unwrap());
{
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();

View File

@@ -1,783 +1,56 @@
---
source: dump/src/reader/mod.rs
expression: document
expression: vector_index.settings().unwrap()
---
{
"id": "e3",
"desc": "overriden vector + map",
"_vectors": {
"default": [
0.2,
0.1,
[… several hundred repeated 0.1 values elided …]
0.1
],
"toto": [
0.1
]
}
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
},
"embedders": {
"default": {
"source": "huggingFace",
"model": "BAAI/bge-base-en-v1.5",
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
}
},
"searchCutoffMs": null
}

View File

@@ -0,0 +1,780 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e0",
"desc": "overriden vector",
"_vectors": {
"default": [
0.1,
[… several hundred repeated 0.1 values elided …]
0.1
]
}
}

View File

@@ -27,6 +27,7 @@ pub enum Condition<'a> {
LowerThanOrEqual(Token<'a>),
Between { from: Token<'a>, to: Token<'a> },
Contains { keyword: Token<'a>, word: Token<'a> },
StartsWith { keyword: Token<'a>, word: Token<'a> },
}
/// condition = value ("==" | ">" ...) value
@@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
))
}
/// starts with = value "STARTS WITH" value
pub fn parse_starts_with(input: Span) -> IResult<FilterCondition> {
let (input, (fid, starts_with, value)) =
tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?;
Ok((
input,
FilterCondition::Condition {
fid,
op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
},
))
}
/// not starts with = value "NOT" WS+ "STARTS WITH" value
pub fn parse_not_starts_with(input: Span) -> IResult<FilterCondition> {
let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH")));
let (input, (fid, (_not, _spaces, starts_with), value)) =
tuple((parse_value, keyword, cut(parse_value)))(input)?;
Ok((
input,
FilterCondition::Not(Box::new(FilterCondition::Condition {
fid,
op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
})),
))
}
/// to = value value "TO" WS+ value
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
let (input, (key, from, _, _, to)) =
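As a hedged side note (not part of the diff): assuming the crate's public `FilterCondition::parse` entry point and the `Display` implementation touched further below, the two new parsers are expected to round-trip like the snapshot tests shown later in this compare, roughly:

use filter_parser::FilterCondition;

fn main() {
    // "subscribers STARTS WITH 'hel'" should parse into Condition::StartsWith
    // and display back using the braced-token notation used by the tests.
    let condition = FilterCondition::parse("subscribers STARTS WITH 'hel'")
        .expect("syntactically valid filter")
        .expect("non-empty filter");
    assert_eq!(condition.to_string(), "{subscribers} STARTS WITH {hel}");
}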

View File

@@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?

View File

@@ -49,7 +49,7 @@ use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition};
use condition::{
parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
parse_is_null, parse_not_contains, parse_not_exists,
parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
@@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> {
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. } => None,
Condition::Contains { keyword, word: _ } => Some(keyword),
Condition::Contains { keyword, word: _ }
| Condition::StartsWith { keyword, word: _ } => Some(keyword),
},
FilterCondition::Not(this) => this.use_contains_operator(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
@@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_to,
parse_contains,
parse_not_contains,
parse_starts_with,
parse_not_starts_with,
// the next lines are only for error handling and are written at the end to have the least possible performance impact
parse_geo,
parse_geo_distance,
@@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"),
}
}
}
@@ -680,6 +684,13 @@ pub mod tests {
insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
// Test STARTS WITH + NOT STARTS WITH
insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})");
insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
// Test nested NOT
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
@@ -751,7 +762,7 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
@@ -761,12 +772,12 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
@@ -851,12 +862,12 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
@@ -919,35 +930,35 @@ pub mod tests {
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
}

View File

@@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool {
| "NULL"
| "EMPTY"
| "CONTAINS"
| "STARTS"
| "WITH"
| "_geoRadius"
| "_geoBoundingBox"
)
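Hedged note (not part of the diff): since `STARTS` and `WITH` now count as reserved keywords, a bare value that is literally one of these words presumably has to be quoted, along the lines of:

use filter_parser::FilterCondition;

fn main() {
    // The unquoted keyword is presumably rejected as reserved…
    assert!(FilterCondition::parse("title = STARTS").is_err());
    // …while the quoted form remains a plain equality filter.
    assert!(FilterCondition::parse("title = 'STARTS'").is_ok());
}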

View File

@@ -29,6 +29,7 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
memmap2 = "0.9.4"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
@@ -40,7 +41,7 @@ ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.4.0"
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.39.0", features = ["json", "redactions"] }

View File

@@ -28,6 +28,9 @@ use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::new::indexer::{
self, retrieve_or_guess_primary_key, DocumentChanges,
};
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
@@ -875,10 +878,8 @@ impl IndexScheduler {
while let Some(doc) =
cursor.next_document().map_err(milli::Error::from)?
{
dump_content_file.push_document(&obkv_to_object(
&doc,
&documents_batch_index,
)?)?;
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
}
@@ -1252,58 +1253,52 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
let indexer_config = self.index_mapper.indexer_config();
if let Some(primary_key) = primary_key {
match index.primary_key(index_wtxn)? {
// if a primary key was set AND had already been defined in the index
// but to a different value, we can make the whole batch fail.
Some(pk) => {
if primary_key != pk {
return Err(milli::Error::from(
milli::UserError::PrimaryKeyCannotBeChanged(pk.to_string()),
)
.into());
}
}
// if the primary key was set and there was no primary key set for this index
// we set it to the received value before starting the indexing process.
None => {
let mut builder =
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
// TODO manage errors correctly
let rtxn = index.read_txn()?;
let first_addition_uuid = operations
.iter()
.find_map(|op| match op {
DocumentOperation::Add(content_uuid) => Some(content_uuid),
_ => None,
})
.unwrap();
let mut content_files = Vec::new();
for operation in &operations {
if let DocumentOperation::Add(content_uuid) = operation {
let content_file = self.file_store.get_update(*content_uuid)?;
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
if !mmap.is_empty() {
content_files.push(mmap);
}
}
}
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let mut fields_ids_map = index.fields_ids_map(&rtxn)?;
let first_document = match content_files.first() {
Some(mmap) => {
let mut iter = serde_json::Deserializer::from_slice(mmap).into_iter();
iter.next().transpose().map_err(|e| e.into()).map_err(Error::IoError)?
}
None => None,
};
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
let embedders = self.embedders(embedder_configs)?;
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
let primary_key = retrieve_or_guess_primary_key(
&rtxn,
index,
indexer_config,
config,
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|| must_stop_processing.get(),
)?;
&mut fields_ids_map,
first_document.as_ref(),
)?
.unwrap();
let mut content_files_iter = content_files.iter();
let mut indexer = indexer::DocumentOperation::new(method);
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
builder = builder.with_embedders(embedders.clone());
DocumentOperation::Add(_content_uuid) => {
let mmap = content_files_iter.next().unwrap();
let stats = indexer.add_documents(mmap)?;
// builder = builder.with_embedders(embedders.clone());
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
@@ -1317,30 +1312,17 @@ impl IndexScheduler {
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(stats.document_count as u64),
})
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let count = document_ids.len();
indexer.delete_documents(document_ids);
// Uses Invariant: remove documents actually always returns Ok for the inner result
let count = user_result.unwrap();
// let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@@ -1354,26 +1336,35 @@ impl IndexScheduler {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
deleted_documents: Some(count as u64),
});
}
}
}
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
let mut builder =
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
// TODO create a pool if needed
// let pool = indexer_config.thread_pool.unwrap();
let pool = rayon::ThreadPoolBuilder::new().build().unwrap();
let param = (index, &rtxn, &primary_key);
let document_changes = indexer.document_changes(&mut fields_ids_map, param)?;
// TODO pass/write the FieldsIdsMap
indexer::index(index_wtxn, index, fields_ids_map, &pool, document_changes)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
// else if primary_key_has_been_set {
// // Everything failed but we've set a primary key.
// // We need to remove it.
// let mut builder =
// milli::update::Settings::new(index_wtxn, index, indexer_config);
// builder.reset_primary_key();
// builder.execute(
// |indexing_step| tracing::trace!(update = ?indexing_step),
// || must_stop_processing.clone().get(),
// )?;
// }
Ok(tasks)
}
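For readability, a rough outline of the new ingestion path introduced in the hunk above (a summary of the diff itself, not additional behavior):

// 1. Every update file is mmapped up front (memmap2::Mmap) instead of being
//    streamed through DocumentsBatchReader.
// 2. The primary key is resolved once with retrieve_or_guess_primary_key,
//    using the first document of the first addition as a hint.
// 3. Additions and deletions are accumulated in indexer::DocumentOperation
//    (add_documents / delete_documents), and task details are filled from its stats.
// 4. The accumulated operations are turned into document_changes and indexed in
//    one pass by indexer::index on a rayon thread pool (a dedicated pool is still a TODO),
//    while the "reset primary key on full failure" branch is temporarily commented out.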

View File

@@ -87,7 +87,7 @@ impl RoFeatures {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` in a filter",
disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763",
}

View File

@@ -1477,7 +1477,7 @@ impl IndexScheduler {
.map(
|IndexEmbeddingConfig {
name,
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
..
}| {
let prompt =
@@ -1486,7 +1486,10 @@ impl IndexScheduler {
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((name, (embedder.clone(), prompt)));
return Ok((
name,
(embedder.clone(), prompt, quantized.unwrap_or_default()),
));
}
}
@@ -1500,7 +1503,7 @@ impl IndexScheduler {
let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone());
}
Ok((name, (embedder, prompt)))
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
},
)
.collect();
@@ -5197,7 +5200,7 @@ mod tests {
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders(configs).unwrap();
let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
@@ -5519,6 +5522,7 @@ mod tests {
400,
),
},
quantized: None,
},
user_provided: RoaringBitmap<[1, 2]>,
},
@@ -5531,28 +5535,8 @@ mod tests {
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embeddings = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@@ -5737,6 +5721,7 @@ mod tests {
400,
),
},
quantized: None,
},
user_provided: RoaringBitmap<[0]>,
},
@@ -5780,6 +5765,7 @@ mod tests {
400,
),
},
quantized: None,
},
user_provided: RoaringBitmap<[]>,
},

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -66,3 +66,5 @@ khmer = ["milli/khmer"]
vietnamese = ["milli/vietnamese"]
# force swedish character recomposition
swedish-recomposition = ["milli/swedish-recomposition"]
# force german character recomposition
german = ["milli/german"]

View File

@@ -1,20 +1,22 @@
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, BufWriter, Write};
use std::io::{self, BufWriter};
use std::marker::PhantomData;
use memmap2::MmapOptions;
use milli::documents::{DocumentsBatchBuilder, Error};
use memmap2::Mmap;
use milli::documents::Error;
use milli::update::new::TopLevelMap;
use milli::Object;
use serde::de::{SeqAccess, Visitor};
use serde::{Deserialize, Deserializer};
use serde_json::error::Category;
use serde_json::{to_writer, Map, Value};
use crate::error::{Code, ErrorCode};
type Result<T> = std::result::Result<T, DocumentFormatError>;
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub enum PayloadType {
Ndjson,
Json,
@@ -88,6 +90,26 @@ impl From<(PayloadType, Error)> for DocumentFormatError {
}
}
impl From<(PayloadType, serde_json::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, serde_json::Error)) -> Self {
if error.classify() == Category::Data {
Self::Io(error.into())
} else {
Self::MalformedPayload(Error::Json(error), ty)
}
}
}
impl From<(PayloadType, csv::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, csv::Error)) -> Self {
if error.is_io_error() {
Self::Io(error.into())
} else {
Self::MalformedPayload(Error::Csv(error), ty)
}
}
}
impl From<io::Error> for DocumentFormatError {
fn from(error: io::Error) -> Self {
Self::Io(error)
@@ -103,67 +125,140 @@ impl ErrorCode for DocumentFormatError {
}
}
/// Reads CSV from input and writes an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
// TODO remove that from the place I've borrowed it
#[derive(Debug)]
enum AllowedType {
String,
Boolean,
Number,
}
/// Reads JSON from a temporary file and writes an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
fn parse_csv_header(header: &str) -> (&str, AllowedType) {
// if there are several separators we only split on the last one.
match header.rsplit_once(':') {
Some((field_name, field_type)) => match field_type {
"string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number),
// if the pattern isn't recognized, we keep the whole field.
_otherwise => (header, AllowedType::String),
},
None => (header, AllowedType::String),
}
}
match array_each(&mut deserializer, |obj| builder.append_json_object(&obj)) {
/// Reads CSV from a file and writes it as NDJSON to a file, checking it along the way.
pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result<u64> {
let ptype = PayloadType::Csv { delimiter };
let mut output = BufWriter::new(output);
let mut reader = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(input);
let headers = reader.headers().map_err(|e| DocumentFormatError::from((ptype, e)))?.clone();
let typed_fields: Vec<_> = headers.iter().map(parse_csv_header).collect();
let mut object: Map<_, _> =
typed_fields.iter().map(|(k, _)| (k.to_string(), Value::Null)).collect();
let mut line = 0;
let mut record = csv::StringRecord::new();
while reader.read_record(&mut record).map_err(|e| DocumentFormatError::from((ptype, e)))? {
// We increment here and not at the end of the loop
// to take the header offset into account.
line += 1;
// Reset the document values
object.iter_mut().for_each(|(_, v)| *v = Value::Null);
for (i, (name, atype)) in typed_fields.iter().enumerate() {
let value = &record[i];
let trimmed_value = value.trim();
let value = match atype {
AllowedType::Number if trimmed_value.is_empty() => Value::Null,
AllowedType::Number => match trimmed_value.parse::<i64>() {
Ok(integer) => Value::from(integer),
Err(_) => match trimmed_value.parse::<f64>() {
Ok(float) => Value::from(float),
Err(error) => {
return Err(DocumentFormatError::MalformedPayload(
Error::ParseFloat { error, line, value: value.to_string() },
ptype,
))
}
},
},
AllowedType::Boolean if trimmed_value.is_empty() => Value::Null,
AllowedType::Boolean => match trimmed_value.parse::<bool>() {
Ok(bool) => Value::from(bool),
Err(error) => {
return Err(DocumentFormatError::MalformedPayload(
Error::ParseBool { error, line, value: value.to_string() },
ptype,
))
}
},
AllowedType::String if value.is_empty() => Value::Null,
AllowedType::String => Value::from(value),
};
*object.get_mut(*name).expect("encountered an unknown field") = value;
}
to_writer(&mut output, &object).map_err(|e| DocumentFormatError::from((ptype, e)))?;
}
Ok(line as u64)
}
/// Reads JSON from a file and writes it as NDJSON to a file, checking it along the way.
pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a TopLevelMap<'pl> that
// does not allocate when possible and only materializes the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);
let count = match array_each(&mut deserializer, |obj: TopLevelMap| to_writer(&mut out, &obj)) {
// The json data has been deserialized and does not need to be processed again.
// The data has been transferred to the writer during the deserialization process.
Ok(Ok(_)) => (),
Ok(Err(e)) => return Err(DocumentFormatError::Io(e)),
Ok(Ok(count)) => count,
Ok(Err(e)) => return Err(DocumentFormatError::from((PayloadType::Json, e))),
Err(e) => {
// Attempt to deserialize a single json string when the cause of the exception is not Category.data
// Other types of deserialisation exceptions are returned directly to the front-end
if e.classify() != serde_json::error::Category::Data {
return Err(DocumentFormatError::MalformedPayload(
Error::Json(e),
PayloadType::Json,
));
if e.classify() != Category::Data {
return Err(DocumentFormatError::from((PayloadType::Json, e)));
}
let content: Object = serde_json::from_slice(&mmap)
let content: Object = serde_json::from_slice(&input)
.map_err(Error::Json)
.map_err(|e| (PayloadType::Json, e))?;
builder.append_json_object(&content).map_err(DocumentFormatError::Io)?;
to_writer(&mut out, &content)
.map(|_| 1)
.map_err(|e| DocumentFormatError::from((PayloadType::Json, e)))?
}
};
match out.into_inner() {
Ok(_) => Ok(count),
Err(ie) => Err(DocumentFormatError::Io(ie.into_error())),
}
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}
/// Reads JSON from a temporary file and writes an obkv batch to writer.
pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
/// Reads NDJSON from a file and writes it as NDJSON to a file, checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a TopLevelMap<'pl> that
// does not allocate when possible and only materializes the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output);
for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {
let object = result.map_err(Error::Json).map_err(|e| (PayloadType::Ndjson, e))?;
builder.append_json_object(&object).map_err(Into::into).map_err(DocumentFormatError::Io)?;
let mut count = 0;
for result in serde_json::Deserializer::from_slice(&input).into_iter() {
count += 1;
result
.and_then(|map: TopLevelMap| to_writer(&mut output, &map))
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
}
let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Io)?;
Ok(count as u64)
Ok(count)
}
/// The actual handling of the deserialization process in serde
@@ -172,20 +267,23 @@ pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
/// ## References
/// <https://serde.rs/stream-array.html>
/// <https://github.com/serde-rs/json/issues/160>
fn array_each<'de, D, T, F>(deserializer: D, f: F) -> std::result::Result<io::Result<u64>, D::Error>
fn array_each<'de, D, T, F>(
deserializer: D,
f: F,
) -> std::result::Result<serde_json::Result<u64>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
F: FnMut(T) -> io::Result<()>,
F: FnMut(T) -> serde_json::Result<()>,
{
struct SeqVisitor<T, F>(F, PhantomData<T>);
impl<'de, T, F> Visitor<'de> for SeqVisitor<T, F>
where
T: Deserialize<'de>,
F: FnMut(T) -> io::Result<()>,
F: FnMut(T) -> serde_json::Result<()>,
{
type Value = io::Result<u64>;
type Value = serde_json::Result<u64>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a nonempty sequence")
@@ -194,7 +292,7 @@ where
fn visit_seq<A>(
mut self,
mut seq: A,
) -> std::result::Result<io::Result<u64>, <A as SeqAccess<'de>>::Error>
) -> std::result::Result<serde_json::Result<u64>, <A as SeqAccess<'de>>::Error>
where
A: SeqAccess<'de>,
{
@@ -203,7 +301,7 @@ where
match self.0(value) {
Ok(()) => max += 1,
Err(e) => return Ok(Err(e)),
};
}
}
Ok(Ok(max))
}
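
The rewritten read_csv above keeps the `field:type` CSV header convention of the previous obkv-based reader: a header may end in `:string`, `:boolean` or `:number`, and only the suffix after the last colon is interpreted. A minimal standalone sketch of that convention (parse_header below is an illustrative stand-in, not the exact parse_csv_header from the diff):

#[derive(Debug, PartialEq)]
enum FieldType {
    String,
    Boolean,
    Number,
}

// Only the suffix after the last `:` is looked at; unknown suffixes keep the whole
// header as the field name and default to a string column.
fn parse_header(header: &str) -> (&str, FieldType) {
    match header.rsplit_once(':') {
        Some((name, "string")) => (name, FieldType::String),
        Some((name, "boolean")) => (name, FieldType::Boolean),
        Some((name, "number")) => (name, FieldType::Number),
        _ => (header, FieldType::String),
    }
}

fn main() {
    assert_eq!(parse_header("title"), ("title", FieldType::String));
    assert_eq!(parse_header("price:number"), ("price", FieldType::Number));
    assert_eq!(parse_header("price:in:eur:number"), ("price:in:eur", FieldType::Number));
    assert_eq!(parse_header("available:boolean"), ("available", FieldType::Boolean));
}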

View File

@@ -238,8 +238,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
@@ -389,7 +395,10 @@ impl ErrorCode for milli::Error {
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidUrl { .. }
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
| UserError::InvalidPrompt(_)
| UserError::InvalidDisableBinaryQuantization { .. } => {
Code::InvalidSettingsEmbedders
}
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,

View File

@@ -1,3 +1,4 @@
use std::borrow::Borrow;
use std::error::Error;
use std::fmt;
use std::str::FromStr;
@@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode};
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)]
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
pub struct IndexUid(String);
@@ -70,6 +71,12 @@ impl From<IndexUid> for String {
}
}
impl Borrow<String> for IndexUid {
fn borrow(&self) -> &String {
&self.0
}
}
#[derive(Debug)]
pub struct IndexUidFormatError {
pub invalid_uid: String,
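
The Borrow<String> impl added above lets a map keyed by IndexUid be queried and drained with a plain String key, which is what `federation.facets_by_index.remove(&index_uid)` in the federated-search changes below relies on. A minimal sketch of the pattern, using a simplified Uid newtype in place of IndexUid:

use std::borrow::Borrow;
use std::collections::BTreeMap;

// Simplified stand-in for IndexUid: an owned String newtype with Ord + Borrow<String>.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct Uid(String);

impl Borrow<String> for Uid {
    fn borrow(&self) -> &String {
        &self.0
    }
}

fn main() {
    let mut facets_by_index: BTreeMap<Uid, Vec<String>> = BTreeMap::new();
    facets_by_index.insert(Uid("doggos".to_string()), vec!["breed".to_string()]);

    // Because Uid: Borrow<String>, the map can be queried and drained with a &String
    // without allocating a new Uid key first.
    let index_uid = "doggos".to_string();
    assert!(facets_by_index.contains_key(&index_uid));
    assert_eq!(facets_by_index.remove(&index_uid), Some(vec!["breed".to_string()]));
}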

View File

@@ -1,134 +1,6 @@
use deserr::Deserr;
use milli::LocalizedAttributesRule;
use serde::{Deserialize, Serialize};
use serde_json::json;
/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
///
/// This enum implements `Deserr` in order to be used in the API.
macro_rules! make_locale {
($($language:tt), +) => {
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub enum Locale {
$($language),+,
}
impl From<milli::tokenizer::Language> for Locale {
fn from(other: milli::tokenizer::Language) -> Locale {
match other {
$(milli::tokenizer::Language::$language => Locale::$language), +
}
}
}
impl From<Locale> for milli::tokenizer::Language {
fn from(other: Locale) -> milli::tokenizer::Language {
match other {
$(Locale::$language => milli::tokenizer::Language::$language), +,
}
}
}
#[derive(Debug)]
pub struct LocaleFormatError {
pub invalid_locale: String,
}
impl std::fmt::Display for LocaleFormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::<Vec<_>>().join(", ");
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales)
}
}
};
}
make_locale! {
Epo,
Eng,
Rus,
Cmn,
Spa,
Por,
Ita,
Ben,
Fra,
Deu,
Ukr,
Kat,
Ara,
Hin,
Jpn,
Heb,
Yid,
Pol,
Amh,
Jav,
Kor,
Nob,
Dan,
Swe,
Fin,
Tur,
Nld,
Hun,
Ces,
Ell,
Bul,
Bel,
Mar,
Kan,
Ron,
Slv,
Hrv,
Srp,
Mkd,
Lit,
Lav,
Est,
Tam,
Vie,
Urd,
Tha,
Guj,
Uzb,
Pan,
Aze,
Ind,
Tel,
Pes,
Mal,
Ori,
Mya,
Nep,
Sin,
Khm,
Tuk,
Aka,
Zul,
Sna,
Afr,
Lat,
Slk,
Cat,
Tgl,
Hye
}
impl std::error::Error for LocaleFormatError {}
impl std::str::FromStr for Locale {
type Err = LocaleFormatError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
milli::tokenizer::Language::from_code(s)
.map(Self::from)
.ok_or(LocaleFormatError { invalid_locale: s.to_string() })
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[deserr(rename_all = camelCase)]
@@ -155,3 +27,140 @@ impl From<LocalizedAttributesRuleView> for LocalizedAttributesRule {
}
}
}
/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
///
/// This enum implements `Deserr` in order to be used in the API.
macro_rules! make_locale {
($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => {
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub enum Locale {
$($iso_639_1,)+
$($iso_639_3,)+
Cmn,
}
impl From<milli::tokenizer::Language> for Locale {
fn from(other: milli::tokenizer::Language) -> Locale {
match other {
$(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+
milli::tokenizer::Language::Cmn => Locale::Cmn,
}
}
}
impl From<Locale> for milli::tokenizer::Language {
fn from(other: Locale) -> milli::tokenizer::Language {
match other {
$(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+
$(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+
Locale::Cmn => milli::tokenizer::Language::Cmn,
}
}
}
impl std::str::FromStr for Locale {
type Err = LocaleFormatError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let locale = match s {
$($iso_639_1_str => Locale::$iso_639_1,)+
$($iso_639_3_str => Locale::$iso_639_3,)+
"cmn" => Locale::Cmn,
_ => return Err(LocaleFormatError { invalid_locale: s.to_string() }),
};
Ok(locale)
}
}
#[derive(Debug)]
pub struct LocaleFormatError {
pub invalid_locale: String,
}
impl std::fmt::Display for LocaleFormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"];
valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right)));
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", "))
}
}
impl std::error::Error for LocaleFormatError {}
};
}
make_locale!(
(Af, "af") => (Afr, "afr"),
(Ak, "ak") => (Aka, "aka"),
(Am, "am") => (Amh, "amh"),
(Ar, "ar") => (Ara, "ara"),
(Az, "az") => (Aze, "aze"),
(Be, "be") => (Bel, "bel"),
(Bn, "bn") => (Ben, "ben"),
(Bg, "bg") => (Bul, "bul"),
(Ca, "ca") => (Cat, "cat"),
(Cs, "cs") => (Ces, "ces"),
(Da, "da") => (Dan, "dan"),
(De, "de") => (Deu, "deu"),
(El, "el") => (Ell, "ell"),
(En, "en") => (Eng, "eng"),
(Eo, "eo") => (Epo, "epo"),
(Et, "et") => (Est, "est"),
(Fi, "fi") => (Fin, "fin"),
(Fr, "fr") => (Fra, "fra"),
(Gu, "gu") => (Guj, "guj"),
(He, "he") => (Heb, "heb"),
(Hi, "hi") => (Hin, "hin"),
(Hr, "hr") => (Hrv, "hrv"),
(Hu, "hu") => (Hun, "hun"),
(Hy, "hy") => (Hye, "hye"),
(Id, "id") => (Ind, "ind"),
(It, "it") => (Ita, "ita"),
(Jv, "jv") => (Jav, "jav"),
(Ja, "ja") => (Jpn, "jpn"),
(Kn, "kn") => (Kan, "kan"),
(Ka, "ka") => (Kat, "kat"),
(Km, "km") => (Khm, "khm"),
(Ko, "ko") => (Kor, "kor"),
(La, "la") => (Lat, "lat"),
(Lv, "lv") => (Lav, "lav"),
(Lt, "lt") => (Lit, "lit"),
(Ml, "ml") => (Mal, "mal"),
(Mr, "mr") => (Mar, "mar"),
(Mk, "mk") => (Mkd, "mkd"),
(My, "my") => (Mya, "mya"),
(Ne, "ne") => (Nep, "nep"),
(Nl, "nl") => (Nld, "nld"),
(Nb, "nb") => (Nob, "nob"),
(Or, "or") => (Ori, "ori"),
(Pa, "pa") => (Pan, "pan"),
(Fa, "fa") => (Pes, "pes"),
(Pl, "pl") => (Pol, "pol"),
(Pt, "pt") => (Por, "por"),
(Ro, "ro") => (Ron, "ron"),
(Ru, "ru") => (Rus, "rus"),
(Si, "si") => (Sin, "sin"),
(Sk, "sk") => (Slk, "slk"),
(Sl, "sl") => (Slv, "slv"),
(Sn, "sn") => (Sna, "sna"),
(Es, "es") => (Spa, "spa"),
(Sr, "sr") => (Srp, "srp"),
(Sv, "sv") => (Swe, "swe"),
(Ta, "ta") => (Tam, "tam"),
(Te, "te") => (Tel, "tel"),
(Tl, "tl") => (Tgl, "tgl"),
(Th, "th") => (Tha, "tha"),
(Tk, "tk") => (Tuk, "tuk"),
(Tr, "tr") => (Tur, "tur"),
(Uk, "uk") => (Ukr, "ukr"),
(Ur, "ur") => (Urd, "urd"),
(Uz, "uz") => (Uzb, "uzb"),
(Vi, "vi") => (Vie, "vie"),
(Yi, "yi") => (Yid, "yid"),
(Zh, "zh") => (Zho, "zho"),
(Zu, "zu") => (Zul, "zul"),
);
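
With this rewrite of make_locale!, a locale can be given either as an ISO 639-1 or an ISO 639-3 code, and both resolve to the same milli language (plus the special case `cmn`). A simplified standalone sketch of that dual-code lookup, with a hand-written table excerpt standing in for the macro-generated FromStr impl:

// Hand-written excerpt of the (ISO 639-1, ISO 639-3) pairs from the macro call above;
// language_of stands in for the generated FromStr/From impls.
fn language_of(code: &str) -> Option<&'static str> {
    const TABLE: &[(&str, &str)] = &[
        ("en", "eng"),
        ("fr", "fra"),
        ("ja", "jpn"),
        ("zh", "zho"),
    ];
    if code == "cmn" {
        // `cmn` is handled separately in the macro, mapping to Language::Cmn.
        return Some("cmn");
    }
    TABLE
        .iter()
        .find(|(short, long)| *short == code || *long == code)
        .map(|(_, long)| *long)
}

fn main() {
    // Both the two-letter and the three-letter code resolve to the same language.
    assert_eq!(language_of("en"), Some("eng"));
    assert_eq!(language_of("eng"), Some("eng"));
    assert_eq!(language_of("cmn"), Some("cmn"));
    assert_eq!(language_of("zz"), None);
}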

View File

@@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.2"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
@@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
german = ["meilisearch-types/german"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"

View File

@@ -646,8 +646,6 @@ pub struct SearchAggregator {
max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool,
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool,
retrieve_vectors: bool,
@@ -795,7 +793,6 @@ impl SearchAggregator {
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.embedder = hybrid.embedder.is_some();
ret.hybrid = true;
}
@@ -863,7 +860,6 @@ impl SearchAggregator {
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
@@ -923,7 +919,6 @@ impl SearchAggregator {
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.embedder |= embedder;
// pagination
self.max_limit = self.max_limit.max(max_limit);
@@ -999,7 +994,6 @@ impl SearchAggregator {
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
@@ -1051,7 +1045,6 @@ impl SearchAggregator {
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
@@ -1782,7 +1775,6 @@ pub struct SimilarAggregator {
used_syntax: HashMap<String, usize>,
// Whether a non-default embedder was specified
embedder: bool,
retrieve_vectors: bool,
// pagination
@@ -1803,7 +1795,7 @@ impl SimilarAggregator {
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
let SimilarQuery {
id: _,
embedder,
embedder: _,
offset,
limit,
attributes_to_retrieve: _,
@@ -1851,7 +1843,6 @@ impl SimilarAggregator {
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some();
ret.retrieve_vectors = *retrieve_vectors;
ret
@@ -1883,7 +1874,6 @@ impl SimilarAggregator {
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
retrieve_vectors,
} = other;
@@ -1914,7 +1904,6 @@ impl SimilarAggregator {
*used_syntax = used_syntax.saturating_add(value);
}
self.embedder |= embedder;
self.retrieve_vectors |= retrieve_vectors;
// pagination
@@ -1948,7 +1937,6 @@ impl SimilarAggregator {
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
retrieve_vectors,
} = self;
@@ -1980,9 +1968,6 @@ impl SimilarAggregator {
"vector": {
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,

View File

@@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
@@ -27,10 +28,20 @@ pub enum MeilisearchHttpError {
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
#[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
FederationOptionsInNonFederatedRequest(usize),
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")]
PaginationInFederatedQuery(usize, &'static str),
#[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
FacetsInFederatedQuery(usize, String, Vec<String>),
#[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
InconsistentFacetOrder {
facet: String,
previous_facet_order: OrderBy,
previous_uid: String,
index_facet_order: OrderBy,
current_uid: String,
},
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
@@ -61,7 +72,7 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
MissingSearchHybrid,
}
@@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
Code::InvalidMultiSearchQueryPagination
}
MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets,
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
}
}
}
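
The new InconsistentFacetOrder variant carries both index uids and both orders so the rendered message can name the exact mismatch. A minimal sketch of how such a named-field error message renders, assuming the thiserror crate and using plain strings in place of OrderBy:

use thiserror::Error;

// Stand-in error with the same named-field message style; not the real MeilisearchHttpError.
#[derive(Debug, Error)]
enum SketchError {
    #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.")]
    InconsistentFacetOrder {
        facet: String,
        previous_facet_order: &'static str,
        previous_uid: String,
        index_facet_order: &'static str,
        current_uid: String,
    },
}

fn main() {
    let err = SketchError::InconsistentFacetOrder {
        facet: "breed".to_string(),
        previous_facet_order: "alpha",
        previous_uid: "doggos".to_string(),
        index_facet_order: "count",
        current_uid: "cattos".to_string(),
    };
    // Prints the fully interpolated message built by the #[error(...)] attribute.
    println!("{err}");
}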

View File

@@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet {
}
}
impl From<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self {
impl TryFrom<SearchQueryGet> for SearchQuery {
type Error = ResponseError;
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
@@ -140,19 +142,28 @@ impl From<SearchQueryGet> for SearchQuery {
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
(None, None) => None,
(None, Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
(None, Some(_)) => {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
meilisearch_types::error::Code::InvalidHybridQuery,
));
}
(Some(embedder), None) => {
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
}
(Some(embedder), None) => Some(HybridQuery {
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
embedder: Some(embedder),
}),
(Some(embedder), Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
}
};
Self {
if other.vector.is_some() && hybrid.is_none() {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `vector` is present".into(),
meilisearch_types::error::Code::MissingSearchHybrid,
));
}
Ok(Self {
q: other.q,
vector: other.vector.map(CS::into_inner),
offset: other.offset.0,
@@ -179,7 +190,7 @@ impl From<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
}
})
}
}
@@ -219,7 +230,7 @@ pub async fn search_with_url_query(
debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into();
let mut query: SearchQuery = params.into_inner().try_into()?;
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -312,44 +323,36 @@ pub fn search_kind(
features.check_vector("Passing `hybrid` as a parameter")?;
}
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
if query.vector.is_none() {
match &query.q {
Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly),
None => return Ok(SearchKind::KeywordOnly),
_ => {}
// handle with care, the order of cases matters, the semantics is subtle
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
// empty query, no vector => placeholder search
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
// no query, no vector => placeholder search
(None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
}
}
match &query.hybrid {
Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => {
Ok(SearchKind::semantic(
index_scheduler,
index,
embedder.as_deref(),
query.vector.as_ref().map(Vec::len),
)?)
}
Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => {
// hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
Ok(SearchKind::KeywordOnly)
}
Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid(
// no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler,
index,
embedder.as_deref(),
embedder,
**semantic_ratio,
query.vector.as_ref().map(Vec::len),
)?),
None => match (query.q.as_deref(), query.vector.as_deref()) {
(_query, None) => Ok(SearchKind::KeywordOnly),
(None, Some(_vector)) => Ok(SearchKind::semantic(
index_scheduler,
index,
None,
query.vector.as_ref().map(Vec::len),
)?),
(Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
},
v.map(|v| v.len()),
),
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
}
}
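
The rewritten search_kind replaces the earlier nested checks with a single match over (q, hybrid, vector). A simplified standalone sketch of that decision table, with a plain enum and a ratio/flag pair standing in for SearchKind, HybridQuery and the vector payload (embedder resolution and error plumbing are left out):

// Plain stand-ins: Kind for SearchKind, a ratio Option for the hybrid block,
// and a bool for whether a vector was passed.
#[derive(Debug, PartialEq)]
enum Kind {
    Keyword,
    Semantic,
    Hybrid,
    MissingHybrid, // the real code returns the MissingSearchHybrid error here
}

fn kind(q: Option<&str>, semantic_ratio: Option<f32>, has_vector: bool) -> Kind {
    match (q, semantic_ratio, has_vector) {
        // empty or absent query without a vector => placeholder/keyword search
        (Some(q), _, false) if q.trim().is_empty() => Kind::Keyword,
        (None, _, false) => Kind::Keyword,
        // semantic_ratio == 1.0 => pure semantic, == 0.0 => pure keyword
        (_, Some(r), _) if r == 1.0 => Kind::Semantic,
        (_, Some(r), _) if r == 0.0 => Kind::Keyword,
        // no query but a hybrid block and a vector => semantic
        (None, Some(_), true) => Kind::Semantic,
        // query alone => keyword
        (Some(_), None, false) => Kind::Keyword,
        // query plus hybrid block => hybrid, with or without a vector
        (Some(_), Some(_), _) => Kind::Hybrid,
        // vector without a hybrid block => error
        (_, None, true) => Kind::MissingHybrid,
    }
}

fn main() {
    assert_eq!(kind(Some("   "), None, false), Kind::Keyword);
    assert_eq!(kind(None, Some(0.5), true), Kind::Semantic);
    assert_eq!(kind(Some("best doggo"), Some(0.5), true), Kind::Hybrid);
    assert_eq!(kind(None, None, true), Kind::MissingHybrid);
}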

View File

@@ -643,12 +643,19 @@ fn embedder_analytics(
.max()
});
let binary_quantization_used = setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.binary_quantized.set().is_some())
});
json!(
{
"total": setting.as_ref().map(|s| s.len()),
"sources": sources,
"document_template_used": document_template_used,
"document_template_max_bytes": document_template_max_bytes
"document_template_max_bytes": document_template_max_bytes,
"binary_quantization_used": binary_quantization_used,
}
)
}

View File

@@ -102,8 +102,8 @@ async fn similar(
let index = index_scheduler.index(&index_uid)?;
let (embedder_name, embedder) =
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
let (embedder_name, embedder, quantized) =
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
tokio::task::spawn_blocking(move || {
perform_similar(
@@ -111,6 +111,7 @@ async fn similar(
query,
embedder_name,
embedder,
quantized,
retrieve_vectors,
index_scheduler.features(),
)
@@ -139,8 +140,8 @@ pub struct SimilarQueryGet {
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
#[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: String,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]

View File

@@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use indexmap::IndexMap;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, TimeBudget};
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;
use super::ranking_rules::{self, RankingRules};
use super::{
prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
SearchQuery, SearchQueryWithIndex,
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
@@ -73,6 +77,17 @@ pub struct Federation {
pub limit: usize,
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
pub struct MergeFacets {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
}
#[derive(Debug, deserr::Deserr)]
@@ -82,7 +97,7 @@ pub struct FederatedSearch {
#[deserr(default)]
pub federation: Option<Federation>,
}
#[derive(Serialize, Clone, PartialEq)]
#[derive(Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
@@ -93,6 +108,13 @@ pub struct FederatedSearchResult {
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
@@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult {
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
} = self;
let mut debug = f.debug_struct("SearchResult");
@@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult {
if *degraded {
debug.field("degraded", degraded);
}
if let Some(facet_distribution) = facet_distribution {
debug.field("facet_distribution", &facet_distribution);
}
if let Some(facet_stats) = facet_stats {
debug.field("facet_stats", &facet_stats);
}
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
if !facets_by_index.is_empty() {
debug.field("facets_by_index", &facets_by_index);
}
debug.finish()
}
@@ -313,16 +347,104 @@ struct SearchHitByIndex {
}
struct SearchResultByIndex {
index: String,
hits: Vec<SearchHitByIndex>,
candidates: RoaringBitmap,
estimated_total_hits: usize,
degraded: bool,
used_negative_operator: bool,
facets: Option<ComputedFacets>,
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
if let Some(facets) = facets {
self.0.insert(index, facets);
}
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn merge(
self,
MergeFacets { max_values_per_facet }: MergeFacets,
facet_order: BTreeMap<String, (String, OrderBy)>,
) -> Option<ComputedFacets> {
if self.is_empty() {
return None;
}
let mut distribution: BTreeMap<String, _> = Default::default();
let mut stats: BTreeMap<String, FacetStats> = Default::default();
for facets_by_index in self.0.into_values() {
for (facet, index_distribution) in facets_by_index.distribution {
match distribution.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_distribution);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let distribution = entry.get_mut();
for (value, index_count) in index_distribution {
distribution
.entry(value)
.and_modify(|count| *count += index_count)
.or_insert(index_count);
}
}
}
}
for (facet, index_stats) in facets_by_index.stats {
match stats.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_stats);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let stats = entry.get_mut();
stats.min = f64::min(stats.min, index_stats.min);
stats.max = f64::max(stats.max, index_stats.max);
}
}
}
}
// fixup order
for (facet, values) in &mut distribution {
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
match order_by {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
if let Some(max_values_per_facet) = max_values_per_facet {
values.truncate(max_values_per_facet)
};
}
Some(ComputedFacets { distribution, stats })
}
}
pub fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
mut federation: Federation,
features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> {
let before_search = std::time::Instant::now();
@@ -342,6 +464,16 @@ pub fn perform_federated_search(
.into());
}
if let Some(facets) = federated_query.has_facets() {
let facets = facets.to_owned();
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
query_index,
federated_query.index_uid.into_inner(),
facets,
)
.into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
@@ -353,13 +485,24 @@ pub fn perform_federated_search(
// 2. perform queries, merge and make hits index by index
let required_hit_count = federation.limit + federation.offset;
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
// Then in step (3), we'll update its value if there is any semantic search
let mut semantic_hit_count = None;
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
// remember the order and name of first index for each facet when merging with index settings
// to detect if the order is inconsistent for a facet.
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
{
Some(MergeFacets { .. }) => Some(Default::default()),
_ => None,
};
for (index_uid, queries) in queries_by_index {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
@@ -367,9 +510,8 @@ pub fn perform_federated_search(
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
if let Some(query) = queries.first() {
err.message =
format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
if let Some(query_index) = first_query_index {
err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
}
return Err(err);
}
@@ -394,6 +536,23 @@ pub fn perform_federated_search(
let mut used_negative_operator = false;
let mut candidates = RoaringBitmap::new();
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
return Err(error);
}
// 2.1. Compute all candidates for each query in the index
let mut results_by_query = Vec::with_capacity(queries.len());
@@ -562,34 +721,116 @@ pub fn perform_federated_search(
.collect();
let merged_result = merged_result?;
let estimated_total_hits = candidates.len() as usize;
let facets = facets_by_index
.map(|facets_by_index| {
compute_facet_distribution_stats(
&facets_by_index,
&index,
&rtxn,
candidates,
super::Route::MultiSearch,
)
})
.transpose()
.map_err(|mut error| {
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
error.message,
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
error
})?;
results_by_index.push(SearchResultByIndex {
index: index_uid,
hits: merged_result,
candidates,
estimated_total_hits,
degraded,
used_negative_operator,
facets,
});
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
for (index_uid, facets) in federation.facets_by_index {
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
err.message
);
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
);
return Err(error);
}
if let Some(facets) = facets {
if let Err(mut error) = compute_facet_distribution_stats(
&facets,
&index,
&rtxn,
Default::default(),
super::Route::MultiSearch,
) {
error.message =
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
return Err(error);
}
}
}
// 3. merge hits and metadata across indexes
// 3.1 merge metadata
let (estimated_total_hits, degraded, used_negative_operator) = {
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
let mut estimated_total_hits = 0;
let mut degraded = false;
let mut used_negative_operator = false;
let mut facets: FederatedFacets = FederatedFacets::default();
for SearchResultByIndex {
index,
hits: _,
candidates,
estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
degraded: degraded_by_index,
used_negative_operator: used_negative_operator_by_index,
} in &results_by_index
} in &mut results_by_index
{
estimated_total_hits += candidates.len() as usize;
estimated_total_hits += *estimated_total_hits_by_index;
degraded |= *degraded_by_index;
used_negative_operator |= *used_negative_operator_by_index;
let facets_by_index = std::mem::take(facets_by_index);
let index = std::mem::take(index);
facets.insert(index, facets_by_index);
}
(estimated_total_hits, degraded, used_negative_operator)
(estimated_total_hits, degraded, used_negative_operator, facets)
};
// 3.2 merge hits
@@ -606,6 +847,20 @@ pub fn perform_federated_search(
.map(|hit| hit.hit)
.collect();
let (facet_distribution, facet_stats, facets_by_index) =
match federation.merge_facets.zip(facet_order) {
Some((merge_facets, facet_order)) => {
let facets = facets.merge(merge_facets, facet_order);
let (facet_distribution, facet_stats) = facets
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
(facet_distribution, facet_stats, FederatedFacets::default())
}
None => (None, None, facets),
};
let search_result = FederatedSearchResult {
hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(),
@@ -617,7 +872,39 @@ pub fn perform_federated_search(
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
};
Ok(search_result)
}
fn check_facet_order(
facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
current_index: &str,
facets_by_index: &Option<Vec<String>>,
index: &milli::Index,
rtxn: &milli::heed::RoTxn<'_>,
) -> Result<(), ResponseError> {
if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
let index_facet_order = index.sort_facet_values_by(rtxn)?;
for facet in facets_by_index {
let index_facet_order = index_facet_order.get(facet);
let (previous_index, previous_facet_order) = facet_order
.entry(facet.to_owned())
.or_insert_with(|| (current_index.to_owned(), index_facet_order));
if previous_facet_order != &index_facet_order {
return Err(MeilisearchHttpError::InconsistentFacetOrder {
facet: facet.clone(),
previous_facet_order: *previous_facet_order,
previous_uid: previous_index.clone(),
current_uid: current_index.to_owned(),
index_facet_order,
}
.into());
}
}
};
Ok(())
}
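
FederatedFacets::merge above folds the per-index facet results into one: value counts for the same facet are summed, and numeric stats are widened with f64::min / f64::max. A small self-contained sketch of the counting half of that merge, with plain BTreeMaps standing in for ComputedFacets:

use std::collections::BTreeMap;

// Per-index facet distributions: facet name -> (value -> count).
type Distribution = BTreeMap<String, BTreeMap<String, u64>>;

fn merge_distributions(per_index: Vec<Distribution>) -> Distribution {
    let mut merged = Distribution::new();
    for distribution in per_index {
        for (facet, values) in distribution {
            // Same facet coming from several indexes: sum the counts per value.
            let entry = merged.entry(facet).or_default();
            for (value, count) in values {
                *entry.entry(value).or_insert(0) += count;
            }
        }
    }
    merged
}

fn main() {
    let doggos: Distribution = BTreeMap::from([(
        "breed".to_string(),
        BTreeMap::from([("akita".to_string(), 2), ("corgi".to_string(), 1)]),
    )]);
    let cattos: Distribution = BTreeMap::from([(
        "breed".to_string(),
        BTreeMap::from([("corgi".to_string(), 4)]),
    )]);

    let merged = merge_distributions(vec![doggos, cattos]);
    assert_eq!(merged["breed"]["corgi"], 5);
    assert_eq!(merged["breed"]["akita"], 2);
}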

View File

@@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery {
pub struct HybridQuery {
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
pub semantic_ratio: SemanticRatio,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
pub embedder: String,
}
#[derive(Clone)]
pub enum SearchKind {
KeywordOnly,
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
}
impl SearchKind {
pub(crate) fn semantic(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
embedder_name: &str,
vector_len: Option<usize>,
) -> Result<Self, ResponseError> {
let (embedder_name, embedder) =
let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::SemanticOnly { embedder_name, embedder })
Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
}
pub(crate) fn hybrid(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
embedder_name: &str,
semantic_ratio: f32,
vector_len: Option<usize>,
) -> Result<Self, ResponseError> {
let (embedder_name, embedder) =
let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
}
pub(crate) fn embedder(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
embedder_name: &str,
vector_len: Option<usize>,
) -> Result<(String, Arc<Embedder>), ResponseError> {
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name());
let embedder = embedders.get(embedder_name);
let embedder = embedder
let (embedder, _, quantized) = embedders
.get(embedder_name)
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
.map_err(milli::Error::from)?
.0;
.map_err(milli::Error::from)?;
if let Some(vector_len) = vector_len {
if vector_len != embedder.dimensions() {
@@ -332,7 +328,7 @@ impl SearchKind {
}
}
Ok((embedder_name.to_owned(), embedder))
Ok((embedder_name.to_owned(), embedder, quantized))
}
}
@@ -441,9 +437,6 @@ pub struct SearchQueryWithIndex {
}
impl SearchQueryWithIndex {
pub fn has_federation_options(&self) -> bool {
self.federation_options.is_some()
}
pub fn has_pagination(&self) -> Option<&'static str> {
if self.offset.is_some() {
Some("offset")
@@ -458,6 +451,10 @@ impl SearchQueryWithIndex {
}
}
pub fn has_facets(&self) -> Option<&[String]> {
self.facets.as_deref().filter(|v| !v.is_empty())
}
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
let SearchQueryWithIndex {
index_uid,
@@ -537,8 +534,8 @@ pub struct SimilarQuery {
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
pub embedder: String,
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
@@ -792,7 +789,7 @@ fn prepare_search<'t>(
search.query(q);
}
}
SearchKind::SemanticOnly { embedder_name, embedder } => {
SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
@@ -806,14 +803,19 @@ fn prepare_search<'t>(
}
};
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
}
SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => {
SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
if let Some(q) = &query.q {
search.query(q);
}
// will be embedded in hybrid search if necessary
search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone());
search.semantic(
embedder_name.clone(),
embedder.clone(),
*quantized,
query.vector.clone(),
);
}
}
@@ -987,39 +989,13 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
};
let (facet_distribution, facet_stats) = match facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
let max_values_by_facet = index
.max_values_per_facet(&rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
let sort_facet_values_by =
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
if fields.iter().all(|f| f != "*") {
let fields: Vec<_> =
fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect();
facet_distribution.facets(fields);
}
let distribution = facet_distribution
.candidates(candidates)
.default_order_by(sort_facet_values_by.get("*"))
.execute()?;
let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats))
}
None => (None, None),
};
let facet_stats = facet_stats.map(|stats| {
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
});
let (facet_distribution, facet_stats) = facets
.map(move |facets| {
compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
})
.transpose()?
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
let result = SearchResult {
hits: documents,
@@ -1035,6 +1011,61 @@ pub fn perform_search(
Ok(result)
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct ComputedFacets {
pub distribution: BTreeMap<String, IndexMap<String, u64>>,
pub stats: BTreeMap<String, FacetStats>,
}
enum Route {
Search,
MultiSearch,
}
fn compute_facet_distribution_stats<S: AsRef<str>>(
facets: &[S],
index: &Index,
rtxn: &RoTxn,
candidates: roaring::RoaringBitmap,
route: Route,
) -> Result<ComputedFacets, ResponseError> {
let mut facet_distribution = index.facets_distribution(rtxn);
let max_values_by_facet = index
.max_values_per_facet(rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?;
// add specific facet if there is no placeholder
if facets.iter().all(|f| f.as_ref() != "*") {
let fields: Vec<_> =
facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect();
facet_distribution.facets(fields);
}
let distribution = facet_distribution
.candidates(candidates)
.default_order_by(sort_facet_values_by.get("*"))
.execute()
.map_err(|error| match (error, route) {
(
error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution {
..
}),
Route::MultiSearch,
) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets),
(error, _) => error.into(),
})?;
let stats = facet_distribution.compute_stats()?;
let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect();
Ok(ComputedFacets { distribution, stats })
}
pub fn search_from_kind(
search_kind: SearchKind,
search: milli::Search<'_>,
@@ -1413,6 +1444,7 @@ pub fn perform_similar(
query: SimilarQuery,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<SimilarResult, ResponseError> {
@@ -1441,8 +1473,16 @@ pub fn perform_similar(
));
};
let mut similar =
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
let mut similar = milli::Similar::new(
internal_id,
offset,
limit,
index,
&rtxn,
embedder_name,
embedder,
quantized,
);
if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
@@ -1642,7 +1682,7 @@ fn add_non_formatted_ids_to_formatted_options(
fn make_document(
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
obkv: &obkv::KvReaderU16,
) -> Result<Document, MeilisearchHttpError> {
let mut document = serde_json::Map::new();


@@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -525,7 +525,7 @@ async fn delete_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -723,7 +723,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"


@@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() {
.search(json!({"filter": "title & Glass"}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() {
.search(json!({"filter": ["title & Glass"]}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"


@@ -128,7 +128,7 @@ async fn simple_search() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@@ -137,7 +137,7 @@ async fn simple_search() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@@ -146,7 +146,7 @@ async fn simple_search() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@@ -161,7 +161,7 @@ async fn limit_offset() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
@@ -174,7 +174,7 @@ async fn limit_offset() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
@@ -188,8 +188,11 @@ async fn simple_search_hf() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) =
index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"0");
@@ -197,7 +200,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
// disable ranking score as the vectors between architectures are not equal
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@@ -206,7 +209,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@@ -215,7 +218,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@@ -224,7 +227,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@@ -237,7 +240,7 @@ async fn distribution_shift() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
let (response, code) = index.search_post(search.clone()).await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
@@ -271,7 +274,7 @@ async fn highlighter() {
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2},
"hybrid": {"embedder": "default", "semanticRatio": 0.2},
"retrieveVectors": true,
"attributesToHighlight": [
"desc",
@@ -287,7 +290,7 @@ async fn highlighter() {
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8},
"hybrid": {"embedder": "default", "semanticRatio": 0.8},
"retrieveVectors": true,
"showRankingScore": true,
"attributesToHighlight": [
@@ -304,7 +307,7 @@ async fn highlighter() {
// no highlighting on full semantic
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0},
"hybrid": {"embedder": "default", "semanticRatio": 1.0},
"retrieveVectors": true,
"showRankingScore": true,
"attributesToHighlight": [
@@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
)
.await;
snapshot!(code, @"400 Bad Request");
@@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
)
.await;
snapshot!(code, @"400 Bad Request");
@@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_get(
&yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
)
.unwrap(),
)
@@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_get(
&yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
)
.unwrap(),
)
@@ -398,7 +401,7 @@ async fn single_document() {
let (response, code) = index
.search_post(
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
@@ -414,7 +417,7 @@ async fn query_combination() {
// search without query and vector, but with hybrid => still placeholder
let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@@ -423,7 +426,7 @@ async fn query_combination() {
// same with a different semantic ratio
let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@@ -432,7 +435,7 @@ async fn query_combination() {
// wrong vector dimensions
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"400 Bad Request");
@@ -447,7 +450,7 @@ async fn query_combination() {
// full vector
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@@ -456,7 +459,7 @@ async fn query_combination() {
// full keyword, without a query
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@@ -465,7 +468,7 @@ async fn query_combination() {
// query + vector, full keyword => keyword
let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@@ -480,7 +483,7 @@ async fn query_combination() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.",
"message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
"code": "missing_search_hybrid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
@@ -490,7 +493,7 @@ async fn query_combination() {
// full vector, without a vector => error
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
@@ -507,7 +510,7 @@ async fn query_combination() {
// hybrid without a vector => full keyword
let (response, code) = index
.search_post(
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
@@ -523,7 +526,7 @@ async fn retrieve_vectors() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@@ -573,7 +576,7 @@ async fn retrieve_vectors() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");

File diff suppressed because it is too large


@@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() {
index.add_documents(json!(documents), None).await;
index.wait_task(0).await;
index
.search(json!({
let (response, code) = index
.search_post(json!({
"vector": [1.0, 2.0, 3.0],
"hybrid": {
"embedder": "manual",
},
"showRankingScore": true
}), |response, code|{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
}))
.await;
{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}
index
.search(json!({
"retrieveVectors": true,
@@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() {
let (response, code) = index
.search_post(json!({
"vector": [1.0, 2.0, 3.0],
"hybrid": {
"embedder": "manual",
},
"showRankingScore": true,
"retrieveVectors": true,
}))

File diff suppressed because it is too large


@@ -18,7 +18,7 @@ async fn similar_unexisting_index() {
});
index
.similar(json!({"id": 287947}), |response, code| {
.similar(json!({"id": 287947, "embedder": "manual"}), |response, code| {
assert_eq!(code, 404);
assert_eq!(response, expected_response);
})
@@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"id": 287947})).await;
let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -199,7 +199,8 @@ async fn similar_not_found_id() {
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
let (response, code) =
index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -230,7 +231,8 @@ async fn similar_bad_offset() {
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
let (response, code) =
index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -241,7 +243,7 @@ async fn similar_bad_offset() {
}
"###);
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -272,7 +274,8 @@ async fn similar_bad_limit() {
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
let (response, code) =
index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -283,7 +286,7 @@ async fn similar_bad_limit() {
}
"###);
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -323,7 +326,8 @@ async fn similar_bad_filter() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
let (response, code) =
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -361,10 +365,10 @@ async fn filter_invalid_syntax_object() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
@@ -400,10 +404,10 @@ async fn filter_invalid_syntax_array() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
@@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() {
});
index
.similar(
json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
@@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() {
server.set_features(json!({"vectorStore": true})).await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
let (response, code) =
index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
let (response, code) =
index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{


@@ -80,9 +80,11 @@ async fn basic() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
.similar(
json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
@@ -154,13 +156,16 @@ async fn basic() {
}
]
"###);
})
},
)
.await;
index
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
.similar(
json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
@@ -232,7 +237,8 @@ async fn basic() {
}
]
"###);
})
},
)
.await;
}
@@ -272,7 +278,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
@@ -358,7 +364,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
@@ -426,7 +432,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
@@ -476,7 +482,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
@@ -508,7 +514,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
@@ -553,7 +559,7 @@ async fn filter() {
index
.similar(
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
@@ -617,7 +623,7 @@ async fn filter() {
index
.similar(
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
@@ -681,9 +687,11 @@ async fn limit_and_offset() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
.similar(
json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
@@ -704,12 +712,13 @@ async fn limit_and_offset() {
}
]
"###);
})
},
)
.await;
index
.similar(
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"


@@ -0,0 +1,380 @@
use meili_snap::{json_string, snapshot};
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
use crate::vector::generate_default_user_provided_documents;
#[actix_rt::test]
async fn retrieve_binary_quantize_status_in_the_settings() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (settings, code) = index.settings().await;
snapshot!(code, @"200 OK");
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (settings, code) = index.settings().await;
snapshot!(code, @"200 OK");
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (settings, code) = index.settings().await;
snapshot!(code, @"200 OK");
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
}
#[actix_rt::test]
async fn binary_quantize_before_sending_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents are binary quantized
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
-1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
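The snapshots above show what quantization does to user-provided vectors: every coordinate collapses to -1.0 or 1.0. A minimal, self-contained sketch of that mapping follows; the exact convention arroy applies internally (for instance to a coordinate of exactly 0.0) is an assumption here.

// Negative coordinates map to -1.0 and the rest to 1.0, matching the snapshots above.
fn binary_quantize(vector: &[f32]) -> Vec<f32> {
    vector.iter().map(|&x| if x < 0.0 { -1.0 } else { 1.0 }).collect()
}

fn main() {
    assert_eq!(binary_quantize(&[-1.2, -2.3, 3.2]), vec![-1.0, -1.0, 1.0]);
    assert_eq!(binary_quantize(&[2.5, 1.5, -130.0]), vec![1.0, 1.0, -1.0]);
}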
#[actix_rt::test]
async fn binary_quantize_after_sending_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// Make sure the documents are binary quantized
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
-1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn try_to_disable_binary_quantization() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
let ret = server.wait_task(response.uid()).await;
snapshot!(ret, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false
}
}
},
"error": {
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
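Since quantization cannot be reverted, the hint in the error above points at registering a second, non-quantized embedder instead. A sketch of that settings update in the style of these tests; the embedder name `manual_full` is invented for the example, and documents would still need their vectors regenerated under it.

// Hypothetical follow-up to the failed update above: keep `manual` as-is and add a
// fresh, non-quantized embedder (the name `manual_full` is made up for this sketch).
let (response, code) = index
    .update_settings(json!({
        "embedders": {
            "manual_full": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
    }))
    .await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();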
#[actix_rt::test]
async fn binary_quantize_clear_documents() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (value, _code) = index.clear_all_documents().await;
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents DB has been cleared
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) =
index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0,
"semanticHitCount": 0
}
"###);
}


@@ -1,3 +1,4 @@
mod binary_quantized;
mod openai;
mod rest;
mod settings;
@@ -624,7 +625,8 @@ async fn clear_documents() {
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
snapshot!(documents, @r###"
{
"hits": [],
@@ -685,7 +687,11 @@ async fn add_remove_one_vector_4588() {
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
let (documents, _code) = index
.search_post(
json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
)
.await;
snapshot!(documents, @r###"
{
"hits": [


@@ -449,7 +449,7 @@ async fn it_works() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"},
}))
.await;
snapshot!(code, @"200 OK");
@@ -489,7 +489,7 @@ async fn it_works() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -529,7 +529,7 @@ async fn it_works() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -616,7 +616,7 @@ async fn tokenize_long_text() {
"q": "grand chien de berger des montagnes",
"showRankingScore": true,
"attributesToRetrieve": ["id"],
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1064,7 +1064,7 @@ async fn smaller_dimensions() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1104,7 +1104,7 @@ async fn smaller_dimensions() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1144,7 +1144,7 @@ async fn smaller_dimensions() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1295,7 +1295,7 @@ async fn small_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1335,7 +1335,7 @@ async fn small_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1375,7 +1375,7 @@ async fn small_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1525,7 +1525,7 @@ async fn legacy_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1565,7 +1565,7 @@ async fn legacy_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1605,7 +1605,7 @@ async fn legacy_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1756,7 +1756,7 @@ async fn it_still_works() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1796,7 +1796,7 @@ async fn it_still_works() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@@ -1836,7 +1836,7 @@ async fn it_still_works() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");


@@ -218,7 +218,8 @@ async fn reset_embedder_documents() {
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
snapshot!(json_string!(documents), @r###"
{
"message": "Cannot find embedder with name `default`.",

View File

@@ -682,7 +682,7 @@ fn export_a_dump(
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
count += 1;

View File

@@ -12,12 +12,14 @@ readme.workspace = true
license.workspace = true
[dependencies]
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.0", default-features = false }
# charabia = { version = "0.9.0", default-features = false }
charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
@@ -27,9 +29,9 @@ fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
"rayon",
"tempfile",
] }
"rayon", # TODO Should we keep this feature
"tempfile"
], git = "https://github.com/meilisearch/grenad", branch = "various-improvements" }
heed = { version = "0.20.3", default-features = false, features = [
"serde-json",
"serde-bincode",
@@ -40,14 +42,14 @@ json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = "0.2.2"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
@@ -80,16 +82,16 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = "0.4.0"
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"
hashbrown = "0.14.5"
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
@@ -106,6 +108,8 @@ all-tokenizations = [
"charabia/greek",
"charabia/khmer",
"charabia/vietnamese",
"charabia/swedish-recomposition",
"charabia/german-segmentation",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB
@@ -138,6 +142,9 @@ khmer = ["charabia/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"]
# allow german specialized tokenization
german = ["charabia/german-segmentation"]
# force swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"]
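The `raw_value` feature switched on for `serde_json` above is what makes the zero-copy document handling used later in this diff possible: values can stay as unparsed JSON slices and only be decoded on demand. A self-contained sketch of the idea with plain `serde_json` (the milli `TopLevelMap` wrapper over such a map is not shown; this only needs the `raw_value` feature):

use std::collections::BTreeMap;
use serde_json::value::RawValue;

fn main() -> Result<(), serde_json::Error> {
    let body = r#"{ "id": 42, "title": "zero-copy" }"#;
    // Every value stays a borrowed, unparsed JSON slice.
    let top: BTreeMap<&str, &RawValue> = serde_json::from_str(body)?;
    // Only the field we actually need is parsed for real.
    let id: u64 = serde_json::from_str(top["id"].get())?;
    assert_eq!(id, 42);
    Ok(())
}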

View File

@@ -292,7 +292,7 @@ mod test {
.unwrap()
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -321,7 +321,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -348,7 +348,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -375,7 +375,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -402,7 +402,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -429,7 +429,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -456,7 +456,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -483,7 +483,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -510,7 +510,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@@ -555,7 +555,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,

View File

@@ -69,7 +69,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
#[derive(Debug, Clone)]
pub struct EnrichedDocument<'a> {
pub document: KvReader<'a, FieldId>,
pub document: &'a KvReader<FieldId>,
pub document_id: DocumentId,
}

View File

@@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter()
.map(|(field_id, value)| {
let field_name = index
@@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned()
}
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new();
for (k, v) in document.iter() {

View File

@@ -1,8 +1,10 @@
use std::borrow::Cow;
use std::iter;
use std::result::Result as StdResult;
use serde_json::Value;
use serde_json::{from_str, Value};
use crate::update::new::{CowStr, TopLevelMap};
use crate::{FieldId, InternalError, Object, Result, UserError};
/// The symbol used to define levels in a nested primary key.
@@ -52,7 +54,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id(
&self,
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
@@ -100,6 +102,45 @@ impl<'a> PrimaryKey<'a> {
}
}
/// Returns the document ID based on the primary key and
/// searches for it recursively in zero-copy-deserialized documents.
pub fn document_id_from_top_level_map<'p>(
&self,
document: &TopLevelMap<'p>,
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
fn get_docid<'p>(
document: &TopLevelMap<'p>,
primary_key: &[&str],
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
match primary_key {
[] => unreachable!("arrrgh"), // would None be ok?
[primary_key] => match document.0.get(*primary_key) {
Some(value) => match from_str::<u64>(value.get()) {
Ok(value) => Ok(Ok(CowStr(Cow::Owned(value.to_string())))),
Err(_) => match from_str(value.get()) {
Ok(document_id) => Ok(Ok(document_id)),
Err(e) => Ok(Err(DocumentIdExtractionError::InvalidDocumentId(
UserError::SerdeJson(e),
))),
},
},
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
[head, tail @ ..] => match document.0.get(*head) {
Some(value) => {
let document = from_str(value.get()).map_err(InternalError::SerdeJson)?;
get_docid(&document, tail)
}
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
}
}
/// TODO do not allocate a vec every time here
let primary_key: Vec<_> = self.name().split(PRIMARY_KEY_SPLIT_SYMBOL).collect();
get_docid(document, &primary_key)
}
/// Returns an `Iterator` that gives all the possible fields names the primary key
/// can have depending of the first level name and depth of the objects.
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
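The new `document_id_from_top_level_map` walks a dotted primary key (split on the level separator) and re-deserializes only the intermediate objects it has to. The same recursion, sketched over plain `serde_json::Value` so it stands alone (the names below are invented for the sketch, and the `.` separator is assumed to match `PRIMARY_KEY_SPLIT_SYMBOL`):

use serde_json::{json, Value};

/// Follow a dotted key such as "media.id" down through nested objects.
fn lookup<'a>(document: &'a Value, path: &[&str]) -> Option<&'a Value> {
    match path {
        [] => None,
        [leaf] => document.get(*leaf),
        [head, tail @ ..] => document.get(*head).and_then(|inner| lookup(inner, tail)),
    }
}

fn main() {
    let doc = json!({ "media": { "id": 7 } });
    let parts: Vec<&str> = "media.id".split('.').collect();
    assert_eq!(lookup(&doc, &parts), Some(&json!(7)));
}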

View File

@@ -72,15 +72,24 @@ impl<R> DocumentsBatchCursor<R> {
}
impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// Returns a single document from the database.
pub fn get(
&mut self,
offset: u32,
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_key_equal_to(offset.to_be_bytes())? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}
/// Returns the next document, starting from the first one. Subsequent calls to
/// `next_document` advance the document reader until all the documents have been read.
pub fn next_document(
&mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
}
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}

View File

@@ -258,6 +258,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
},
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
InvalidSettingsDimensions { embedder_name: String },
#[error(
"`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
)]
InvalidDisableBinaryQuantization { embedder_name: String },
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]

View File

@@ -4,6 +4,9 @@ use serde::{Deserialize, Serialize};
use crate::FieldId;
mod global;
pub use global::GlobalFieldsIdsMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldsIdsMap {
names_ids: BTreeMap<String, FieldId>,

View File

@@ -0,0 +1,86 @@
use std::collections::BTreeMap;
use std::sync::RwLock;
use crate::{FieldId, FieldsIdsMap};
/// A fields ids map that can be globally updated to add fields
#[derive(Debug, Clone)]
pub struct GlobalFieldsIdsMap<'indexing> {
global: &'indexing RwLock<FieldsIdsMap>,
local: LocalFieldsIdsMap,
}
#[derive(Debug, Clone)]
struct LocalFieldsIdsMap {
names_ids: BTreeMap<String, FieldId>,
ids_names: BTreeMap<FieldId, String>,
}
impl LocalFieldsIdsMap {
fn new(global: &RwLock<FieldsIdsMap>) -> Self {
let global = global.read().unwrap();
Self { names_ids: global.names_ids.clone(), ids_names: global.ids_names.clone() }
}
fn insert(&mut self, name: &str, field_id: FieldId) {
self.names_ids.insert(name.to_owned(), field_id);
self.ids_names.insert(field_id, name.to_owned());
}
fn name(&self, id: FieldId) -> Option<&str> {
self.ids_names.get(&id).map(String::as_str)
}
fn id(&self, name: &str) -> Option<FieldId> {
self.names_ids.get(name).copied()
}
}
impl<'indexing> GlobalFieldsIdsMap<'indexing> {
pub fn new(global: &'indexing RwLock<FieldsIdsMap>) -> Self {
Self { local: LocalFieldsIdsMap::new(global), global }
}
/// Returns the field id related to a field name; it will create a new field id if the
/// name is not already known. Returns `None` if the maximum field id has been reached.
pub fn id_or_insert(&mut self, name: &str) -> Option<FieldId> {
if let Some(field_id) = self.local.id(name) {
return Some(field_id);
}
{
// optimistically lookup the global map
let global = self.global.read().unwrap();
if let Some(field_id) = global.id(name) {
self.local.insert(name, field_id);
return Some(field_id);
}
}
{
let mut global = self.global.write().unwrap();
if let Some(field_id) = global.id(name) {
self.local.insert(name, field_id);
return Some(field_id);
}
let field_id = global.insert(name)?;
self.local.insert(name, field_id);
Some(field_id)
}
}
/// Get the name of a field based on its id.
pub fn name(&mut self, id: FieldId) -> Option<&str> {
if self.local.name(id).is_none() {
let global = self.global.read().unwrap();
let name = global.name(id)?;
self.local.insert(name, id);
}
self.local.name(id)
}
}
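`GlobalFieldsIdsMap` is a per-thread cache in front of a shared `RwLock`ed map: look up locally, then under a read lock, and only take the write lock (re-checking first) when the name is genuinely new. The same double-checked pattern, reduced to std types so it runs on its own (the struct and field names below are invented for the sketch):

use std::collections::HashMap;
use std::sync::RwLock;

struct LocalIds<'g> {
    global: &'g RwLock<HashMap<String, u16>>,
    local: HashMap<String, u16>,
}

impl<'g> LocalIds<'g> {
    fn id_or_insert(&mut self, name: &str) -> u16 {
        if let Some(&id) = self.local.get(name) {
            return id;
        }
        // Optimistic read: most of the time another thread already inserted the name.
        if let Some(&id) = self.global.read().unwrap().get(name) {
            self.local.insert(name.to_owned(), id);
            return id;
        }
        // Slow path: take the write lock, and re-check through `entry` before inserting.
        let mut global = self.global.write().unwrap();
        let next = global.len() as u16;
        let id = *global.entry(name.to_owned()).or_insert(next);
        self.local.insert(name.to_owned(), id);
        id
    }
}

fn main() {
    let global = RwLock::new(HashMap::new());
    let mut map = LocalIds { global: &global, local: HashMap::new() };
    assert_eq!(map.id_or_insert("title"), map.id_or_insert("title"));
}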

View File

@@ -6,10 +6,10 @@ use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>;
type DItem = &'a KvReaderU16;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::new(bytes))
Ok(KvReaderU16::from_slice(bytes))
}
}

View File

@@ -122,7 +122,7 @@ impl CboRoaringBitmapCodec {
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>,
deladd: &KvReaderDelAdd,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {

View File

@@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::vector::{Embedding, EmbeddingConfig};
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -162,7 +162,7 @@ pub struct Index {
/// Maps an embedder name to its id in the arroy store.
pub embedder_category_id: Database<Str, U8>,
/// Vector store based on arroy™.
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
pub vector_arroy: arroy::Database<Unspecified>,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
@@ -1251,12 +1251,20 @@ impl Index {
/* documents */
/// Returns a document by using the document id.
pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
self.documents
.get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })
.map_err(Into::into)
}
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
Ok(ids.into_iter().map(move |id| {
let kv = self
.documents
@@ -1271,7 +1279,7 @@ impl Index {
&self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
self.iter_documents(rtxn, ids)?.collect()
}
@@ -1279,7 +1287,7 @@ impl Index {
pub fn all_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
}
@@ -1303,7 +1311,7 @@ impl Index {
})?;
Ok(self.iter_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
let (_docid, obkv) = entry?;
match primary_key.document_id(&obkv, &fields)? {
match primary_key.document_id(obkv, &fields)? {
Ok(document_id) => Ok(document_id),
Err(_) => Err(InternalError::DocumentsError(
crate::documents::Error::InvalidDocumentFormat,
@@ -1614,15 +1622,17 @@ impl Index {
&'a self,
rtxn: &'a RoTxn<'a>,
embedder_id: u8,
) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
quantized: bool,
) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e.into()),
})
.transpose()
let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
// Here we don't care about the dimensions, but we want to know if we can read
// from the database or if its metadata are missing because there is no document with that many vectors.
match reader.dimensions(rtxn) {
Ok(_) => Some(Ok(reader)),
Err(arroy::Error::MissingMetadata(_)) => None,
Err(e) => Some(Err(e.into())),
}
})
}
@@ -1644,32 +1654,18 @@ impl Index {
docid: DocumentId,
) -> Result<BTreeMap<String, Vec<Embedding>>> {
let mut res = BTreeMap::new();
for row in self.embedder_category_id.iter(rtxn)? {
let (embedder_name, embedder_id) = row?;
let embedder_id = (embedder_id as u16) << 8;
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader?.item_vector(rtxn, docid)?;
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
res.insert(embedder_name.to_owned(), embeddings);
let embedding_configs = self.embedding_configs(rtxn)?;
for config in embedding_configs {
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
let embeddings = self
.arroy_readers(rtxn, embedder_id, config.config.quantized())
.map_while(|reader| {
reader
.and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
.transpose()
})
.collect::<Result<Vec<_>>>()?;
res.insert(config.name.to_owned(), embeddings);
}
Ok(res)
}
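The rewritten `embeddings` loop above leans on a small combinator trick: `map_while` stops at the first reader whose vector is absent, and `transpose` turns `Result<Option<T>>` into `Option<Result<T>>` so errors still short-circuit the final `collect`. A standalone illustration of that chain with dummy data (no arroy involved):

fn main() {
    // Each "reader" either fails, returns a vector, or has nothing stored (None).
    let readers: Vec<Result<Option<Vec<f32>>, String>> =
        vec![Ok(Some(vec![0.1, 0.2])), Ok(Some(vec![0.3, 0.4])), Ok(None), Ok(Some(vec![9.9]))];

    let embeddings: Result<Vec<Vec<f32>>, String> = readers
        .into_iter()
        // Result<Option<T>, E> -> Option<Result<T, E>>: stop at the first None, keep errors.
        .map_while(|item| item.transpose())
        .collect();

    // The trailing vector after the gap is never reached.
    assert_eq!(embeddings.unwrap().len(), 2);
}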

View File

@@ -55,7 +55,7 @@ pub use self::error::{
};
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::FieldsIdsMap;
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
@@ -214,7 +214,7 @@ pub fn bucketed_position(relative: u16) -> u16 {
pub fn obkv_to_json(
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16<'_>,
obkv: &obkv::KvReaderU16,
) -> Result<Object> {
displayed_fields
.iter()
@@ -232,10 +232,7 @@ pub fn obkv_to_json(
}
/// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json(
obkv: obkv::KvReaderU16<'_>,
fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
pub fn all_obkv_to_json(obkv: &obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
}
@@ -434,7 +431,7 @@ mod tests {
writer.insert(id1, b"1234").unwrap();
writer.insert(id2, b"4321").unwrap();
let contents = writer.into_inner().unwrap();
let obkv = obkv::KvReaderU16::new(&contents);
let obkv = obkv::KvReaderU16::from_slice(&contents);
let expected = json!({
"field1": 1234,

View File

@@ -30,13 +30,13 @@ impl ParsedValue {
impl<'a> Document<'a> {
pub fn new(
data: obkv::KvReaderU16<'a>,
data: &'a obkv::KvReaderU16,
side: DelAdd,
inverted_field_map: &'a FieldsIdsMap,
) -> Self {
let mut out_data = BTreeMap::new();
for (fid, raw) in data {
let obkv = KvReaderDelAdd::new(raw);
let obkv = KvReaderDelAdd::from_slice(raw);
let Some(raw) = obkv.get(side) else {
continue;
};

View File

@@ -111,7 +111,7 @@ impl Prompt {
pub fn render(
&self,
document: obkv::KvReaderU16<'_>,
document: &obkv::KvReaderU16,
side: DelAdd,
field_id_map: &FieldsIdsMapWithMetadata,
) -> Result<String, RenderPromptError> {

View File

@@ -1,4 +1,5 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Display;
use std::ops::ControlFlow;
use std::{fmt, mem};
@@ -37,6 +38,15 @@ pub enum OrderBy {
Count,
}
impl Display for OrderBy {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
OrderBy::Lexicographic => f.write_str("alphabetically"),
OrderBy::Count => f.write_str("by count"),
}
}
}
pub struct FacetDistribution<'a> {
facets: Option<HashMap<String, OrderBy>>,
candidates: Option<RoaringBitmap>,
@@ -100,7 +110,6 @@ impl<'a> FacetDistribution<'a> {
let mut lexicographic_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let distribution_prelength = distribution.len();
let db = self.index.field_id_docid_facet_f64s;
for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>());
@@ -113,23 +122,21 @@ impl<'a> FacetDistribution<'a> {
for result in iter {
let ((_, _, value), ()) = result?;
*lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1;
if lexicographic_distribution.len() - distribution_prelength
== self.max_values_per_facet
{
break;
}
}
}
distribution.extend(lexicographic_distribution);
distribution.extend(
lexicographic_distribution
.into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len())),
);
}
FacetType::String => {
let mut normalized_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_strings;
'outer: for docid in candidates {
for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
@@ -144,14 +151,14 @@ impl<'a> FacetDistribution<'a> {
.or_insert_with(|| (original_value, 0));
*count += 1;
if normalized_distribution.len() == self.max_values_per_facet {
break 'outer;
}
// we'd like to break here if we have enough facet values, but we are collecting them by increasing docid,
// so higher ranked facets could be in later docids
}
}
let iter = normalized_distribution
.into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len()))
.map(|(_normalized, (original, count))| (original.to_string(), count));
distribution.extend(iter);
}
@@ -467,7 +474,7 @@ mod tests {
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###);
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::Count)))
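The hunks above swap an early `break` for a `take` applied after the per-document scan: documents are visited in docid order, so the first `max_values_per_facet` values encountered are not necessarily the first values in facet order, and truncation has to wait until the map is complete (hence the corrected `{"Blue": 2}` snapshot). A toy version of the difference, std only, with invented numbers:

use std::collections::BTreeMap;

fn main() {
    let max_values_per_facet = 2;
    // Facet values as they are encountered, in docid order rather than facet order.
    let seen = ["red", "blue", "red", "amber"];

    let mut counts = BTreeMap::new();
    for value in seen {
        *counts.entry(value).or_insert(0u64) += 1;
        // Breaking here as soon as counts.len() == max_values_per_facet would
        // keep {"blue", "red"} and never see "amber", which sorts before both.
    }

    // Truncating after the scan keeps the smallest keys of the full distribution.
    let distribution: BTreeMap<_, _> =
        counts.into_iter().take(max_values_per_facet).collect();
    assert_eq!(distribution.keys().copied().collect::<Vec<_>>(), vec!["amber", "blue"]);
}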

View File

@@ -12,7 +12,7 @@ use serde_json::Value;
use super::facet_range_search;
use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
@@ -336,6 +336,24 @@ impl<'a> Filter<'a> {
return Ok(docids);
}
Condition::StartsWith { keyword: _, word } => {
let value = crate::normalize_facet(word.value());
let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() };
let docids = strings_db
.prefix_iter(rtxn, &base)?
.map(|result| -> Result<RoaringBitmap> {
match result {
Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap),
Err(_e) => Err(InternalError::from(SerializationError::Decoding {
db_name: Some(FACET_ID_STRING_DOCIDS),
})
.into()),
}
})
.union()?;
return Ok(docids);
}
};
let mut output = RoaringBitmap::new();
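The new `StartsWith` arm scans every level-0 facet key under the requested prefix and unions the document bitmaps it finds. LMDB and the facet codecs aside, it is a range scan followed by a bitmap union, which can be sketched with a `BTreeMap` and plain `RoaringBitmap`s:

use std::collections::BTreeMap;
use roaring::RoaringBitmap;

fn main() {
    // facet value -> documents containing it
    let mut db: BTreeMap<&str, RoaringBitmap> = BTreeMap::new();
    db.insert("cat", RoaringBitmap::from_iter([1u32, 2]));
    db.insert("category", RoaringBitmap::from_iter([3u32]));
    db.insert("dog", RoaringBitmap::from_iter([4u32]));

    let prefix = "cat";
    let mut docids = RoaringBitmap::new();
    // Walk the keys starting at the prefix and stop as soon as it no longer matches.
    for (key, bitmap) in db.range(prefix..) {
        if !key.starts_with(prefix) {
            break;
        }
        docids |= bitmap;
    }

    assert_eq!(docids.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
}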

View File

@@ -190,7 +190,7 @@ impl<'a> Search<'a> {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
// no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
@@ -212,7 +212,7 @@ impl<'a> Search<'a> {
};
search.semantic =
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder });
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
// TODO: would be better to have two distinct functions at this point
let vector_results = search.execute()?;

View File

@@ -32,6 +32,7 @@ pub struct SemanticSearch {
vector: Option<Vec<f32>>,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
}
pub struct Search<'a> {
@@ -89,9 +90,10 @@ impl<'a> Search<'a> {
&mut self,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
vector: Option<Vec<f32>>,
) -> &mut Search<'a> {
self.semantic = Some(SemanticSearch { embedder_name, embedder, vector });
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
self
}
@@ -206,7 +208,7 @@ impl<'a> Search<'a> {
degraded,
used_negative_operator,
} = match self.semantic.as_ref() {
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => {
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
execute_vector_search(
&mut ctx,
vector,
@@ -219,6 +221,7 @@ impl<'a> Search<'a> {
self.limit,
embedder_name,
embedder,
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
)?

View File

@@ -3,6 +3,7 @@ use std::collections::hash_map::Entry;
use std::hash::Hash;
use fxhash::FxHashMap;
use grenad::MergeFunction;
use heed::types::Bytes;
use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;
@@ -11,7 +12,7 @@ use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::update::MergeCboRoaringBitmaps;
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
};
@@ -110,19 +111,21 @@ impl<'ctx> DatabaseCache<'ctx> {
.map_err(Into::into)
}
fn get_value_from_keys<'v, K1, KC>(
fn get_value_from_keys<'v, K1, KC, MF>(
txn: &'ctx RoTxn<'_>,
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, Bytes>,
universe: Option<&RoaringBitmap>,
merger: MergeFn,
merger: MF,
) -> Result<Option<RoaringBitmap>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
KC::EItem: Sized,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
if let Entry::Vacant(entry) = cache.entry(cache_key) {
let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
@@ -138,7 +141,7 @@ impl<'ctx> DatabaseCache<'ctx> {
if bitmaps.is_empty() {
None
} else {
Some(merger(&[], &bitmaps[..])?)
Some(merger.merge(&[], &bitmaps[..])?)
}
}
};
@@ -213,17 +216,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _>(
DatabaseCache::get_value_from_keys(
self.txn,
word,
&keys[..],
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
merge_cbo_roaring_bitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value::<_, _>(
None => DatabaseCache::get_value(
self.txn,
word,
self.word_interner.get(word).as_str(),
@@ -245,17 +248,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _>(
DatabaseCache::get_value_from_keys(
self.txn,
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
merge_cbo_roaring_bitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value::<_, _>(
None => DatabaseCache::get_value(
self.txn,
word,
self.word_interner.get(word).as_str(),
@@ -302,17 +305,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _>(
DatabaseCache::get_value_from_keys(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
merge_cbo_roaring_bitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value::<_, _>(
None => DatabaseCache::get_value(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@@ -334,17 +337,17 @@ impl<'ctx> SearchContext<'ctx> {
let keys: Vec<_> =
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _>(
DatabaseCache::get_value_from_keys(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
merge_cbo_roaring_bitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value::<_, _>(
None => DatabaseCache::get_value(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@@ -405,7 +408,7 @@ impl<'ctx> SearchContext<'ctx> {
Ok(docids)
}
ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _>(
ProximityPrecision::ByWord => DatabaseCache::get_value(
self.txn,
(proximity, word1, word2),
&(
@@ -538,7 +541,7 @@ impl<'ctx> SearchContext<'ctx> {
return Ok(None);
}
DatabaseCache::get_value::<_, _>(
DatabaseCache::get_value(
self.txn,
(word, fid),
&(self.word_interner.get(word).as_str(), fid),
@@ -559,7 +562,7 @@ impl<'ctx> SearchContext<'ctx> {
return Ok(None);
}
DatabaseCache::get_value::<_, _>(
DatabaseCache::get_value(
self.txn,
(word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid),
@@ -629,7 +632,7 @@ impl<'ctx> SearchContext<'ctx> {
word: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _>(
DatabaseCache::get_value(
self.txn,
(word, position),
&(self.word_interner.get(word).as_str(), position),
@@ -645,7 +648,7 @@ impl<'ctx> SearchContext<'ctx> {
word_prefix: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _>(
DatabaseCache::get_value(
self.txn,
(word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position),
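`MergeCboRoaringBitmaps` replaces the old `merge_cbo_roaring_bitmaps` function pointer with a unit struct implementing grenad's `MergeFunction`, which is what the new `merger.merge(&[], &bitmaps[..])` call goes through. The merge itself is just a union of serialized bitmaps; sketched below with plain `roaring` serialization (the CBO prefix-compression codec is milli-internal and left out, and the grenad trait itself is not implemented here):

use roaring::RoaringBitmap;

/// Union several serialized RoaringBitmaps into a single serialized bitmap.
fn merge_serialized(values: &[Vec<u8>]) -> std::io::Result<Vec<u8>> {
    let mut union = RoaringBitmap::new();
    for bytes in values {
        union |= RoaringBitmap::deserialize_from(&bytes[..])?;
    }
    let mut out = Vec::new();
    union.serialize_into(&mut out)?;
    Ok(out)
}

fn main() -> std::io::Result<()> {
    let mut a = Vec::new();
    RoaringBitmap::from_iter([1u32, 2]).serialize_into(&mut a)?;
    let mut b = Vec::new();
    RoaringBitmap::from_iter([2u32, 3]).serialize_into(&mut b)?;

    let merged = RoaringBitmap::deserialize_from(&merge_serialized(&[a, b])?[..])?;
    assert_eq!(merged.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
    Ok(())
}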

View File

@@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
Ok(ranking_rules)
}
#[allow(clippy::too_many_arguments)]
fn get_ranking_rules_for_vector<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
@@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>(
target: &[f32],
embedder_name: &str,
embedder: &Embedder,
quantized: bool,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
// query graph search
@@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>(
limit_plus_offset,
embedder_name,
embedder,
quantized,
)?;
ranking_rules.push(Box::new(vector_sort));
vector = true;
@@ -576,6 +579,7 @@ pub fn execute_vector_search(
length: usize,
embedder_name: &str,
embedder: &Embedder,
quantized: bool,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
@@ -591,6 +595,7 @@ pub fn execute_vector_search(
vector,
embedder_name,
embedder,
quantized,
)?;
let mut placeholder_search_logger = logger::DefaultSearchLogger;

View File

@@ -16,6 +16,7 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
limit: usize,
distribution_shift: Option<DistributionShift>,
embedder_index: u8,
quantized: bool,
}
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
@@ -26,6 +27,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
limit: usize,
embedder_name: &str,
embedder: &Embedder,
quantized: bool,
) -> Result<Self> {
let embedder_index = ctx
.index
@@ -41,6 +43,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
limit,
distribution_shift: embedder.distribution(),
embedder_index,
quantized,
})
}
@@ -49,16 +52,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
ctx: &mut SearchContext<'_>,
vector_candidates: &RoaringBitmap,
) -> Result<()> {
let readers: std::result::Result<Vec<_>, _> =
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
let readers = readers?;
let target = &self.target;
let mut results = Vec::new();
for reader in readers.iter() {
for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) {
let nns_by_vector =
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?;
reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
results.extend(nns_by_vector.into_iter());
}
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));

View File

@@ -18,9 +18,11 @@ pub struct Similar<'a> {
embedder_name: String,
embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
quantized: bool,
}
impl<'a> Similar<'a> {
#[allow(clippy::too_many_arguments)]
pub fn new(
id: DocumentId,
offset: usize,
@@ -29,6 +31,7 @@ impl<'a> Similar<'a> {
rtxn: &'a heed::RoTxn<'a>,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
) -> Self {
Self {
id,
@@ -40,6 +43,7 @@ impl<'a> Similar<'a> {
embedder_name,
embedder,
ranking_score_threshold: None,
quantized,
}
}
@@ -67,19 +71,13 @@ impl<'a> Similar<'a> {
.get(self.rtxn, &self.embedder_name)?
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
let readers: std::result::Result<Vec<_>, _> =
self.index.arroy_readers(self.rtxn, embedder_index).collect();
let readers = readers?;
let mut results = Vec::new();
for reader in readers.iter() {
let nns_by_item = reader.nns_by_item(
for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) {
let nns_by_item = reader?.nns_by_item(
self.rtxn,
self.id,
self.limit + self.offset + 1,
None,
Some(&universe),
)?;
if let Some(mut nns_by_item) = nns_by_item {

View File

@@ -3,12 +3,12 @@ use std::ops::RangeInclusive;
use roaring::bitmap::{IntoIter, RoaringBitmap};
pub struct AvailableDocumentsIds {
pub struct AvailableIds {
iter: Chain<IntoIter, RangeInclusive<u32>>,
}
impl AvailableDocumentsIds {
pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
impl AvailableIds {
pub fn new(docids: &RoaringBitmap) -> AvailableIds {
match docids.max() {
Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id);
@@ -20,17 +20,17 @@ impl AvailableDocumentsIds {
None => 1..=0, // empty range iterator
};
AvailableDocumentsIds { iter: available.into_iter().chain(iter) }
AvailableIds { iter: available.into_iter().chain(iter) }
}
None => {
let empty = RoaringBitmap::new().into_iter();
AvailableDocumentsIds { iter: empty.chain(0..=u32::MAX) }
AvailableIds { iter: empty.chain(0..=u32::MAX) }
}
}
}
}
impl Iterator for AvailableDocumentsIds {
impl Iterator for AvailableIds {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
@@ -45,7 +45,7 @@ mod tests {
#[test]
fn empty() {
let base = RoaringBitmap::new();
let left = AvailableDocumentsIds::from_documents_ids(&base);
let left = AvailableIds::new(&base);
let right = 0..=u32::MAX;
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
@@ -58,7 +58,7 @@ mod tests {
base.insert(100);
base.insert(405);
let left = AvailableDocumentsIds::from_documents_ids(&base);
let left = AvailableIds::new(&base);
let right = (0..=u32::MAX).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
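`AvailableIds` enumerates the holes in the used-ids bitmap first and only then continues past the highest used id. A standalone sketch of the same idea with `roaring` (the type and function names here are invented for the sketch):

use roaring::RoaringBitmap;

/// Yield ids not in `used`: first the gaps below the maximum, then everything above it.
fn available_ids(used: &RoaringBitmap) -> impl Iterator<Item = u32> + '_ {
    let next_fresh = used.max().map_or(0, |max| max + 1);
    (0..next_fresh).filter(move |id| !used.contains(*id)).chain(next_fresh..=u32::MAX)
}

fn main() {
    let used = RoaringBitmap::from_iter([0u32, 2, 3]);
    let firsts: Vec<u32> = available_ids(&used).take(4).collect();
    assert_eq!(firsts, vec![1, 4, 5, 6]);
}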

View File

@@ -0,0 +1,59 @@
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
use roaring::RoaringBitmap;
/// A concurrent ID generator that will never return the same ID twice.
#[derive(Debug)]
pub struct ConcurrentAvailableIds {
/// The current tree node ID we should use if there are no other IDs available.
current: AtomicU32,
/// The total number of tree node IDs used.
used: AtomicU64,
/// A list of IDs to exhaust before picking IDs from `current`.
available: RoaringBitmap,
/// The current Nth ID to select in the bitmap.
select_in_bitmap: AtomicU32,
/// Tells if you should look in the roaring bitmap or if all the IDs are already exhausted.
look_into_bitmap: AtomicBool,
}
impl ConcurrentAvailableIds {
/// Creates an ID generator returning unique IDs, avoiding the specified used IDs.
pub fn new(used: RoaringBitmap) -> ConcurrentAvailableIds {
let last_id = used.max().map_or(0, |id| id + 1);
let used_ids = used.len();
let available = RoaringBitmap::from_sorted_iter(0..last_id).unwrap() - used;
ConcurrentAvailableIds {
current: AtomicU32::new(last_id),
used: AtomicU64::new(used_ids),
select_in_bitmap: AtomicU32::new(0),
look_into_bitmap: AtomicBool::new(!available.is_empty()),
available,
}
}
/// Returns a new unique ID and increases the count of IDs used.
pub fn next(&self) -> Option<u32> {
if self.used.fetch_add(1, Ordering::Relaxed) > u32::MAX as u64 {
None
} else if self.look_into_bitmap.load(Ordering::Relaxed) {
let current = self.select_in_bitmap.fetch_add(1, Ordering::Relaxed);
match self.available.select(current) {
Some(id) => Some(id),
None => {
self.look_into_bitmap.store(false, Ordering::Relaxed);
Some(self.current.fetch_add(1, Ordering::Relaxed))
}
}
} else {
Some(self.current.fetch_add(1, Ordering::Relaxed))
}
}
/// Returns the number of used ids in total.
pub fn used(&self) -> u64 {
self.used.load(Ordering::Relaxed)
}
}
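`ConcurrentAvailableIds` hands out ids from many threads without a lock by pairing a shared bitmap of reusable ids with atomic counters. A reduced, self-contained sketch of the lock-free part (fresh ids only, no recycled bitmap) to show why `fetch_add` alone is enough for uniqueness:

use std::collections::HashSet;
use std::sync::atomic::{AtomicU32, Ordering};
use std::thread;

fn main() {
    // Shared counter: every thread gets a distinct id without taking a lock.
    let current = AtomicU32::new(0);

    let ids: HashSet<u32> = thread::scope(|s| {
        let handles: Vec<_> = (0..4)
            .map(|_| {
                s.spawn(|| {
                    (0..1000)
                        .map(|_| current.fetch_add(1, Ordering::Relaxed))
                        .collect::<Vec<u32>>()
                })
            })
            .collect();
        handles.into_iter().flat_map(|h| h.join().unwrap()).collect()
    });

    // 4 threads x 1000 ids, no duplicates.
    assert_eq!(ids.len(), 4_000);
}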

View File

@@ -1,7 +1,7 @@
use obkv::Key;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>;
pub type KvReaderDelAdd = obkv::KvReader<DelAdd>;
/// DelAdd defines the new value to add in the database and old value to delete from the database.
///
@@ -36,7 +36,7 @@ impl Key for DelAdd {
/// Addition: put all the values under DelAdd::Addition,
/// DeletionAndAddition: put all the values under DelAdd::Deletion and DelAdd::Addition,
pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
reader: obkv::KvReader<'_, K>,
reader: &obkv::KvReader<K>,
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
@@ -46,7 +46,7 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
/// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<'_, K>,
reader: &obkv::KvReader<K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
@@ -86,8 +86,8 @@ pub enum DelAddOperation {
/// putting each deletion obkv's keys under a DelAdd::Deletion
/// and putting each addition obkv's keys under a DelAdd::Addition
pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
deletion: &obkv::KvReader<'_, K>,
addition: &obkv::KvReader<'_, K>,
deletion: &obkv::KvReader<K>,
addition: &obkv::KvReader<K>,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
use itertools::merge_join_by;
@@ -121,7 +121,7 @@ pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
writer.finish()
}
pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd<'_>) -> bool {
pub fn is_noop_del_add_obkv(del_add: &KvReaderDelAdd) -> bool {
del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition)
}
@@ -136,5 +136,5 @@ pub fn deladd_serialize_add_side<'a>(
obkv: &'a [u8],
_buffer: &mut Vec<u8>,
) -> crate::Result<&'a [u8]> {
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
Ok(KvReaderDelAdd::from_slice(obkv).get(DelAdd::Addition).unwrap_or_default())
}
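The DelAdd obkv is the two-slot envelope everything above reads with `from_slice` and `get(...)`: one optional value under `DelAdd::Deletion`, one under `DelAdd::Addition`. A small fragment showing a round trip with the aliases from this file (this is crate-internal code, it only compiles inside milli, and the on-disk layout is whatever the obkv fork produces):

use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};

fn round_trip() -> std::io::Result<()> {
    let mut buffer = Vec::new();
    let mut writer = KvWriterDelAdd::new(&mut buffer);
    // Keys are inserted in ascending order, Deletion before Addition, as elsewhere in this module.
    writer.insert(DelAdd::Deletion, b"old value")?;
    writer.insert(DelAdd::Addition, b"new value")?;
    writer.finish()?;

    let reader = KvReaderDelAdd::from_slice(&buffer);
    assert_eq!(reader.get(DelAdd::Addition), Some(&b"new value"[..]));
    // Deletion and addition differ, so this obkv is not a no-op.
    assert!(!is_noop_del_add_obkv(reader));
    Ok(())
}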

View File

@@ -14,7 +14,7 @@ use crate::heed_codec::facet::{
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::update::MergeFn;
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
@@ -29,7 +29,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
delta_data: Option<Merger<BufReader<File>, MergeFn>>,
delta_data: Option<Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>>,
}
impl<'i> FacetsUpdateBulk<'i> {
@@ -37,7 +37,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeFn>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@@ -90,7 +90,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub delta_data: Option<Merger<R, MergeFn>>,
pub delta_data: Option<Merger<R, MergeDeladdCboRoaringBitmaps>>,
pub group_size: u8,
pub min_level_size: u8,
}
@@ -135,7 +135,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::new(value);
let value = KvReaderDelAdd::from_slice(value);
// DB is empty, it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else {
@@ -161,7 +161,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
continue;
}
let value = KvReaderDelAdd::new(value);
let value = KvReaderDelAdd::from_slice(value);
// the value is a CboRoaringBitmap, but I still need to prepend the
// group size for level 0 (= 1) to it

View File

@@ -15,7 +15,7 @@ use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeFn;
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, Index, Result};
/// Enum used as a return value for the facet incremental indexing.
@@ -57,14 +57,14 @@ enum ModificationResult {
/// `facet_id_(string/f64)_docids` databases.
pub struct FacetsUpdateIncremental {
inner: FacetsUpdateIncrementalInner,
delta_data: Merger<BufReader<File>, MergeFn>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
}
impl FacetsUpdateIncremental {
pub fn new(
index: &Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeFn>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,
@@ -109,7 +109,7 @@ impl FacetsUpdateIncremental {
}
current_field_id = Some(key.field_id);
let value = KvReader::new(value);
let value = KvReader::from_slice(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)

View File

@@ -86,12 +86,11 @@ use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::MergeFn;
use crate::{try_split_array_at, FieldId, Index, Result};
pub mod bulk;
@@ -105,8 +104,8 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeFn>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeFn>>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
@@ -116,8 +115,8 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeFn>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeFn>>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
data_size: u64,
) -> Self {
let database = match facet_type {
@@ -182,12 +181,12 @@ impl<'i> FacetsUpdate<'i> {
fn index_facet_search(
wtxn: &mut heed::RwTxn<'_>,
normalized_delta_data: Merger<BufReader<File>, MergeFn>,
normalized_delta_data: Merger<BufReader<File>, MergeDeladdBtreesetString>,
index: &Index,
) -> Result<()> {
let mut iter = normalized_delta_data.into_stream_merger_iter()?;
while let Some((key_bytes, delta_bytes)) = iter.next()? {
let deladd_reader = KvReaderDelAdd::new(delta_bytes);
let deladd_reader = KvReaderDelAdd::from_slice(delta_bytes);
let database_set = index
.facet_id_normalized_string_strings
@@ -298,8 +297,8 @@ pub(crate) mod test_helpers {
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::merge_deladd_cbo_roaring_bitmaps;
use crate::update::{FacetsUpdateIncrementalInner, MergeFn};
use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
/// Utility function to generate a string whose position in a lexicographically
@@ -484,7 +483,7 @@ pub(crate) mod test_helpers {
}
writer.finish().unwrap();
let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
builder.push(reader.into_cursor().unwrap());
let merger = builder.build();

View File

@@ -47,7 +47,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
return match cursor.next_document()? {
Some(first_document) => Ok(Err(UserError::MissingDocumentId {
primary_key: primary_key.to_string(),
document: obkv_to_object(&first_document, &documents_batch_index)?,
document: obkv_to_object(first_document, &documents_batch_index)?,
})),
None => unreachable!("Called with reader.is_empty()"),
};
@@ -106,7 +106,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
let mut count = 0;
while let Some(document) = cursor.next_document()? {
let document_id = match fetch_or_generate_document_id(
&document,
document,
&documents_batch_index,
primary_key,
autogenerate_docids,
@@ -145,7 +145,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")]
fn fetch_or_generate_document_id(
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
documents_batch_index: &DocumentsBatchIndex,
primary_key: PrimaryKey<'_>,
autogenerate_docids: bool,

View File

@@ -8,7 +8,7 @@ use obkv::{KvReader, KvWriterU16};
use roaring::RoaringBitmap;
use serde_json::Value;
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
use super::helpers::{create_sorter, sorter_into_reader, GrenadParameters, KeepLatestObkv};
use crate::error::{InternalError, SerializationError};
use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
@@ -35,7 +35,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let mut documents_ids = RoaringBitmap::new();
let mut docid_word_positions_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_latest_obkv,
KeepLatestObkv,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -80,10 +80,10 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
let obkv = KvReader::<FieldId>::from_slice(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !force_reindexing && !searchable_fields_changed(&obkv, settings_diff) {
if !force_reindexing && !searchable_fields_changed(obkv, settings_diff) {
continue;
}
@@ -98,7 +98,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|| {
// deletions
tokens_from_document(
&obkv,
obkv,
&settings_diff.old,
&del_tokenizer,
max_positions_per_attributes,
@@ -109,7 +109,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|| {
// additions
tokens_from_document(
&obkv,
obkv,
&settings_diff.new,
&add_tokenizer,
max_positions_per_attributes,
@@ -126,13 +126,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
// transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
value_buffer.clear();
del_add_from_two_obkvs(
&KvReader::<FieldId>::new(del_obkv),
&KvReader::<FieldId>::new(add_obkv),
KvReader::<FieldId>::from_slice(del_obkv),
KvReader::<FieldId>::from_slice(add_obkv),
&mut value_buffer,
)?;
// write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
let obkv = KvReader::<FieldId>::new(&value_buffer);
let obkv = KvReader::<FieldId>::from_slice(&value_buffer);
for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
@@ -146,13 +146,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<'_, FieldId>,
obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) {
let del_add = KvReaderDelAdd::new(field_bytes);
let del_add = KvReaderDelAdd::from_slice(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
@@ -189,7 +189,7 @@ fn tokenizer_builder<'a>(
/// Extract words mapped with their positions of a document.
fn tokens_from_document<'a>(
obkv: &KvReader<'a, FieldId>,
obkv: &'a KvReader<FieldId>,
settings: &InnerIndexSettings,
tokenizer: &Tokenizer<'_>,
max_positions_per_attributes: u32,
@@ -202,7 +202,7 @@ fn tokens_from_document<'a>(
// if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
// parse json.
let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;

View File

@@ -4,7 +4,7 @@ use std::io::{self, BufReader};
use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
create_sorter, sorter_into_reader, GrenadParameters, MergeDeladdCboRoaringBitmaps,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
@@ -27,7 +27,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -45,7 +45,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
for (deladd_key, _) in KvReaderDelAdd::from_slice(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
}
obkv.finish()?;

View File

@@ -15,7 +15,7 @@ use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::{
merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
@@ -56,7 +56,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -65,7 +65,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut normalized_facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_deladd_btreeset_string,
MergeDeladdBtreesetString,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -75,7 +75,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some();
@@ -144,7 +144,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -153,7 +153,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut normalized_facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_deladd_btreeset_string,
MergeDeladdBtreesetString,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -163,7 +163,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some();

View File

@@ -1,10 +1,8 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
@@ -15,13 +13,13 @@ use roaring::RoaringBitmap;
use serde_json::{from_slice, Value};
use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use super::helpers::{create_sorter, sorter_into_reader, GrenadParameters, KeepFirst};
use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@@ -50,7 +48,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut fid_docid_facet_numbers_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
KeepFirst,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -59,7 +57,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut fid_docid_facet_strings_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
KeepFirst,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -83,10 +81,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let obkv = obkv::KvReader::from_slice(value);
let get_document_json_value = move |field_id, side| {
obkv.get(field_id)
.map(KvReaderDelAdd::new)
.map(KvReaderDelAdd::from_slice)
.and_then(|kv| kv.get(side))
.map(from_slice)
.transpose()
@@ -330,15 +328,12 @@ fn truncate_str(s: &str) -> &str {
/// Computes the diff between both Del and Add numbers and
/// only inserts the parts that differ in the sorter.
fn insert_numbers_diff<MF>(
fid_docid_facet_numbers_sorter: &mut Sorter<MF>,
fn insert_numbers_diff(
fid_docid_facet_numbers_sorter: &mut Sorter<KeepFirst>,
key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
) -> Result<()> {
// We sort and dedup the float numbers
del_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
add_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
@@ -390,15 +385,12 @@ where
/// Computes the diff between both Del and Add strings and
/// only inserts the parts that differ in the sorter.
fn insert_strings_diff<MF>(
fid_docid_facet_strings_sorter: &mut Sorter<MF>,
fn insert_strings_diff(
fid_docid_facet_strings_sorter: &mut Sorter<KeepFirst>,
key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
) -> Result<()> {
// We sort and dedup the normalized and original strings
del_strings.sort_unstable();
add_strings.sort_unstable();

View File

@@ -4,8 +4,8 @@ use std::io::{self, BufReader};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -30,7 +30,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -45,19 +45,23 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
let deletion = del_add_reader
// get deleted words
.get(DelAdd::Deletion)
// count deleted words
.map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count())
.map(|deletion| {
KvReaderU16::from_slice(deletion).iter().take(MAX_COUNTED_WORDS + 1).count()
})
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
let addition = del_add_reader
// get added words
.get(DelAdd::Addition)
// count added words
.map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count())
.map(|addition| {
KvReaderU16::from_slice(addition).iter().take(MAX_COUNTED_WORDS + 1).count()
})
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
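Note on the recurring KvReaderDelAdd::new / KvReaderU16::new → from_slice changes above: obkv readers are now borrowed views created directly from a byte slice instead of owned wrapper values. A minimal sketch of the new call shape, assuming the obkv and del_add APIs used on this branch (the helper name and the value bytes are hypothetical):

use obkv::KvReaderU16;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};

// Count the words on each side of one DelAdd entry, mirroring the loop above.
fn count_words(value: &[u8]) -> (Option<usize>, Option<usize>) {
    let del_add_reader = KvReaderDelAdd::from_slice(value);
    let deleted = del_add_reader
        .get(DelAdd::Deletion)
        .map(|bytes| KvReaderU16::from_slice(bytes).iter().count());
    let added = del_add_reader
        .get(DelAdd::Addition)
        .map(|bytes| KvReaderU16::from_slice(bytes).iter().count());
    (deleted, added)
}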

View File

@@ -29,22 +29,20 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let obkv = obkv::KvReader::from_slice(value);
// since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed
let document_id = || -> Value {
let reader = KvReaderDelAdd::new(obkv.get(primary_key_id).unwrap());
let reader = KvReaderDelAdd::from_slice(obkv.get(primary_key_id).unwrap());
let document_id =
reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
serde_json::from_slice(document_id).unwrap()
};
// extract old version
let del_lat_lng =
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
let del_lat_lng = extract_lat_lng(obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
// extract new version
let add_lat_lng =
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
let add_lat_lng = extract_lat_lng(obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
@@ -68,15 +66,17 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
/// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng(
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => {
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lat =
document.get(lat_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let lng =
document.get(lng_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => {

View File

@@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
use crate::vector::settings::{EmbedderAction, ReindexAction};
use crate::vector::settings::ReindexAction;
use crate::vector::{Embedder, Embeddings};
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
@@ -208,65 +208,65 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
if reindex_vectors {
for (name, action) in settings_diff.embedding_config_updates.iter() {
match action {
EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted
EmbedderAction::Reindex(action) => {
let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
else {
tracing::error!(embedder = name, "Requested embedder config not found");
continue;
};
if let Some(action) = action.reindex() {
let Some((embedder_name, (embedder, prompt, _quantized))) =
configs.remove_entry(name)
else {
tracing::error!(embedder = name, "Requested embedder config not found");
continue;
};
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> (prompt)
let prompts_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> (prompt)
let prompts_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> ()
let remove_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> ()
let remove_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let action = match action {
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
ReindexAction::RegeneratePrompts => {
let Some((_, old_prompt)) = old_configs.get(name) else {
tracing::error!(embedder = name, "Old embedder config not found");
continue;
};
let action = match action {
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
ReindexAction::RegeneratePrompts => {
let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
tracing::error!(embedder = name, "Old embedder config not found");
continue;
};
ExtractionAction::SettingsRegeneratePrompts { old_prompt }
}
};
ExtractionAction::SettingsRegeneratePrompts { old_prompt }
}
};
extractors.push(EmbedderVectorExtractor {
embedder_name,
embedder,
prompt,
prompts_writer,
remove_vectors_writer,
manual_vectors_writer,
add_to_user_provided: RoaringBitmap::new(),
action,
});
}
extractors.push(EmbedderVectorExtractor {
embedder_name,
embedder,
prompt,
prompts_writer,
remove_vectors_writer,
manual_vectors_writer,
add_to_user_provided: RoaringBitmap::new(),
action,
});
} else {
continue;
}
}
} else {
// document operation
for (embedder_name, (embedder, prompt)) in configs.into_iter() {
for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
@@ -313,7 +313,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
debug_assert!(from_utf8(external_id_bytes).is_ok());
let docid = DocumentId::from_be_bytes(docid_bytes);
let obkv = obkv::KvReader::new(value);
let obkv = obkv::KvReader::from_slice(value);
key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes.as_slice());
@@ -481,7 +481,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
#[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments
fn extract_vector_document_diff(
docid: DocumentId,
obkv: obkv::KvReader<'_, FieldId>,
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
@@ -526,7 +526,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
@@ -562,7 +562,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
if embedder_is_manual {
@@ -588,7 +588,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// if the new version of documents has the vectors in the DB,
@@ -606,7 +606,7 @@ fn extract_vector_document_diff(
}
fn regenerate_if_prompt_changed(
obkv: obkv::KvReader<'_, FieldId>,
obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
@@ -624,7 +624,7 @@ fn regenerate_if_prompt_changed(
}
fn regenerate_prompt(
obkv: obkv::KvReader<'_, FieldId>,
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMapWithMetadata,
) -> Result<VectorStateDelta> {

View File

@@ -7,8 +7,8 @@ use obkv::KvReaderU16;
use roaring::RoaringBitmap;
use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters,
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
@@ -16,7 +16,6 @@ use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::sorter_into_reader;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};
/// Extracts the word and the documents ids where this word appears.
@@ -40,7 +39,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -58,17 +57,17 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::new(deletion).iter() {
for (_pos, word) in KvReaderU16::from_slice(deletion).iter() {
del_words.insert(word.to_vec());
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::new(addition).iter() {
for (_pos, word) in KvReaderU16::from_slice(addition).iter() {
add_words.insert(word.to_vec());
}
}
@@ -94,7 +93,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -103,7 +102,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut exact_word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -115,7 +114,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// NOTE: replacing sorters by bitmap merging is less efficient, so, use sorters.
while let Some((key, value)) = iter.next()? {
// only keep the value if there is a change to apply in the DB.
if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) {
if !is_noop_del_add_obkv(KvReaderDelAdd::from_slice(value)) {
word_fid_docids_writer.insert(key, value)?;
}
@@ -123,7 +122,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// merge all deletions
let obkv = KvReaderDelAdd::new(value);
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = obkv.get(DelAdd::Deletion) {
let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
buffer.clear();
@@ -163,7 +162,7 @@ fn words_into_sorter(
key_buffer: &mut Vec<u8>,
del_words: &BTreeSet<Vec<u8>>,
add_words: &BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -6,8 +6,8 @@ use std::{cmp, io};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters, MergeFn,
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -44,7 +44,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
.map(|_| {
create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -92,8 +92,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
// deletions
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
if let Some(deletion) = KvReaderDelAdd::from_slice(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::from_slice(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
@@ -125,8 +125,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
// additions
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
if let Some(addition) = KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::from_slice(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
@@ -197,7 +197,7 @@ fn document_word_positions_into_sorter(
document_id: DocumentId,
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeDeladdCboRoaringBitmaps>],
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -5,14 +5,13 @@ use std::io::{self, BufReader};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::MergeFn;
use crate::{bucketed_position, DocumentId, Result};
/// Extracts the word positions and the documents ids where this word appears.
@@ -29,7 +28,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -60,10 +59,10 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
for (position, word_bytes) in KvReaderU16::from_slice(deletion).iter() {
let position = bucketed_position(position);
del_word_positions.insert((position, word_bytes.to_vec()));
}
@@ -71,7 +70,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (position, word_bytes) in KvReaderU16::new(addition).iter() {
for (position, word_bytes) in KvReaderU16::from_slice(addition).iter() {
let position = bucketed_position(position);
add_word_positions.insert((position, word_bytes.to_vec()));
}
@@ -100,7 +99,7 @@ fn words_position_into_sorter(
key_buffer: &mut Vec<u8>,
del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
word_position_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -1,11 +1,10 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Seek};
use grenad::{CompressionType, Sorter};
use grenad::{CompressionType, MergeFunction, Sorter};
use heed::types::Bytes;
use super::{ClonableMmap, MergeFn};
use super::ClonableMmap;
use crate::update::index_documents::valid_lmdb_key;
use crate::Result;
@@ -31,14 +30,14 @@ pub fn create_writer<R: io::Write>(
/// A helper function that creates a grenad sorter
/// with the given parameters. The max memory is
/// clamped to something reasonable.
pub fn create_sorter(
pub fn create_sorter<MF: MergeFunction>(
sort_algorithm: grenad::SortAlgorithm,
merge: MergeFn,
merge: MF,
chunk_compression_type: grenad::CompressionType,
chunk_compression_level: Option<u32>,
max_nb_chunks: Option<usize>,
max_memory: Option<usize>,
) -> grenad::Sorter<MergeFn> {
) -> grenad::Sorter<MF> {
let mut builder = grenad::Sorter::builder(merge);
builder.chunk_compression_type(chunk_compression_type);
if let Some(level) = chunk_compression_level {
@@ -57,10 +56,14 @@ pub fn create_sorter(
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
pub fn sorter_into_reader<MF>(
sorter: grenad::Sorter<MF>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<BufReader<File>>>
where
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@@ -169,8 +172,8 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
/// Write provided sorter in database using serialize_value function.
/// merge_values function is used if an entry already exists in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn write_sorter_into_database<K, V, FS, FM>(
sorter: Sorter<MergeFn>,
pub fn write_sorter_into_database<K, V, FS, FM, MF>(
sorter: Sorter<MF>,
database: &heed::Database<K, V>,
wtxn: &mut heed::RwTxn<'_>,
index_is_empty: bool,
@@ -180,6 +183,8 @@ pub fn write_sorter_into_database<K, V, FS, FM>(
where
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
let mut buffer = Vec::new();
let database = database.remap_types::<Bytes, Bytes>();
@@ -207,8 +212,3 @@ where
Ok(())
}
/// Used when trying to merge readers, but you don't actually care about the values.
pub fn merge_ignore_values<'a>(_key: &[u8], _values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(Cow::Owned(Vec::new()))
}
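For context on the signature change above: create_sorter is now generic over any grenad::MergeFunction passed by value, instead of taking the MergeFn function-pointer alias. A hedged sketch of a call site under the new signature (the compression settings are placeholders, not values taken from this diff):

use grenad::CompressionType;

// Hypothetical call site: the merge strategy is a unit struct, not a fn pointer.
let sorter = create_sorter(
    grenad::SortAlgorithm::Unstable,
    MergeDeladdCboRoaringBitmaps,
    CompressionType::None, // chunk_compression_type
    None,                  // chunk_compression_level
    None,                  // max_nb_chunks
    None,                  // max_memory
);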

View File

@@ -3,6 +3,8 @@ use std::collections::BTreeSet;
use std::io;
use std::result::Result as StdResult;
use either::Either;
use grenad::MergeFunction;
use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec;
@@ -10,7 +12,8 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
pub type EitherObkvMerge =
Either<ObkvsKeepLastAdditionMergeDeletions, ObkvsMergeAdditionsAndDeletions>;
pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) -> io::Result<()> {
buffer.clear();
@@ -18,35 +21,53 @@ pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) ->
bitmap.serialize_into(buffer)
}
pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let merged = values
.iter()
.map(AsRef::as_ref)
.map(RoaringBitmap::deserialize_from)
.map(StdResult::unwrap)
.reduce(|a, b| a | b)
.unwrap();
let mut buffer = Vec::new();
serialize_roaring_bitmap(&merged, &mut buffer)?;
Ok(Cow::Owned(buffer))
pub struct MergeRoaringBitmaps;
impl MergeFunction for MergeRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let merged = values
.iter()
.map(AsRef::as_ref)
.map(RoaringBitmap::deserialize_from)
.map(StdResult::unwrap)
.reduce(|a, b| a | b)
.unwrap();
let mut buffer = Vec::new();
serialize_roaring_bitmap(&merged, &mut buffer)?;
Ok(Cow::Owned(buffer))
}
}
}
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(values[0].clone())
pub struct KeepFirst;
impl MergeFunction for KeepFirst {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(values[0].clone())
}
}
/// Only the last value associated with an id is kept.
pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs.last().unwrap().clone())
pub struct KeepLatestObkv;
impl MergeFunction for KeepLatestObkv {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs.last().unwrap().clone())
}
}
pub fn merge_two_del_add_obkvs(
base: obkv::KvReaderU16<'_>,
update: obkv::KvReaderU16<'_>,
base: &obkv::KvReaderU16,
update: &obkv::KvReaderU16,
merge_additions: bool,
buffer: &mut Vec<u8>,
) {
@@ -66,7 +87,7 @@ pub fn merge_two_del_add_obkvs(
// If merge_additions is false, recreate an obkv keeping the deletions only.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(v);
let base_reader = KvReaderDelAdd::from_slice(v);
if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
@@ -80,8 +101,8 @@ pub fn merge_two_del_add_obkvs(
// merge deletions and additions.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(base);
let update_reader = KvReaderDelAdd::new(update);
let base_reader = KvReaderDelAdd::from_slice(base);
let update_reader = KvReaderDelAdd::from_slice(update);
// keep newest deletion.
if let Some(deletion) = update_reader
@@ -131,8 +152,8 @@ fn inner_merge_del_add_obkvs<'a>(
break;
}
let newest = obkv::KvReader::new(&acc);
let oldest = obkv::KvReader::new(&current[1..]);
let newest = obkv::KvReader::from_slice(&acc);
let oldest = obkv::KvReader::from_slice(&current[1..]);
merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
// we want the result of the merge into our accumulator.
@@ -145,65 +166,79 @@ fn inner_merge_del_add_obkvs<'a>(
}
/// Merge all the obkvs from the newest to the oldest.
pub fn obkvs_merge_additions_and_deletions<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, true)
#[derive(Copy, Clone)]
pub struct ObkvsMergeAdditionsAndDeletions;
impl MergeFunction for ObkvsMergeAdditionsAndDeletions {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, true)
}
}
/// Merge all the obkvs deletions from the newest to the oldest and keep only the newest additions.
pub fn obkvs_keep_last_addition_merge_deletions<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, false)
#[derive(Copy, Clone)]
pub struct ObkvsKeepLastAdditionMergeDeletions;
impl MergeFunction for ObkvsKeepLastAdditionMergeDeletions {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, false)
}
}
/// Do a union of all the CboRoaringBitmaps in the values.
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
pub struct MergeCboRoaringBitmaps;
impl MergeFunction for MergeCboRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
}
}
}
/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv
/// separately and outputs a new DelAdd with both unions.
pub fn merge_deladd_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes);
}
if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) {
add_bitmaps_bytes.push(bitmap_bytes);
}
}
pub struct MergeDeladdCboRoaringBitmaps;
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
impl MergeFunction for MergeDeladdCboRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new();
for value in values {
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes);
}
if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) {
add_bitmaps_bytes.push(bitmap_bytes);
}
}
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
}
}
@@ -217,7 +252,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::new(deladd_obkv),
KvReaderDelAdd::from_slice(deladd_obkv),
previous,
buffer,
)?)
@@ -225,37 +260,55 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
/// Do a union of BtreeSet on both sides of a DelAdd obkv
/// separately and outputs a new DelAdd with both unions.
pub fn merge_deladd_btreeset_string<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_set = BTreeSet::new();
let mut add_set = BTreeSet::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
if let Some(bytes) = obkv.get(DelAdd::Deletion) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
del_set.insert(value);
}
}
if let Some(bytes) = obkv.get(DelAdd::Addition) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
add_set.insert(value);
}
}
}
pub struct MergeDeladdBtreesetString;
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let del = serde_json::to_vec(&del_set).unwrap();
output_deladd_obkv.insert(DelAdd::Deletion, &del)?;
let add = serde_json::to_vec(&add_set).unwrap();
output_deladd_obkv.insert(DelAdd::Addition, &add)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
impl MergeFunction for MergeDeladdBtreesetString {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_set = BTreeSet::new();
let mut add_set = BTreeSet::new();
for value in values {
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bytes) = obkv.get(DelAdd::Deletion) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
del_set.insert(value);
}
}
if let Some(bytes) = obkv.get(DelAdd::Addition) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {
add_set.insert(value);
}
}
}
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let del = serde_json::to_vec(&del_set).unwrap();
output_deladd_obkv.insert(DelAdd::Deletion, &del)?;
let add = serde_json::to_vec(&add_set).unwrap();
output_deladd_obkv.insert(DelAdd::Addition, &add)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
}
}
/// Used when trying to merge readers, but you don't actually care about the values.
pub struct MergeIgnoreValues;
impl MergeFunction for MergeIgnoreValues {
type Error = crate::Error;
fn merge<'a>(
&self,
_key: &[u8],
_values: &[Cow<'a, [u8]>],
) -> std::result::Result<Cow<'a, [u8]>, Self::Error> {
Ok(Cow::Owned(Vec::new()))
}
}
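The block above is the core of this refactor: each former MergeFn function pointer becomes a unit struct implementing grenad::MergeFunction, so the merge strategy is carried in the Sorter/Merger type. A minimal sketch of a new implementor in the same style (the struct name is illustrative and not part of this diff):

use std::borrow::Cow;
use grenad::MergeFunction;

/// Keeps only the most recent value for a key, in the spirit of KeepLatestObkv above.
pub struct KeepLast;

impl MergeFunction for KeepLast {
    type Error = crate::Error;

    fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> crate::Result<Cow<'a, [u8]>> {
        // grenad only calls a merge function with at least one value per key.
        Ok(values.last().unwrap().clone())
    }
}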

View File

@@ -7,17 +7,8 @@ use std::convert::{TryFrom, TryInto};
pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
use fst::{IntoStreamer, Streamer};
pub use grenad_helpers::{
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
merge_ignore_values, sorter_into_reader, write_sorter_into_database, writer_into_reader,
GrenadParameters,
};
pub use merge_functions::{
keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_deladd_btreeset_string,
merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
obkvs_merge_additions_and_deletions, MergeFn,
};
pub use grenad_helpers::*;
pub use merge_functions::*;
use crate::MAX_WORD_LENGTH;

View File

@@ -27,13 +27,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
use self::enrich::enrich_documents_batch;
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::helpers::{
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, merge_roaring_bitmaps,
valid_lmdb_key, write_sorter_into_database, writer_into_reader, MergeFn,
};
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput};
use crate::documents::{obkv_to_object, DocumentsBatchBuilder, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@@ -43,7 +37,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::vector::EmbeddingConfigs;
use crate::vector::{ArroyWrapper, EmbeddingConfigs};
use crate::{CboRoaringBitmapCodec, Index, Object, Result};
static MERGED_DATABASE_COUNT: usize = 7;
@@ -605,7 +599,7 @@ where
let cloneable_chunk =
unsafe { as_cloneable_grenad(&word_docids_reader)? };
let word_docids = word_docids.get_or_insert_with(|| {
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
});
word_docids.push(cloneable_chunk.into_cursor()?);
let cloneable_chunk =
@@ -613,14 +607,14 @@ where
let exact_word_docids =
exact_word_docids.get_or_insert_with(|| {
MergerBuilder::new(
merge_deladd_cbo_roaring_bitmaps as MergeFn,
MergeDeladdCboRoaringBitmaps,
)
});
exact_word_docids.push(cloneable_chunk.into_cursor()?);
let cloneable_chunk =
unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
let word_fid_docids = word_fid_docids.get_or_insert_with(|| {
MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
});
word_fid_docids.push(cloneable_chunk.into_cursor()?);
TypedChunk::WordDocids {
@@ -634,7 +628,7 @@ where
let word_position_docids =
word_position_docids.get_or_insert_with(|| {
MergerBuilder::new(
merge_deladd_cbo_roaring_bitmaps as MergeFn,
MergeDeladdCboRoaringBitmaps,
)
});
word_position_docids.push(cloneable_chunk.into_cursor()?);
@@ -679,6 +673,24 @@ where
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
// If an embedder wasn't used in the typedchunk but must be binary quantized
// we should insert it in `dimension`
for (name, action) in settings_diff.embedding_config_updates.iter() {
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
InternalError::DatabaseMissingEntry {
db_name: "embedder_category_id",
key: None,
},
)?;
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
let reader =
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
let dim = reader.dimensions(self.wtxn)?;
dimension.insert(name.to_string(), dim);
}
}
for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn;
let vector_arroy = self.index.vector_arroy;
@@ -686,13 +698,23 @@ where
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?;
let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
let was_quantized = settings_diff
.old
.embedding_configs
.get(&embedder_name)
.map_or(false, |conf| conf.2);
let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let writer = arroy::Writer::new(vector_arroy, k, dimension);
if writer.need_build(wtxn)? {
writer.build(wtxn, &mut rng, None)?;
} else if writer.is_empty(wtxn)? {
let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
if is_quantizing {
writer.quantize(wtxn, k, dimension)?;
}
if writer.need_build(wtxn, dimension)? {
writer.build(wtxn, &mut rng, dimension)?;
} else if writer.is_empty(wtxn, dimension)? {
break;
}
}
@@ -719,10 +741,10 @@ where
)]
pub fn execute_prefix_databases(
self,
word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
word_fid_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_fid_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
@@ -902,7 +924,7 @@ where
)]
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn<'_>,
merger: Merger<CursorClonableMmap, MergeFn>,
merger: Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>,
word_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
indexer_config: &IndexerConfig,
@@ -2746,6 +2768,7 @@ mod tests {
response: Setting::NotSet,
distribution: Setting::NotSet,
headers: Setting::NotSet,
binary_quantized: Setting::NotSet,
}),
);
settings.set_embedder_settings(embedders);
@@ -2774,7 +2797,7 @@ mod tests {
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
let res = index
.search(&rtxn)
.semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec()))
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
.execute()
.unwrap();
assert_eq!(res.documents_ids.len(), 3);

View File

@@ -31,14 +31,14 @@ impl<'t> ImmutableObkvs<'t> {
}
/// Returns the OBKVs identified by the given ID.
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<KvReaderU16<'t>>> {
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<&'t KvReaderU16>> {
match self
.ids
.rank(docid)
.checked_sub(1)
.and_then(|offset| self.slices.get(offset as usize))
{
Some(bytes) => Ok(Some(KvReaderU16::new(bytes))),
Some(&bytes) => Ok(Some(bytes.into())),
None => Ok(None),
}
}

View File

@@ -5,6 +5,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs::File;
use std::io::{Read, Seek};
use either::Either;
use fxhash::FxHashMap;
use itertools::Itertools;
use obkv::{KvReader, KvReaderU16, KvWriter};
@@ -13,10 +14,10 @@ use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{
create_sorter, create_writer, keep_first, obkvs_keep_last_addition_merge_deletions,
obkvs_merge_additions_and_deletions, sorter_into_reader, MergeFn,
create_sorter, create_writer, sorter_into_reader, EitherObkvMerge,
ObkvsKeepLastAdditionMergeDeletions, ObkvsMergeAdditionsAndDeletions,
};
use super::{IndexDocumentsMethod, IndexerConfig};
use super::{IndexDocumentsMethod, IndexerConfig, KeepFirst};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
@@ -26,9 +27,10 @@ use crate::update::del_add::{
};
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
};
@@ -54,13 +56,13 @@ pub struct Transform<'a, 'i> {
indexer_settings: &'a IndexerConfig,
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
available_documents_ids: AvailableIds,
// Both grenad follows the same format:
// key | value
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
original_sorter: grenad::Sorter<EitherObkvMerge>,
flattened_sorter: grenad::Sorter<EitherObkvMerge>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
@@ -108,17 +110,19 @@ impl<'a, 'i> Transform<'a, 'i> {
index_documents_method: IndexDocumentsMethod,
_autogenerate_docids: bool,
) -> Result<Self> {
use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};
// We must choose the appropriate merge function for when two or more documents
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => obkvs_keep_last_addition_merge_deletions,
IndexDocumentsMethod::UpdateDocuments => obkvs_merge_additions_and_deletions,
ReplaceDocuments => Either::Left(ObkvsKeepLastAdditionMergeDeletions),
UpdateDocuments => Either::Right(ObkvsMergeAdditionsAndDeletions),
};
// We initialize the sorter with the user indexing settings.
let original_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_function,
merge_function.clone(),
indexer_settings.chunk_compression_type,
indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks,
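Note on EitherObkvMerge above: wrapping the two obkv merge strategies in either::Either lets both indexing methods share a single Sorter<EitherObkvMerge> type, which presumably relies on a MergeFunction impl for Either provided by grenad or elsewhere on this branch. A delegating impl in that spirit would look roughly like this sketch (illustrative only, not the actual impl or its location):

use std::borrow::Cow;
use either::Either;
use grenad::MergeFunction;

impl<A, B> MergeFunction for Either<A, B>
where
    A: MergeFunction,
    B: MergeFunction<Error = A::Error>,
{
    type Error = A::Error;

    fn merge<'a>(&self, key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>, Self::Error> {
        match self {
            Either::Left(f) => f.merge(key, values),
            Either::Right(f) => f.merge(key, values),
        }
    }
}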
@@ -140,7 +144,7 @@ impl<'a, 'i> Transform<'a, 'i> {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
indexer_settings,
available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
available_documents_ids: AvailableIds::new(&documents_ids),
original_sorter,
flattened_sorter,
index_documents_method,
@@ -278,21 +282,21 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
KvReaderU16::from_slice(base_obkv),
deladd_operation,
&mut document_sorter_value_buffer,
)?;
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let base_obkv = KvReader::new(base_obkv);
let base_obkv = KvReader::from_slice(base_obkv);
if let Some(flattened_obkv) =
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)?
Self::flatten_from_fields_ids_map(base_obkv, &mut self.fields_ids_map)?
{
// we recreate our buffer with the flattened documents
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&flattened_obkv),
KvReaderU16::from_slice(&flattened_obkv),
deladd_operation,
&mut document_sorter_value_buffer,
)?;
@@ -311,7 +315,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv_buffer),
KvReaderU16::from_slice(&obkv_buffer),
DelAddOperation::Addition,
&mut document_sorter_value_buffer,
)?;
@@ -319,14 +323,14 @@ impl<'a, 'i> Transform<'a, 'i> {
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let flattened_obkv = KvReader::new(&obkv_buffer);
let flattened_obkv = KvReader::from_slice(&obkv_buffer);
if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
{
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv),
KvReaderU16::from_slice(&obkv),
DelAddOperation::Addition,
&mut document_sorter_value_buffer,
)?
@@ -519,22 +523,22 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
KvReaderU16::from_slice(base_obkv),
DelAddOperation::Deletion,
document_sorter_value_buffer,
)?;
self.original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
// flatten it and push it as a deletion in the flattened_sorter
let flattened_obkv = KvReader::new(base_obkv);
let flattened_obkv = KvReader::from_slice(base_obkv);
if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
{
// we recreate our buffer with the flattened documents
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv),
KvReaderU16::from_slice(&obkv),
DelAddOperation::Deletion,
document_sorter_value_buffer,
)?;
@@ -552,7 +556,7 @@ impl<'a, 'i> Transform<'a, 'i> {
target = "indexing::transform"
)]
fn flatten_from_fields_ids_map(
obkv: &KvReader<'_, FieldId>,
obkv: &KvReader<FieldId>,
fields_ids_map: &mut FieldsIdsMap,
) -> Result<Option<Vec<u8>>> {
if obkv
@@ -720,10 +724,10 @@ impl<'a, 'i> Transform<'a, 'i> {
total_documents: self.documents_count,
});
for (key, value) in KvReader::new(val) {
let reader = KvReaderDelAdd::new(value);
for (key, value) in KvReader::from_slice(val) {
let reader = KvReaderDelAdd::from_slice(value);
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
(None, None) => (),
(None, Some(_)) => {
// New field
let name = self.fields_ids_map.name(key).ok_or(
@@ -837,7 +841,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// then fill the provided buffers with delta documents using KvWriterDelAdd.
#[allow(clippy::too_many_arguments)] // need the vectors + fid, feel free to create a struct xo xo
fn rebind_existing_document(
old_obkv: KvReader<'_, FieldId>,
old_obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
@@ -925,7 +929,7 @@ impl<'a, 'i> Transform<'a, 'i> {
}
let data = obkv_writer.into_inner()?;
let obkv = KvReader::<FieldId>::new(&data);
let obkv = KvReader::<FieldId>::from_slice(&data);
if let Some(original_obkv_buffer) = original_obkv_buffer {
original_obkv_buffer.clear();
@@ -935,8 +939,8 @@ impl<'a, 'i> Transform<'a, 'i> {
if let Some(flattened_obkv_buffer) = flattened_obkv_buffer {
// take the non-flattened version if flatten_from_fields_ids_map returns None.
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
let flattened = Self::flatten_from_fields_ids_map(obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
flattened_obkv_buffer.clear();
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
@@ -979,7 +983,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut original_sorter = if settings_diff.reindex_vectors() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
KeepFirst,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
@@ -989,19 +993,17 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let readers: Result<
BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
> = settings_diff
let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
.embedding_config_updates
.iter()
.filter_map(|(name, action)| {
if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
embedder_id,
user_provided,
}) = action
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
action.write_back()
{
let readers: Result<Vec<_>> =
self.index.arroy_readers(wtxn, *embedder_id).collect();
let readers: Result<Vec<_>> = self
.index
.arroy_readers(wtxn, *embedder_id, action.was_quantized)
.collect();
match readers {
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
Err(error) => Some(Err(error)),
@@ -1023,7 +1025,7 @@ impl<'a, 'i> Transform<'a, 'i> {
if settings_diff.reindex_searchable() || settings_diff.reindex_facets() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
KeepFirst,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
@@ -1104,23 +1106,14 @@ impl<'a, 'i> Transform<'a, 'i> {
}
}
let mut writers = Vec::new();
// delete all vectors from the embedders that need removal
for (_, (readers, _)) in readers {
for reader in readers {
let dimensions = reader.dimensions();
let arroy_index = reader.index();
drop(reader);
let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
writers.push(writer);
let dimensions = reader.dimensions(wtxn)?;
reader.clear(wtxn, dimensions)?;
}
}
for writer in writers {
writer.clear(wtxn)?;
}
let grenad_params = GrenadParameters {
chunk_compression_type: self.indexer_settings.chunk_compression_type,
chunk_compression_level: self.indexer_settings.chunk_compression_level,
@@ -1162,6 +1155,8 @@ fn drop_and_reuse<U, T>(mut vec: Vec<U>) -> Vec<T> {
#[cfg(test)]
mod test {
use grenad::MergeFunction;
use super::*;
#[test]
@@ -1173,21 +1168,21 @@ mod test {
kv_writer.insert(0_u8, [0]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition,
&mut additive_doc_0,
)
.unwrap();
additive_doc_0.insert(0, Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Deletion,
&mut deletive_doc_0,
)
.unwrap();
deletive_doc_0.insert(0, Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::DeletionAndAddition,
&mut del_add_doc_0,
)
@@ -1199,7 +1194,7 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition,
&mut additive_doc_1,
)
@@ -1212,32 +1207,39 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition,
&mut additive_doc_0_1,
)
.unwrap();
additive_doc_0_1.insert(0, Operation::Addition as u8);
let ret = obkvs_merge_additions_and_deletions(&[], &[Cow::from(additive_doc_0.as_slice())])
.unwrap();
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
&[],
&[Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0);
let ret = obkvs_merge_additions_and_deletions(
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
&[],
&[Cow::from(deletive_doc_0.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, del_add_doc_0);
let ret = obkvs_merge_additions_and_deletions(
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
&[],
&[Cow::from(additive_doc_0.as_slice()), Cow::from(deletive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, deletive_doc_0);
let ret = obkvs_merge_additions_and_deletions(
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
&[],
&[
Cow::from(additive_doc_1.as_slice()),
@@ -1248,21 +1250,24 @@ mod test {
.unwrap();
assert_eq!(*ret, del_add_doc_0);
let ret = obkvs_merge_additions_and_deletions(
let ret = MergeFunction::merge(
&ObkvsMergeAdditionsAndDeletions,
&[],
&[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0_1);
let ret = obkvs_keep_last_addition_merge_deletions(
let ret = MergeFunction::merge(
&ObkvsKeepLastAdditionMergeDeletions,
&[],
&[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0);
let ret = obkvs_keep_last_addition_merge_deletions(
let ret = MergeFunction::merge(
&ObkvsKeepLastAdditionMergeDeletions,
&[],
&[
Cow::from(deletive_doc_0.as_slice()),

View File

@@ -4,18 +4,17 @@ use std::fs::File;
use std::io::{self, BufReader};
use bytemuck::allocation::pod_collect_to_vec;
use grenad::{Merger, MergerBuilder};
use grenad::{MergeFunction, Merger, MergerBuilder};
use heed::types::Bytes;
use heed::{BytesDecode, RwTxn};
use obkv::{KvReader, KvWriter};
use roaring::RoaringBitmap;
use super::helpers::{
self, keep_first, merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, merge_ignore_values, valid_lmdb_key,
CursorClonableMmap,
self, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
CursorClonableMmap, KeepFirst, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
MergeIgnoreValues,
};
use super::MergeFn;
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
use crate::facet::FacetType;
use crate::index::db_name::DOCUMENTS;
@@ -24,9 +23,10 @@ use crate::proximity::MAX_DISTANCE;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
as_cloneable_grenad, try_split_array_at, KeepLatestObkv,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::ArroyWrapper;
use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec,
@@ -140,7 +140,7 @@ pub(crate) fn write_typed_chunk_into_index(
let vectors_fid =
fields_ids_map.id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
let mut builder = MergerBuilder::new(keep_latest_obkv as MergeFn);
let mut builder = MergerBuilder::new(KeepLatestObkv);
for typed_chunk in typed_chunks {
let TypedChunk::Documents(chunk) = typed_chunk else {
unreachable!();
@@ -162,7 +162,7 @@ pub(crate) fn write_typed_chunk_into_index(
let mut vectors_buffer = Vec::new();
while let Some((key, reader)) = iter.next()? {
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
let reader: KvReader<'_, FieldId> = KvReader::new(reader);
let reader: &KvReader<FieldId> = reader.into();
let (document_id_bytes, external_id_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCUMENTS) })?;
@@ -170,7 +170,7 @@ pub(crate) fn write_typed_chunk_into_index(
let external_id = std::str::from_utf8(external_id_bytes)?;
for (field_id, value) in reader.iter() {
-let del_add_reader = KvReaderDelAdd::new(value);
+let del_add_reader = KvReaderDelAdd::from_slice(value);
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
let addition = if vectors_fid == Some(field_id) {
@@ -234,7 +234,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdWordCountDocids(chunk) = typed_chunk else {
unreachable!();
@@ -257,13 +257,10 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
let _entered = span.enter();
-let mut word_docids_builder =
-MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
-let mut exact_word_docids_builder =
-MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
-let mut word_fid_docids_builder =
-MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
-let mut fst_merger_builder = MergerBuilder::new(merge_ignore_values as MergeFn);
+let mut word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+let mut exact_word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+let mut word_fid_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+let mut fst_merger_builder = MergerBuilder::new(MergeIgnoreValues);
for typed_chunk in typed_chunks {
let TypedChunk::WordDocids {
word_docids_reader,
@@ -328,7 +325,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::WordPositionDocids(chunk) = typed_chunk else {
unreachable!();
@@ -352,7 +349,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut data_size = 0;
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids) = typed_chunk
@@ -374,10 +371,9 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
let _entered = span.enter();
-let mut facet_id_string_builder =
-MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut facet_id_string_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut normalized_facet_id_string_builder =
-MergerBuilder::new(merge_deladd_btreeset_string as MergeFn);
+MergerBuilder::new(MergeDeladdBtreesetString);
let mut data_size = 0;
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetStringDocids((
@@ -411,7 +407,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetExistsDocids(chunk) = typed_chunk else {
unreachable!();
@@ -435,7 +431,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetIsNullDocids(chunk) = typed_chunk else {
unreachable!();
@@ -458,7 +454,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetIsEmptyDocids(chunk) = typed_chunk else {
unreachable!();
@@ -482,7 +478,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
+let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::WordPairProximityDocids(chunk) = typed_chunk else {
unreachable!();
@@ -515,7 +511,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(keep_first as MergeFn);
+let mut builder = MergerBuilder::new(KeepFirst);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdDocidFacetNumbers(chunk) = typed_chunk else {
unreachable!();
@@ -529,7 +525,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
-let reader = KvReaderDelAdd::new(value);
+let reader = KvReaderDelAdd::from_slice(value);
if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
@@ -549,7 +545,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(keep_first as MergeFn);
+let mut builder = MergerBuilder::new(KeepFirst);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdDocidFacetStrings(chunk) = typed_chunk else {
unreachable!();
@@ -563,7 +559,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
-let reader = KvReaderDelAdd::new(value);
+let reader = KvReaderDelAdd::from_slice(value);
if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
@@ -582,7 +578,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
let _entered = span.enter();
-let mut builder = MergerBuilder::new(keep_first as MergeFn);
+let mut builder = MergerBuilder::new(KeepFirst);
for typed_chunk in typed_chunks {
let TypedChunk::GeoPoints(chunk) = typed_chunk else {
unreachable!();
@@ -600,7 +596,7 @@ pub(crate) fn write_typed_chunk_into_index(
// convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
-let deladd_obkv = KvReaderDelAdd::new(value);
+let deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = deladd_obkv.get(DelAdd::Deletion) {
let geopoint = extract_geo_point(value, docid);
rtree.remove(&geopoint);
@@ -619,9 +615,9 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
let _entered = span.enter();
-let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
-let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
-let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn);
+let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
+let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
+let mut embeddings_builder = MergerBuilder::new(KeepFirst);
let mut add_to_user_provided = RoaringBitmap::new();
let mut remove_from_user_provided = RoaringBitmap::new();
let mut params = None;
@@ -666,9 +662,14 @@ pub(crate) fn write_typed_chunk_into_index(
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?;
+let binary_quantized = settings_diff
+.old
+.embedding_configs
+.get(&embedder_name)
+.map_or(false, |conf| conf.2);
// FIXME: allow customizing distance
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
-.map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
+.map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
.collect();
// remove vectors for the docids we want removed
@@ -679,7 +680,7 @@ pub(crate) fn write_typed_chunk_into_index(
for writer in &writers {
// Uses invariant: vectors are packed in the first writers.
-if !writer.del_item(wtxn, docid)? {
+if !writer.del_item(wtxn, expected_dimension, docid)? {
break;
}
}
@@ -711,7 +712,7 @@ pub(crate) fn write_typed_chunk_into_index(
)));
}
for (embedding, writer) in embeddings.iter().zip(&writers) {
-writer.add_item(wtxn, docid, embedding)?;
+writer.add_item(wtxn, expected_dimension, docid, embedding)?;
}
}
@@ -723,7 +724,7 @@ pub(crate) fn write_typed_chunk_into_index(
let (left, _index) = try_split_array_at(key).unwrap();
let docid = DocumentId::from_be_bytes(left);
-let vector_deladd_obkv = KvReaderDelAdd::new(value);
+let vector_deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) {
let vector: Vec<f32> = pod_collect_to_vec(value);
@@ -734,7 +735,7 @@ pub(crate) fn write_typed_chunk_into_index(
break;
};
if candidate == vector {
-writer.del_item(wtxn, docid)?;
+writer.del_item(wtxn, expected_dimension, docid)?;
deleted_index = Some(index);
}
}
@@ -751,8 +752,13 @@ pub(crate) fn write_typed_chunk_into_index(
if let Some((last_index, vector)) = last_index_with_a_vector {
// unwrap: computed the index from the list of writers
let writer = writers.get(last_index).unwrap();
-writer.del_item(wtxn, docid)?;
-writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?;
+writer.del_item(wtxn, expected_dimension, docid)?;
+writers.get(deleted_index).unwrap().add_item(
+wtxn,
+expected_dimension,
+docid,
+&vector,
+)?;
}
}
}
@@ -762,8 +768,8 @@ pub(crate) fn write_typed_chunk_into_index(
// overflow was detected during vector extraction.
for writer in &writers {
-if !writer.contains_item(wtxn, docid)? {
-writer.add_item(wtxn, docid, &vector)?;
+if !writer.contains_item(wtxn, expected_dimension, docid)? {
+writer.add_item(wtxn, expected_dimension, docid, &vector)?;
break;
}
}
@@ -786,9 +792,13 @@ fn extract_geo_point(value: &[u8], docid: DocumentId) -> GeoPoint {
GeoPoint::new(xyz_point, (docid, point))
}
-fn merge_word_docids_reader_into_fst(
-merger: Merger<CursorClonableMmap, MergeFn>,
-) -> Result<fst::Set<Vec<u8>>> {
+fn merge_word_docids_reader_into_fst<MF>(
+merger: Merger<CursorClonableMmap, MF>,
+) -> Result<fst::Set<Vec<u8>>>
+where
+MF: MergeFunction,
+crate::Error: From<MF::Error>,
+{
let mut iter = merger.into_stream_merger_iter()?;
let mut builder = fst::SetBuilder::memory();
@@ -802,8 +812,8 @@ fn merge_word_docids_reader_into_fst(
/// Write the provided entries into the database using the serialize_value function.
/// The merge_values function is used if an entry already exists in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
-fn write_entries_into_database<R, K, V, FS, FM>(
-merger: Merger<R, MergeFn>,
+fn write_entries_into_database<R, K, V, FS, FM, MF>(
+merger: Merger<R, MF>,
database: &heed::Database<K, V>,
wtxn: &mut RwTxn<'_>,
serialize_value: FS,
@@ -813,6 +823,8 @@ where
R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
+MF: MergeFunction,
+crate::Error: From<MF::Error>,
{
let mut buffer = Vec::new();
let database = database.remap_types::<Bytes, Bytes>();
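As the doc comment above describes, each entry coming out of the merger is either serialized and inserted (new key) or merged with the value already stored under that key. A minimal sketch of that write-or-merge loop, using a HashMap as a stand-in for the LMDB database and simplified, hypothetical serialize/merge helpers:

use std::collections::HashMap;

fn serialize_value<'a>(value: &[u8], buffer: &'a mut Vec<u8>) -> &'a [u8] {
    buffer.clear();
    buffer.extend_from_slice(value);
    buffer
}

fn merge_values<'a>(new: &[u8], old: &[u8], buffer: &'a mut Vec<u8>) -> &'a [u8] {
    // Toy merge: concatenate old and new (the real code merges bitmaps, obkvs, ...).
    buffer.clear();
    buffer.extend_from_slice(old);
    buffer.extend_from_slice(new);
    buffer
}

fn main() {
    let mut database: HashMap<Vec<u8>, Vec<u8>> = HashMap::new();
    let entries = [(b"key".as_slice(), b"a".as_slice()), (b"key".as_slice(), b"b".as_slice())];

    let mut buffer = Vec::new();
    for (key, value) in entries {
        let data = match database.get(key) {
            Some(old) => merge_values(value, old, &mut buffer),
            None => serialize_value(value, &mut buffer),
        };
        database.insert(key.to_vec(), data.to_vec());
    }
    assert_eq!(database[b"key".as_slice()], b"ab".to_vec());
}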
@@ -839,20 +851,22 @@ where
/// Akin to the `write_entries_into_database` function but specialized
/// for the case when we only index additional searchable fields.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
-fn write_proximity_entries_into_database_additional_searchables<R>(
-merger: Merger<R, MergeFn>,
+fn write_proximity_entries_into_database_additional_searchables<R, MF>(
+merger: Merger<R, MF>,
database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
wtxn: &mut RwTxn<'_>,
) -> Result<()>
where
R: io::Read + io::Seek,
+MF: MergeFunction,
+crate::Error: From<MF::Error>,
{
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
if valid_lmdb_key(key) {
let (proximity_to_insert, word1, word2) =
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
-let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) {
+let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
Some(value) => {
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}


@@ -1,11 +1,9 @@
pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::available_ids::AvailableIds;
pub use self::clear_documents::ClearDocuments;
pub use self::concurrent_available_ids::ConcurrentAvailableIds;
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
-pub use self::index_documents::{
-merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
-IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
-};
+pub use self::index_documents::*;
pub use self::indexer_config::IndexerConfig;
pub use self::settings::{validate_embedding_settings, Setting, Settings};
pub use self::update_step::UpdateIndexingStep;
@@ -13,12 +11,14 @@ pub use self::word_prefix_docids::WordPrefixDocids;
pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
pub use self::words_prefixes_fst::WordsPrefixesFst;
mod available_documents_ids;
mod available_ids;
mod clear_documents;
mod concurrent_available_ids;
pub(crate) mod del_add;
pub(crate) mod facet;
mod index_documents;
mod indexer_config;
+pub mod new;
mod settings;
mod update_step;
mod word_prefix_docids;


@@ -0,0 +1,522 @@
use std::marker::PhantomData;
use std::sync::atomic::Ordering;
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
use heed::types::Bytes;
use memmap2::Mmap;
use super::extract::{FacetKind, HashMapMerger};
use super::StdResult;
use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY};
use crate::update::new::KvReaderFieldId;
use crate::{DocumentId, Index};
/// The capacity of the channel is currently expressed in number of messages.
pub fn merger_writer_channel(cap: usize) -> (MergerSender, WriterReceiver) {
let (sender, receiver) = crossbeam_channel::bounded(cap);
(
MergerSender {
sender,
send_count: Default::default(),
writer_contentious_count: Default::default(),
merger_contentious_count: Default::default(),
},
WriterReceiver(receiver),
)
}
/// The capacity of the channel is currently expressed in number of messages.
pub fn extractors_merger_channels(cap: usize) -> (ExtractorSender, MergerReceiver) {
let (sender, receiver) = crossbeam_channel::bounded(cap);
(ExtractorSender(sender), MergerReceiver(receiver))
}
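Both constructors above create bounded crossbeam channels whose capacity is counted in messages, so a slow consumer applies backpressure to the producers. A std-only sketch of the same idea, using sync_channel in place of crossbeam_channel::bounded:

use std::sync::mpsc::sync_channel;
use std::thread;

fn main() {
    // Capacity of 2 messages: a third send blocks until the consumer drains one.
    let (sender, receiver) = sync_channel::<u32>(2);

    let producer = thread::spawn(move || {
        for op in 0..5 {
            sender.send(op).unwrap(); // blocks while the buffer is full
        }
    });

    // The consumer side iterates until the sender is dropped.
    for op in receiver {
        println!("writing operation {op}");
    }
    producer.join().unwrap();
}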
pub enum KeyValueEntry {
SmallInMemory { key_length: usize, data: Box<[u8]> },
LargeOnDisk { key: Box<[u8]>, value: Mmap },
}
impl KeyValueEntry {
pub fn from_small_key_value(key: &[u8], value: &[u8]) -> Self {
let mut data = Vec::with_capacity(key.len() + value.len());
data.extend_from_slice(key);
data.extend_from_slice(value);
KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() }
}
pub fn from_large_key_value(key: &[u8], value: Mmap) -> Self {
KeyValueEntry::LargeOnDisk { key: key.to_vec().into_boxed_slice(), value }
}
pub fn key(&self) -> &[u8] {
match self {
KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[..*key_length],
KeyValueEntry::LargeOnDisk { key, value: _ } => key.as_ref(),
}
}
pub fn value(&self) -> &[u8] {
match self {
KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[*key_length..],
KeyValueEntry::LargeOnDisk { key: _, value } => value.as_ref(),
}
}
}
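A quick illustration of the small-entry layout above: key and value are packed into a single boxed buffer and split back apart by key_length. A test-style fragment, assuming this module's KeyValueEntry is in scope:

let entry = KeyValueEntry::from_small_key_value(b"word", &[1u8, 2, 3]);
assert_eq!(entry.key(), b"word".as_slice());
assert_eq!(entry.value(), [1u8, 2, 3].as_slice());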
pub struct KeyEntry {
data: Box<[u8]>,
}
impl KeyEntry {
pub fn from_key(key: &[u8]) -> Self {
KeyEntry { data: key.to_vec().into_boxed_slice() }
}
pub fn entry(&self) -> &[u8] {
self.data.as_ref()
}
}
pub enum EntryOperation {
Delete(KeyEntry),
Write(KeyValueEntry),
}
pub struct DocumentEntry {
docid: DocumentId,
content: Box<[u8]>,
}
impl DocumentEntry {
pub fn new_uncompressed(docid: DocumentId, content: Box<KvReaderFieldId>) -> Self {
DocumentEntry { docid, content: content.into() }
}
pub fn new_compressed(docid: DocumentId, content: Box<[u8]>) -> Self {
DocumentEntry { docid, content }
}
pub fn key(&self) -> [u8; 4] {
self.docid.to_be_bytes()
}
pub fn content(&self) -> &[u8] {
&self.content
}
}
pub struct DocumentDeletionEntry(DocumentId);
impl DocumentDeletionEntry {
pub fn key(&self) -> [u8; 4] {
self.0.to_be_bytes()
}
}
pub struct WriterOperation {
database: Database,
entry: EntryOperation,
}
pub enum Database {
Documents,
ExactWordDocids,
FidWordCountDocids,
Main,
WordDocids,
WordFidDocids,
WordPairProximityDocids,
WordPositionDocids,
FacetIdIsNullDocids,
FacetIdIsEmptyDocids,
FacetIdExistsDocids,
FacetIdF64NumberDocids,
FacetIdStringDocids,
}
impl Database {
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
match self {
Database::Documents => index.documents.remap_types(),
Database::ExactWordDocids => index.exact_word_docids.remap_types(),
Database::Main => index.main.remap_types(),
Database::WordDocids => index.word_docids.remap_types(),
Database::WordFidDocids => index.word_fid_docids.remap_types(),
Database::WordPositionDocids => index.word_position_docids.remap_types(),
Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(),
Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(),
Database::FacetIdIsNullDocids => index.facet_id_is_null_docids.remap_types(),
Database::FacetIdIsEmptyDocids => index.facet_id_is_empty_docids.remap_types(),
Database::FacetIdExistsDocids => index.facet_id_exists_docids.remap_types(),
Database::FacetIdF64NumberDocids => index.facet_id_f64_docids.remap_types(),
Database::FacetIdStringDocids => index.facet_id_string_docids.remap_types(),
}
}
}
impl WriterOperation {
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
self.database.database(index)
}
pub fn entry(self) -> EntryOperation {
self.entry
}
}
pub struct WriterReceiver(Receiver<WriterOperation>);
impl IntoIterator for WriterReceiver {
type Item = WriterOperation;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
pub struct MergerSender {
sender: Sender<WriterOperation>,
/// The total number of messages we send through the channel.
send_count: std::sync::atomic::AtomicUsize,
/// The number of times we sent something in a channel that was full.
writer_contentious_count: std::sync::atomic::AtomicUsize,
/// The number of times we sent something in a channel that was empty.
merger_contentious_count: std::sync::atomic::AtomicUsize,
}
impl Drop for MergerSender {
fn drop(&mut self) {
eprintln!(
"Merger channel stats: {} sends, {} writer contentions ({}%), {} merger contentions ({}%)",
self.send_count.load(Ordering::SeqCst),
self.writer_contentious_count.load(Ordering::SeqCst),
(self.writer_contentious_count.load(Ordering::SeqCst) as f32 / self.send_count.load(Ordering::SeqCst) as f32) * 100.0,
self.merger_contentious_count.load(Ordering::SeqCst),
(self.merger_contentious_count.load(Ordering::SeqCst) as f32 / self.send_count.load(Ordering::SeqCst) as f32) * 100.0
)
}
}
impl MergerSender {
pub fn main(&self) -> MainSender<'_> {
MainSender(self)
}
pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> {
WordDocidsSender { sender: self, _marker: PhantomData }
}
pub fn facet_docids(&self) -> FacetDocidsSender<'_> {
FacetDocidsSender { sender: self }
}
pub fn documents(&self) -> DocumentsSender<'_> {
DocumentsSender(self)
}
pub fn send_documents_ids(&self, bitmap: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
DOCUMENTS_IDS_KEY.as_bytes(),
bitmap,
));
match self.send(WriterOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn send(&self, op: WriterOperation) -> StdResult<(), SendError<()>> {
if self.sender.is_full() {
self.writer_contentious_count.fetch_add(1, Ordering::SeqCst);
}
if self.sender.is_empty() {
self.merger_contentious_count.fetch_add(1, Ordering::SeqCst);
}
self.send_count.fetch_add(1, Ordering::SeqCst);
match self.sender.send(op) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct MainSender<'a>(&'a MergerSender);
impl MainSender<'_> {
pub fn write_words_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value(
WORDS_FST_KEY.as_bytes(),
value,
));
match self.0.send(WriterOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.0.send(WriterOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub enum ExactWordDocids {}
pub enum FidWordCountDocids {}
pub enum WordDocids {}
pub enum WordFidDocids {}
pub enum WordPairProximityDocids {}
pub enum WordPositionDocids {}
pub enum FacetDocids {}
pub trait DatabaseType {
const DATABASE: Database;
}
pub trait MergerOperationType {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation;
}
impl DatabaseType for ExactWordDocids {
const DATABASE: Database = Database::ExactWordDocids;
}
impl MergerOperationType for ExactWordDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::ExactWordDocidsMerger(merger)
}
}
impl DatabaseType for FidWordCountDocids {
const DATABASE: Database = Database::FidWordCountDocids;
}
impl MergerOperationType for FidWordCountDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::FidWordCountDocidsMerger(merger)
}
}
impl DatabaseType for WordDocids {
const DATABASE: Database = Database::WordDocids;
}
impl MergerOperationType for WordDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordDocidsMerger(merger)
}
}
impl DatabaseType for WordFidDocids {
const DATABASE: Database = Database::WordFidDocids;
}
impl MergerOperationType for WordFidDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordFidDocidsMerger(merger)
}
}
impl DatabaseType for WordPairProximityDocids {
const DATABASE: Database = Database::WordPairProximityDocids;
}
impl MergerOperationType for WordPairProximityDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordPairProximityDocidsMerger(merger)
}
}
impl DatabaseType for WordPositionDocids {
const DATABASE: Database = Database::WordPositionDocids;
}
impl MergerOperationType for WordPositionDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::WordPositionDocidsMerger(merger)
}
}
impl MergerOperationType for FacetDocids {
fn new_merger_operation(merger: HashMapMerger) -> MergerOperation {
MergerOperation::FacetDocidsMerger(merger)
}
}
pub trait DocidsSender {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>;
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>;
}
pub struct WordDocidsSender<'a, D> {
sender: &'a MergerSender,
_marker: PhantomData<D>,
}
impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
match self.sender.send(WriterOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.sender.send(WriterOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
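The empty enums above act as compile-time markers: DatabaseType fixes the target database and MergerOperationType picks the MergerOperation variant, so one generic WordDocidsSender can serve several databases through PhantomData. A minimal, self-contained sketch of the same marker-type dispatch; the names here are illustrative, not this crate's:

use std::marker::PhantomData;

trait Target {
    const NAME: &'static str;
}

enum Words {}
enum Positions {}

impl Target for Words { const NAME: &'static str = "word_docids"; }
impl Target for Positions { const NAME: &'static str = "word_position_docids"; }

struct TypedSender<D> {
    _marker: PhantomData<D>,
}

impl<D: Target> TypedSender<D> {
    fn write(&self, key: &[u8]) {
        // The marker type alone decides which database the entry targets.
        println!("sending {:?} to {}", key, D::NAME);
    }
}

fn main() {
    let words: TypedSender<Words> = TypedSender { _marker: PhantomData };
    words.write(b"house");
}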
pub struct FacetDocidsSender<'a> {
sender: &'a MergerSender,
}
impl DocidsSender for FacetDocidsSender<'_> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let (database, key) = self.extract_database(key);
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
match self.sender.send(WriterOperation { database, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let (database, key) = self.extract_database(key);
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.sender.send(WriterOperation { database, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
impl FacetDocidsSender<'_> {
fn extract_database<'a>(&self, key: &'a [u8]) -> (Database, &'a [u8]) {
let database = match FacetKind::from(key[0]) {
FacetKind::Number => Database::FacetIdF64NumberDocids,
FacetKind::String => Database::FacetIdStringDocids,
FacetKind::Null => Database::FacetIdIsNullDocids,
FacetKind::Empty => Database::FacetIdIsEmptyDocids,
FacetKind::Exists => Database::FacetIdExistsDocids,
};
(database, &key[1..])
}
}
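The facet sender above routes each write by reading the one-byte FacetKind tag at the start of the key and stripping it before forwarding the remainder. A minimal sketch of that tag-and-strip scheme with hypothetical helpers:

// Prepend a one-byte kind tag to a facet key (stand-in for the real encoding).
fn tag(kind: u8, key: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(1 + key.len());
    out.push(kind);
    out.extend_from_slice(key);
    out
}

// Mirror of extract_database: read the tag and return it with the untagged key.
fn untag(tagged: &[u8]) -> (u8, &[u8]) {
    (tagged[0], &tagged[1..])
}

fn main() {
    let tagged = tag(1, b"blue");
    assert_eq!(untag(&tagged), (1u8, b"blue".as_slice()));
}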
pub struct DocumentsSender<'a>(&'a MergerSender);
impl DocumentsSender<'_> {
/// TODO do that efficiently
pub fn uncompressed(
&self,
docid: DocumentId,
document: &KvReaderFieldId,
) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
&docid.to_be_bytes(),
document.as_bytes(),
));
match self.0.send(WriterOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, docid: DocumentId) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(&docid.to_be_bytes()));
match self.0.send(WriterOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub enum MergerOperation {
ExactWordDocidsMerger(HashMapMerger),
FidWordCountDocidsMerger(HashMapMerger),
WordDocidsMerger(HashMapMerger),
WordFidDocidsMerger(HashMapMerger),
WordPairProximityDocidsMerger(HashMapMerger),
WordPositionDocidsMerger(HashMapMerger),
FacetDocidsMerger(HashMapMerger),
DeleteDocument { docid: DocumentId },
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
FinishedDocument,
}
pub struct MergerReceiver(Receiver<MergerOperation>);
impl IntoIterator for MergerReceiver {
type Item = MergerOperation;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
pub struct ExtractorSender(Sender<MergerOperation>);
impl ExtractorSender {
pub fn document_sender(&self) -> DocumentSender<'_> {
DocumentSender(Some(&self.0))
}
pub fn send_searchable<D: MergerOperationType>(
&self,
merger: HashMapMerger,
) -> StdResult<(), SendError<()>> {
match self.0.send(D::new_merger_operation(merger)) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct DocumentSender<'a>(Option<&'a Sender<MergerOperation>>);
impl DocumentSender<'_> {
pub fn insert(
&self,
docid: DocumentId,
document: Box<KvReaderFieldId>,
) -> StdResult<(), SendError<()>> {
let sender = self.0.unwrap();
match sender.send(MergerOperation::InsertDocument { docid, document }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, docid: DocumentId) -> StdResult<(), SendError<()>> {
let sender = self.0.unwrap();
match sender.send(MergerOperation::DeleteDocument { docid }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn finish(mut self) -> StdResult<(), SendError<()>> {
let sender = self.0.take().unwrap();
match sender.send(MergerOperation::FinishedDocument) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
impl Drop for DocumentSender<'_> {
fn drop(&mut self) {
if let Some(sender) = self.0.take() {
sender.send(MergerOperation::FinishedDocument);
}
}
}
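DocumentSender wraps its sender in an Option so that either finish() or the Drop impl above sends the FinishedDocument sentinel exactly once. A minimal sketch of that take-on-finish pattern using a plain std channel; the names are illustrative:

use std::sync::mpsc::{channel, Sender};

struct Finisher(Option<Sender<&'static str>>);

impl Finisher {
    fn finish(mut self) {
        // Take the sender so the Drop impl below sees None and cannot send twice.
        if let Some(sender) = self.0.take() {
            let _ = sender.send("finished");
        }
    }
}

impl Drop for Finisher {
    fn drop(&mut self) {
        if let Some(sender) = self.0.take() {
            let _ = sender.send("finished");
        }
    }
}

fn main() {
    let (sender, receiver) = channel();
    Finisher(Some(sender)).finish();
    assert_eq!(receiver.recv().unwrap(), "finished");
}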

Some files were not shown because too many files have changed in this diff.