Compare commits


170 Commits

Author SHA1 Message Date
620fee35f9 Fix benches 2023-11-06 11:56:46 +01:00
cbaa54cafd Fix clippy issues 2023-11-06 11:19:31 +01:00
1bccf2079e Correctly mark non-tests as non-tests 2023-11-06 11:03:56 +01:00
1b2ea6cf19 REVERT ME: ignore prefix pair databases tests 2023-11-06 10:46:22 +01:00
1ad1fcc8c8 Remove all warnings 2023-11-06 10:31:14 +01:00
87610a5f98 Don't try to delete a document that is not in the database 2023-11-02 16:49:03 +01:00
2544bc1416 Merge pull request #4160 from meilisearch/diff-indexing-vector-points
Diff Indexing for the vector points
2023-11-02 16:01:51 +01:00
ff522c919d Fix the vector extractions for the diff indexing 2023-11-02 15:58:08 +01:00
1c39459cf4 Merge pull request #4179 from meilisearch/diff-indexing-fix-nested-primary-key
Diff indexing fix nested primary key
2023-11-02 15:39:50 +01:00
bf0651f23c Implement iter method on ExternalDocumentsIds 2023-11-02 15:38:00 +01:00
5b20e625f3 fix merge 2023-11-02 15:31:37 +01:00
bc51d6157a Fix transform reindexing path 2023-11-02 15:26:20 +01:00
1b4ff991c0 update typed chunks 2023-11-02 15:26:20 +01:00
4b64c33aa2 update vector extractor 2023-11-02 15:26:20 +01:00
12323d610e Change the original document sorter key from the internal docid to a concatenation of the internal and the external docid 2023-11-02 15:26:20 +01:00
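A minimal std-only sketch of the idea behind the commit above, using hypothetical names rather than milli's actual types: the sorter key becomes the big-endian internal docid followed by the external docid bytes, so entries still sort by internal docid first while carrying the external id along.

```rust
/// Hypothetical sketch: build a sorter key that concatenates the internal
/// docid (big-endian, so lexicographic order matches numeric order) with
/// the external docid, as the commit above describes.
fn sorter_key(internal_docid: u32, external_docid: &str) -> Vec<u8> {
    let mut key = Vec::with_capacity(4 + external_docid.len());
    key.extend_from_slice(&internal_docid.to_be_bytes());
    key.extend_from_slice(external_docid.as_bytes());
    key
}

fn main() {
    let key = sorter_key(42, "movie-42");
    // The first 4 bytes are the internal docid, the rest is the external docid.
    assert_eq!(&key[..4], 42u32.to_be_bytes().as_slice());
    assert_eq!(&key[4..], b"movie-42".as_slice());
}
```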
44e9033b3a Merge pull request #4181 from meilisearch/diff-indexing-parallel-transform
Use rayon to sort entries in parallel
2023-11-02 15:16:10 +01:00
4d864f0702 Always sort internal Sorter entries in parallel 2023-11-02 14:47:43 +01:00
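The "sort entries in parallel" commits above boil down to swapping a sequential sort for rayon's parallel slice sort. A minimal sketch assuming the rayon crate, not the actual milli Sorter code:

```rust
// Illustrative only: sort sorter-style (key, value) entries on all cores.
use rayon::prelude::*;

fn main() {
    let mut entries: Vec<(Vec<u8>, Vec<u8>)> = (0..1_000u32)
        .rev()
        .map(|i| (i.to_be_bytes().to_vec(), b"value".to_vec()))
        .collect();

    // Parallel, unstable sort by key instead of a single-threaded sort.
    entries.par_sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

    assert!(entries.windows(2).all(|w| w[0].0 <= w[1].0));
}
```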
b10c060bf7 Cleanup TOML 2023-11-01 14:03:04 +01:00
e507ef5932 Slow the logging down 2023-11-01 13:49:32 +01:00
c71b1d33ae Sort entries using rayon in the transform sorters 2023-11-01 11:07:16 +01:00
0fc446c62f Add more timing logs to the Transform 2023-11-01 11:07:16 +01:00
0fb6acefc3 Add snapshots for facets 2023-10-31 17:11:08 +01:00
b1d1355b69 remove tests on soft-deleted 2023-10-31 16:36:27 +01:00
f19332466e Extract field value as values instead of Option<Value> 2023-10-31 16:36:27 +01:00
03ddb4f310 use deladd in facet update tests 2023-10-31 16:36:27 +01:00
c855cc2721 Remove unused test 2023-10-31 16:36:27 +01:00
da0503ef80 Fix document count 2023-10-31 16:36:27 +01:00
94206b0055 Update tests 2023-10-31 13:48:47 +01:00
b40253bf18 update snapshots 2023-10-31 10:30:48 +01:00
d8bf3f3fc2 Remove unused snapshots 2023-10-31 10:12:49 +01:00
9d59e8011a fix some tests 2023-10-31 10:08:36 +01:00
dad78cbf8d Bulk facet remove deletes keys from DB when value empty 2023-10-31 09:53:55 +01:00
4e91707a06 Rename test 2023-10-31 09:41:17 +01:00
de10f20732 Fix field distribution again 2023-10-30 17:47:22 +01:00
be395c7944 Change order of arguments to tokenizer_builder 2023-10-30 16:26:29 +01:00
9fedd8101a Fix tests 2023-10-30 15:11:07 +01:00
54d07a8da3 Update field distribution taking into account both deletions and additions 2023-10-30 14:47:51 +01:00
58690dfb19 Fix tests compilation after changes to ExternalDocumentsIds API 2023-10-30 13:34:07 +01:00
abf424ebfc Remove unused FromIterator 2023-10-30 11:41:56 +01:00
dfab6293c9 Use an LMDB database to store the external documents ids 2023-10-30 11:41:23 +01:00
fdf3f7f627 Fix facet distribution test 2023-10-30 11:41:23 +01:00
6260cff65f Actually delete documents from DB when the merge function says so 2023-10-30 11:41:22 +01:00
8e0d9c9a5e Recover delete_documents tests that were too eagerly deleted 2023-10-30 11:41:22 +01:00
ae4ec8ea55 Add delete_document_using_wtxn to TempIndex 2023-10-30 11:41:22 +01:00
652ac3052d use new iterator in batch 2023-10-30 11:41:22 +01:00
9a2dccc3bc Add iterator to find external ids of a bitmap of internal ids 2023-10-30 11:41:22 +01:00
a35988550c Fix some snapshots 2023-10-30 11:41:22 +01:00
e78281785c Actually execute the transform even if there are only documents to delete 2023-10-30 11:41:22 +01:00
3c15881818 Add simple delete test 2023-10-30 11:41:22 +01:00
73c06d31d9 Snapshots always display stuff in a consistent order 2023-10-30 11:41:22 +01:00
290e773d23 remove more warnings and fix some tests 2023-10-30 11:41:22 +01:00
fa6c7f65ca Add TmpIndex::delete_documents 2023-10-30 11:41:22 +01:00
113527f466 Remove soft-deleted related methods from Index 2023-10-30 11:41:22 +01:00
c534a1b687 Stop using delete documents pipeline in batch runner 2023-10-30 11:41:22 +01:00
2263dff02b Stop using removed delete pipelines almost everywhere 2023-10-30 11:41:22 +01:00
d651b3ef01 Remove delete documents files 2023-10-30 11:41:20 +01:00
762b0b47e6 Use deladd merging function in chunks mergers 2023-10-30 11:40:20 +01:00
01d5eedf2f Remove some warnings 2023-10-30 11:40:20 +01:00
073f89db79 Fix facet tests 2023-10-30 11:40:20 +01:00
8370fbc92b Fix snaps 2023-10-30 11:40:20 +01:00
85f42fbc03 Handle external to internal id mapping from TypedChunk::Documents 2023-10-30 11:40:20 +01:00
c6b3c18c85 WIP: Comment out document deletion in other pipelines than update
TODO: fix calls to DELETE route
2023-10-30 11:40:20 +01:00
bafeb892a7 Modify Index after changes to ExternalDocumentsIds 2023-10-30 11:40:20 +01:00
8fb221dae3 Refactor ExternalDocumentsIds
- Remove soft deleted
- Add apply method that takes a list of operations to encapsulate modifications to the external -> internal mapping
2023-10-30 11:40:20 +01:00
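A rough sketch of the `apply` idea described in the refactor above, with a hypothetical in-memory map standing in for the LMDB database milli actually uses, and illustrative operation names rather than the real API:

```rust
use std::collections::BTreeMap;

/// Hypothetical operation type: either set an external id -> internal id
/// mapping or delete it (names are illustrative, not milli's exact API).
enum DocumentOperation {
    Set { external_id: String, internal_id: u32 },
    Delete { external_id: String },
}

/// In-memory stand-in for the external -> internal documents ids mapping.
#[derive(Default)]
struct ExternalDocumentsIds(BTreeMap<String, u32>);

impl ExternalDocumentsIds {
    /// Apply a batch of operations, so every modification to the mapping
    /// goes through one encapsulated entry point, as the refactor describes.
    fn apply(&mut self, operations: Vec<DocumentOperation>) {
        for op in operations {
            match op {
                DocumentOperation::Set { external_id, internal_id } => {
                    self.0.insert(external_id, internal_id);
                }
                DocumentOperation::Delete { external_id } => {
                    self.0.remove(&external_id);
                }
            }
        }
    }
}

fn main() {
    let mut ids = ExternalDocumentsIds::default();
    ids.apply(vec![
        DocumentOperation::Set { external_id: "doc-1".into(), internal_id: 0 },
        DocumentOperation::Set { external_id: "doc-2".into(), internal_id: 1 },
        DocumentOperation::Delete { external_id: "doc-1".into() },
    ]);
    assert_eq!(ids.0.get("doc-2"), Some(&1));
    assert!(ids.0.get("doc-1").is_none());
}
```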
5be569e3e2 Update obkv 2023-10-30 11:40:20 +01:00
946c762d28 WIP: reset documents in TypedChunk::Documents 2023-10-30 11:40:20 +01:00
cda6ca1ee6 Remove TypedChunk::NewDocumentIds 2023-10-30 11:40:18 +01:00
696fcf4d18 Fix document insertion into LMDB 2023-10-30 11:39:31 +01:00
476e4d3dbe Use value buffer instead of the initial value when writing the final result in the sorter 2023-10-30 11:39:31 +01:00
576fa9c6da Remove useless comment 2023-10-30 11:39:31 +01:00
77dcbff6b2 Remove and Insert the DelAdd geo points 2023-10-30 11:39:31 +01:00
544440c363 Ignore geo fields when the Del and Add content is the same 2023-10-30 11:39:31 +01:00
a3dae4db9b Extract the geo fields DelAdd and generate a new DelAdd obkv with it 2023-10-30 11:39:31 +01:00
ba90a5ec0e update extract fid word count docids 2023-10-30 11:39:31 +01:00
b26dc9aabe Explanatory code comment 2023-10-30 11:39:31 +01:00
66abac9364 Use specialized KvReaderDelAdd type
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-30 11:39:31 +01:00
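Many of the surrounding commits rewrite the extractors around "DelAdd" key-value objects that carry both the deleted and the added version of a value. A std-only sketch of the concept, not the actual obkv-based types milli uses:

```rust
use std::collections::BTreeMap;

/// The two sides of a diff-indexing entry (illustrative enum; milli stores
/// these as two keys of an obkv value rather than a Rust enum).
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum DelAdd {
    Deletion,
    Addition,
}

/// A value that may have a deleted side, an added side, or both.
type DelAddValue = BTreeMap<DelAdd, Vec<u8>>;

/// Sketch of "only serialize the Add side": keep the addition, drop the rest.
fn addition_only(value: &DelAddValue) -> Option<&[u8]> {
    value.get(&DelAdd::Addition).map(Vec::as_slice)
}

fn main() {
    let mut value = DelAddValue::new();
    value.insert(DelAdd::Deletion, b"old title".to_vec());
    value.insert(DelAdd::Addition, b"new title".to_vec());
    assert_eq!(addition_only(&value), Some(&b"new title"[..]));
}
```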
59f88c14b3 Simplify facet update after removing Index::faceted_documents_ids 2023-10-30 11:39:29 +01:00
14832cb324 Remove Index::faceted_documents_ids 2023-10-30 11:37:32 +01:00
04ec293024 Facet Incremental update 2023-10-30 11:37:30 +01:00
f67ff3a738 Facets Bulk update 2023-10-30 11:36:40 +01:00
560e8f5613 Introduce the CboRoaringBitmapCodec merge_deladd_into and use it 2023-10-30 11:34:55 +01:00
2d3f15f82c Introduce a function to only serialize the Add side of a DelAdd obkv 2023-10-30 11:34:55 +01:00
40186bf403 Rename FieldIdWordCountDocids correctly 2023-10-30 11:34:50 +01:00
87e3d27878 update extract word pair proximity to support deladd obkvs 2023-10-30 11:34:02 +01:00
6bcf8b4f8c update extract word position docids 2023-10-30 11:34:02 +01:00
46aa75abdb update extract word docids 2023-10-30 11:34:02 +01:00
2597bbd107 Make the script language docids map take a tuple of roaring bitmaps expressing the deletions and the additions 2023-10-30 11:34:00 +01:00
e2bc054604 Update extract_facet_string_docids to support deladd obkvs 2023-10-30 11:32:36 +01:00
fcd3a1434d Update extract_facet_number_docids to support deladd obkvs 2023-10-30 11:31:04 +01:00
a82dee21e0 Rename docid_fid into fid_docid 2023-10-30 11:31:02 +01:00
bc45c1206d Implement all the facet extraction paths and simplify them 2023-10-30 11:29:08 +01:00
6ae4100f07 Generate the DelAdd for is_null, is_empty, and exists 2023-10-30 11:29:08 +01:00
0c47defeee Work on fid docid facet values rewrite 2023-10-30 11:29:06 +01:00
313b16bec2 Support diff indexing on extract_docid_word_positions 2023-10-30 11:24:19 +01:00
1dd97578a8 Make the transform struct return diff-based documents obkvs 2023-10-30 11:22:07 +01:00
f5ef69293b deactivate prefix dbs 2023-10-30 11:22:07 +01:00
1c5705c164 clean PR warnings 2023-10-30 11:22:05 +01:00
66c2c82a18 Split wpp in several sorters 2023-10-30 11:15:02 +01:00
28a8d0ccda Fix word pair proximity 2023-10-30 11:15:02 +01:00
96be85396d Use a VecDeque in the wpp (word pair proximity) database 2023-10-30 11:15:02 +01:00
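A sketch of how a `VecDeque` can serve as the sliding window when extracting word pairs within a maximum proximity. This is illustrative only, with a hypothetical `MAX_DISTANCE` value, not milli's exact extractor:

```rust
use std::collections::VecDeque;

// Hypothetical constant for the sketch; the real value lives in milli.
const MAX_DISTANCE: u32 = 4;

/// Emit (left, right, proximity) triples for words whose positions are
/// closer than MAX_DISTANCE, using a VecDeque as the sliding window.
fn word_pairs(words: &[(String, u32)]) -> Vec<(String, String, u32)> {
    let mut window: VecDeque<(String, u32)> = VecDeque::new();
    let mut pairs = Vec::new();
    for (word, position) in words.iter().cloned() {
        // Drop the words that fell out of the proximity window.
        while window.front().map_or(false, |(_, p)| position - p >= MAX_DISTANCE) {
            window.pop_front();
        }
        for (left, left_pos) in &window {
            pairs.push((left.clone(), word.clone(), position - left_pos));
        }
        window.push_back((word, position));
    }
    pairs
}

fn main() {
    let words: Vec<(String, u32)> = vec![
        ("the".to_string(), 0),
        ("quick".to_string(), 1),
        ("brown".to_string(), 2),
        ("fox".to_string(), 5),
    ];
    for (l, r, prox) in word_pairs(&words) {
        println!("{l} .. {r} (proximity {prox})");
    }
}
```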
df9e5c8651 Generalize usage of CboRoaringBitmap codec to ease the use 2023-10-30 11:15:02 +01:00
b541d48847 Add a buffer to the obkv writer 2023-10-30 11:15:02 +01:00
8ccf32d1a0 Compute word_fid_docids before word_docids and exact_word_docids 2023-10-30 11:15:02 +01:00
db1ca21231 Add puffin profiling to the sorter-into-reader function 2023-10-30 11:15:00 +01:00
11ea5acff9 Fix 2023-10-30 11:13:10 +01:00
8d77736a67 Fix fid_word_docids 2023-10-30 11:13:10 +01:00
748b333161 Add a useful debug assert before key insertion in the database 2023-10-30 11:13:10 +01:00
17b647dfe5 Wip 2023-10-30 11:13:08 +01:00
62ea81bef6 Merge #4132
4132: Extract the creation and last updated timestamp from v2 dumps r=irevoire a=vivek-26

# Pull Request

## Related issue
Fixes #2989

## What does this PR do?
This PR - 
- extracts the `created_at` and `updated_at` dates from v2 dumps.
- updates the unit tests.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-10-24 08:50:57 +00:00
f28f09ae2f update tests for v2 dumps 2023-10-24 14:10:46 +05:30
eae9eab181 Merge #4126
4126: Make the experimental route /metrics activable via HTTP r=dureuill a=braddotcoffee

# Pull Request

## Related issue
Closes #4086

## What does this PR do?
- [x] Make `/metrics` available via HTTP as described in #4086 
- [x] The users can still launch Meilisearch using the `--experimental-enable-metrics` flag.
- [x] If the flag `--experimental-enable-metrics` is activated, a call to the `GET /experimental-features` route right after the launch will show `"metrics": true` even if the user has not called the `PATCH /experimental-features` route yet.
- [x] Even if the --experimental-enable-metrics flag is present at launch, calling the `PATCH /experimental-features` route with `"metrics": false` disables the experimental feature.
- [x] Update the spec
    - I was unable to find docs in this repository to update about the `/experimental-features` endpoint. I'll happily update if you point me in the right direction!

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: bwbonanno <bradfordbonanno@gmail.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-10-23 08:51:37 +00:00
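A std-only sketch of the runtime-toggle behavior described in the checklist above: the CLI flag only seeds the initial state, and a later PATCH to `/experimental-features` overwrites the in-memory value. Names and structure are hypothetical, not the actual index-scheduler code:

```rust
use std::sync::RwLock;

/// Hypothetical runtime feature flags: never persisted to the db,
/// seeded from the CLI, mutable over HTTP afterwards.
struct FeatureFlags {
    metrics: RwLock<bool>,
}

impl FeatureFlags {
    fn new(cli_enable_metrics: bool) -> Self {
        Self { metrics: RwLock::new(cli_enable_metrics) }
    }

    /// What a PATCH /experimental-features {"metrics": ...} call would do.
    fn set_metrics(&self, enabled: bool) {
        *self.metrics.write().unwrap() = enabled;
    }

    /// What GET /experimental-features and the /metrics guard would read.
    fn metrics(&self) -> bool {
        *self.metrics.read().unwrap()
    }
}

fn main() {
    // Launched with --experimental-enable-metrics: reported as enabled...
    let features = FeatureFlags::new(true);
    assert!(features.metrics());
    // ...until a PATCH with "metrics": false disables it at runtime.
    features.set_metrics(false);
    assert!(!features.metrics());
}
```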
cf8dad1ca0 index_scheduler.features() is no longer fallible 2023-10-23 10:38:56 +02:00
dd619913da Use RwLock to never persist cli state to db 2023-10-19 12:45:57 -07:00
9b55ff16e9 Merge #4134
4134: Bump rustix from 0.36.15 to 0.36.16 r=Kerollmops a=dependabot[bot]

Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.36.15 to 0.36.16.
<details>
<summary>Commits</summary>
<ul>
<li><a href="6534992521"><code>6534992</code></a> chore: Release rustix version 0.36.16</li>
<li><a href="4928cf7a38"><code>4928cf7</code></a> Disable riscv64 testing.</li>
<li><a href="8cc159c4c3"><code>8cc159c</code></a> Fix the <code>test_ttyname_ok</code> test when /dev/stdin is inaccessable. (<a href="https://redirect.github.com/bytecodealliance/rustix/issues/821">#821</a>)</li>
<li><a href="6dc7ba9478"><code>6dc7ba9</code></a> Downgrade dependencies and disable tests to compile under Rust 1.48.</li>
<li><a href="ded8986e7e"><code>ded8986</code></a> Disable MIPS in CI. (<a href="https://redirect.github.com/bytecodealliance/rustix/issues/793">#793</a>)</li>
<li><a href="739f9c3ba0"><code>739f9c3</code></a> Fixes for <code>Dir</code> on macOS, FreeBSD, and WASI.</li>
<li><a href="87481a97f4"><code>87481a9</code></a> Merge pull request from GHSA-c827-hfw6-qwvm</li>
<li>See full diff in <a href="https://github.com/bytecodealliance/rustix/compare/v0.36.15...v0.36.16">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rustix&package-manager=cargo&previous-version=0.36.15&new-version=0.36.16)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-19 08:01:36 +00:00
e761db582f Bump rustix from 0.36.15 to 0.36.16
Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.36.15 to 0.36.16.
- [Release notes](https://github.com/bytecodealliance/rustix/releases)
- [Commits](https://github.com/bytecodealliance/rustix/compare/v0.36.15...v0.36.16)

---
updated-dependencies:
- dependency-name: rustix
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-18 18:42:12 +00:00
d8c649b3cd Return recoverable error if we fail to retrieve metrics state 2023-10-18 08:28:24 -07:00
5e0485d8dd Merge #4131
4131: Reduce proximity range from 7 to 3 r=Kerollmops a=ManyTheFish

## Summary
This PR aims to reduce the impact of the proximity databases on the indexing time and on the database size by reducing the maximum distance between two words to be indexed in the proximity database.

## Stats

### Impact on database size and indexing time
![Impact on datasets](https://github.com/meilisearch/meilisearch/assets/6482087/28ed3d96-bdde-41c1-bdac-e90c1b1dbb23)

### Impact on search relevancy

<details>

| dataset_name | host_name        | Relevancy rate (Precision) | completion_rate  25.00% | completion_rate 50.00% | completion_rate 75.00% | completion_rate 100.00% |
|--------------|------------------|------------------------------------|-----------------|-----------------|-----------------|-----------------|
| FBIS         | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |           5.56% |
| FBIS         | 1_4_0            | percentile-75 |           0.00% |          12.50% |          35.00% |          45.00% |
| FBIS         | 1_4_0            | percentile-90 |          20.00% |          40.00% |                 |         100.00% |
| FBIS         | 1_4_0            | average       |           5.78% |          11.16% |          21.90% |          26.29% |
| FBIS         | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |           5.56% |
| FBIS         | reduce_proximity | percentile-75 |           0.00% |          15.00% |          35.00% |          40.00% |
| FBIS         | reduce_proximity | percentile-90 |          20.00% |          40.00% |          85.00% |         100.00% |
| FBIS         | reduce_proximity | average       |           5.55% |          11.34% |          21.75% |          26.14% |
| FR94         | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-50 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-75 |           0.00% |           5.00% |          15.00% |          42.11% |
| FR94         | 1_4_0            | percentile-90 |          15.00% |          54.55% |         100.00% |         100.00% |
| FR94         | 1_4_0            | average       |           5.95% |          12.07% |          18.70% |          25.57% |
| FR94         | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-50 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-75 |           0.00% |           5.00% |          15.00% |          42.11% |
| FR94         | reduce_proximity | percentile-90 |          15.00% |          54.55% |         100.00% |         100.00% |
| FR94         | reduce_proximity | average       |           5.79% |          12.00% |          18.70% |          25.53% |
| FT           | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |          10.00% |
| FT           | 1_4_0            | percentile-75 |           0.00% |          15.00% |          30.00% |          40.00% |
| FT           | 1_4_0            | percentile-90 |          20.00% |          50.00% |          65.00% |         100.00% |
| FT           | 1_4_0            | average       |           5.08% |          12.58% |          20.00% |          25.49% |
| FT           | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |          10.00% |
| FT           | reduce_proximity | percentile-75 |           0.00% |          15.00% |          30.00% |          40.00% |
| FT           | reduce_proximity | percentile-90 |          10.00% |          45.00% |          60.00% |         100.00% |
| FT           | reduce_proximity | average       |           5.01% |          12.64% |          20.10% |          25.53% |
| LAT          | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |           5.00% |
| LAT          | 1_4_0            | percentile-75 |           5.00% |          15.00% |          30.00% |          30.00% |
| LAT          | 1_4_0            | percentile-90 |          15.00% |          45.00% |          60.00% |          80.00% |
| LAT          | 1_4_0            | average       |           4.80% |          11.80% |          17.88% |          21.62% |
| LAT          | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |           5.00% |
| LAT          | reduce_proximity | percentile-75 |           0.00% |          11.11% |          25.00% |          35.00% |
| LAT          | reduce_proximity | percentile-90 |          15.00% |          45.00% |          55.00% |          80.00% |
| LAT          | reduce_proximity | average       |           4.43% |          11.23% |          17.32% |          21.45% |

</details>

### Impact on Search time

| dataset_name | host_name        |      25.00% |      50.00% |      75.00% |     100.00% | Average     |
|--------------|------------------|------------:|------------:|------------:|------------:|-------------|
| FBIS         | 1_4_0            |        3.45 | 7.446666667 | 9.773489933 | 9.620300752 | 7.572614338 |
| FBIS         | reduce_proximity | 2.983333333 | 5.316666667 | 6.911073826 | 7.637218045 | 5.712072968 |
| FR94         | 1_4_0            | 2.236666667 |        4.45 | 5.523489933 | 4.560150376 | 4.192576744 |
| FR94         | reduce_proximity |        2.09 | 3.991666667 | 4.981543624 | 4.266917293 | 3.832531896 |
| FT           | 1_4_0            | 5.956666667 | 9.656666667 | 13.86912752 | 10.83270677 |  10.0787919 |
| FT           | reduce_proximity |        4.51 | 5.981666667 | 7.701342282 | 6.766917293 |  6.23998156 |
| LAT          | 1_4_0            | 5.856666667 | 9.233333333 | 12.98322148 | 10.78759398 | 9.715203865 |
| LAT          | reduce_proximity |        6.91 | 6.706666667 | 8.463087248 | 8.265037594 | 7.586197877 |

## Technical approach

- Ensure the MAX_DISTANCE constant is used everywhere needed
- Reduce the MAX_DISTANCE from 8 to 4

## Related

TBD

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-10-18 14:56:08 +00:00
27eec21415 Fix tests 2023-10-18 16:03:22 +02:00
62cc97ba70 update tests to include created_at and updated-at in v2 dumps 2023-10-18 13:31:39 +05:30
fed59cc1d5 extract created_at and updated_at dates from v2 dumps 2023-10-18 13:30:24 +05:30
2b3adef796 Use index_scheduler from configured app_data in middleware 2023-10-17 08:17:13 -07:00
956cfc5487 Add runtime check to metrics middleware 2023-10-16 13:48:57 -07:00
12fc878640 Merge remote-tracking branch 'origin/main' into enable-metrics-http 2023-10-16 13:48:01 -07:00
0a2e8b92a9 Merge #4129
4129: Add webinar banner in README r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-10-16 17:35:48 +00:00
c7a3f80de6 Merge #4073
4073: Simplify Puffin report exports r=ManyTheFish a=Kerollmops

This PR changes how we export Puffin reports by directly writing them to disk when the `exportPuffinReports` [experimental feature is enabled](https://www.meilisearch.com/docs/learn/experimental/overview) on the `/experimental-features` route. It also adds more puffin logging to the deletion phase and the grenad helpers. The Puffin reports are identified by the date and time at which they are exported.

## Todo List
 - [x] Change the CLI flag to be an API experimental option.
 - [x] Create [a PRD for this experimental feature (private)](https://www.notion.so/meilisearch/Export-Puffin-Reports-091df151e71c4edfb7d72f4bf995b3ea).
 - [x] Create and complete [a product discussion](https://github.com/meilisearch/product/discussions/693) (copy/paste PROFILING markdown?).
 - [x] Update the _PROFILING.md_ markdown file instructions.
 - [x] Change the debug logs of the processing operation (visible in puffin viewer).

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-10-16 15:48:15 +00:00
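For context on what a Puffin "frame" per `IndexScheduler::tick` means, a minimal sketch using the puffin crate's scope and frame APIs. This is illustrative only and assumes the `puffin` crate; the actual export-to-disk wiring added by this PR lives in the index scheduler:

```rust
// Frame-based profiling: scopes are recorded between calls to new_frame().
fn tick() {
    puffin::profile_function!(); // records a scope covering this whole call
    {
        puffin::profile_scope!("process_batch");
        // ... batch processing work would happen here ...
    }
}

fn main() {
    puffin::set_scopes_on(true); // scopes are a no-op unless enabled
    for _ in 0..3 {
        tick();
        // One "frame" is recorded per scheduler tick.
        puffin::GlobalProfiler::lock().new_frame();
    }
}
```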
029d4de043 Add webinar banner in README 2023-10-16 14:38:10 +02:00
549f1bcccf Merge #4125
4125: Rename benchmark CI file to find it easily in the manifest list r=Kerollmops a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-10-16 11:38:28 +00:00
689ec7c7ad Make the experimental route /metrics activable via HTTP 2023-10-13 22:12:54 +00:00
3655d4bdca Move the puffin file export logic into the run function 2023-10-13 13:11:30 +02:00
055ca3935b Update index-scheduler/src/batch.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-13 13:11:30 +02:00
1b8871a585 Make cargo insta happy 2023-10-13 13:11:30 +02:00
bf8fac6676 Fix the tests 2023-10-13 13:11:30 +02:00
f2a9e1ebbb Improve the debugging experience in the puffin reports 2023-10-13 13:11:30 +02:00
c45c6cf54c Update the PROFILING.md file 2023-10-13 13:11:30 +02:00
513e61e9a3 Remove the experimental CLI flag 2023-10-13 13:11:29 +02:00
90a626bf80 Use the runtime feature to enable puffin report exporting 2023-10-13 13:11:29 +02:00
0d4acf2daa Fix the metrics product URL 2023-10-13 13:11:29 +02:00
58db8d85ec Add the exportPuffinReports option to the runtime features route 2023-10-13 13:11:29 +02:00
62dfd09dc6 Add more puffin logs to the deletion functions 2023-10-13 13:11:09 +02:00
656dadabea Expose an experimental flag to write the puffin reports to disk 2023-10-13 13:11:09 +02:00
c5f7893fbb Remove the puffin http dependency 2023-10-13 13:11:08 +02:00
8cf2ccf168 Rename benchmark CI file to find it easily in the manifest list 2023-10-12 18:41:26 +02:00
0913373a5e Merge #4122
4122: Bring back changes from `release-v1.4.1` into `main` r=Kerollmops a=curquiza



Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-12 15:57:47 +00:00
1a7f1282af Fix test to use new common Value type 2023-10-12 17:37:04 +02:00
bc747aac3a Cut the first 8 characters 2023-10-12 15:04:37 +02:00
be92376ab3 Fix originating commit branch 2023-10-12 13:51:41 +02:00
cf7e355735 Fix originating commit command 2023-10-12 13:12:53 +02:00
5f09d89ad1 Fetch the whole git history when cloning 2023-10-12 12:25:26 +02:00
6ecb26a3f8 Add more info on the commenting CI command 2023-10-12 11:54:56 +02:00
76c6f554d6 Merge #4101
4101: Bump webpki from 0.22.1 to 0.22.2 r=curquiza a=dependabot[bot]

Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.1 to 0.22.2.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/briansmith/webpki/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=webpki&package-manager=cargo&previous-version=0.22.1&new-version=0.22.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-12 08:46:04 +00:00
f343ef5f2f Merge #4108
4108: Fix bug where search with distinct attribute and no ranking, returns offset+limit hits r=curquiza a=vivek-26

# Pull Request

## Related issue
Fixes #4078 

## What does this PR do?
This PR - 
- Fixes a bug where a search with a distinct attribute and no ranking returns offset+limit hits.
- Adds unit and integration tests.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-10-12 07:51:29 +00:00
96982a768a Triggers for every type of issue_comment 2023-10-11 23:18:29 +02:00
fca78fbc46 Merge #4082
4082: Update sprint_issue.md r=curquiza a=curquiza

Following internal recent discussions

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-10-11 15:12:38 +00:00
67a678cfb6 Merge #4089
4089: Use a bufreader and bufwriter every time there is a grenad<file> r=curquiza a=irevoire

# Pull Request
Wrap all the files we give to a grenad in a `BufReader` or `BufWriter`.

The dump import I tried in the issue went from 2h to 10 minutes on my machine.

I also ran a bunch of benchmarks on my machine, and we're faster by a few seconds everywhere but nothing huge.

-----

The one thing I'm worried about is code that used to take the inner file out of a grenad and then read from it right away, without seeking back to the beginning of the file or reopening it.
Since we now use a BufReader, such a read would return bytes one buffer later and probably completely corrupt what we were supposed to read.

From what I see, it looks like it works, but I may have missed something, I don't know much about this part of the codebase.

This issue should not arise with the BufWriter, though: if we are unable to write the content of the buffer, I ensured that the BufWriter's `into_inner` returns an internal error.

## Related issue
Fixes #4087


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-11 14:27:00 +00:00
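A std-only sketch of the BufWriter concern described above: `into_inner` flushes the buffer and surfaces a failed write, so it can be turned into a plain I/O error instead of silently truncating the file. The error mapping and file name are hypothetical, not the exact milli code:

```rust
use std::fs::File;
use std::io::{self, BufWriter, Write};

/// Write a grenad-style payload through a BufWriter and recover the File,
/// turning a failed flush into an io::Error (sketch of the PR's approach).
fn write_payload(path: &str, payload: &[u8]) -> io::Result<File> {
    let file = File::create(path)?;
    let mut writer = BufWriter::new(file);
    writer.write_all(payload)?;
    // `into_inner` flushes the buffer; if that fails we get the error back
    // instead of losing the tail of the file.
    writer.into_inner().map_err(|e| e.into_error())
}

fn main() -> io::Result<()> {
    let file = write_payload("payload.grenad", b"some entries")?;
    drop(file);
    std::fs::remove_file("payload.grenad")
}
```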
d1331d8abf add integration test for distinct search with no ranking 2023-10-11 19:12:56 +05:30
19ba129165 add unit test for distinct search with no ranking 2023-10-11 19:02:27 +05:30
d4da06ff47 fix bug where distinct search with no ranking returns offset+limit hits 2023-10-11 19:02:16 +05:30
3e0471edae Only trigger CI on created or edited comments 2023-10-11 15:15:15 +02:00
432df03c4c Use the correct base filename in the comment bench CI 2023-10-11 14:57:03 +02:00
11958016dd Force a small if to avoid triggering the CI every time 2023-10-11 14:27:51 +02:00
63c250a04d Do not use the GITHUB_REF variable 2023-10-11 13:05:54 +02:00
06d8cd5b72 Make sure that we checkout on the right branch 2023-10-11 12:02:44 +02:00
c0f2724c2d Get rid of the newly introduced error code in favor of an io::Error 2023-10-10 15:12:23 +02:00
d772073dfa Use a bufreader every time there is a grenad<file> 2023-10-10 15:00:30 +02:00
8fe8ddea79 Merge #4112
4112: Update version for the next release (v1.4.1) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-10 09:05:10 +00:00
8a95bf28e5 Update version for the next release (v1.4.1) in Cargo.toml 2023-10-10 09:01:45 +00:00
43989fe2e4 Reduce proximity range from 7 to 3 2023-10-03 12:16:48 +02:00
c668a29ed5 Bump webpki from 0.22.1 to 0.22.2
Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.1 to 0.22.2.
- [Commits](https://github.com/briansmith/webpki/commits)

---
updated-dependencies:
- dependency-name: webpki
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-02 21:53:45 +00:00
b10eeb0e41 Update .github/ISSUE_TEMPLATE/sprint_issue.md 2023-09-26 16:47:04 +02:00
4a8515e9fc Update sprint_issue.md 2023-09-26 16:46:18 +02:00
206 changed files with 3022 additions and 5577 deletions

.github/ISSUE_TEMPLATE/sprint_issue.md

@ -7,19 +7,17 @@ assignees: ''
---
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product team resources: [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP
## Motivation
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Write a quick description of the usage if the usage has already been defined-->
Refer to the final spec to know the details and the final decisions about the usage.
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
## TODO


@ -8,11 +8,11 @@ env:
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/benchmark')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
@ -27,14 +27,25 @@ jobs:
reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
@ -76,9 +87,11 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
export base=$(git log --pretty=%p -n 1)
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
echo 'Here are your benchmarks diff 👊' >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base ${{ steps.file.outputs.basename }}.json >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt
gh pr comment ${GITHUB_REF#refs/heads/} --body-file body.txt
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt

Cargo.lock (generated, 60 changed lines)

@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"anyhow",
"bytes",
@ -1206,7 +1206,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"anyhow",
"big_s",
@ -1417,7 +1417,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"faux",
"tempfile",
@ -1439,7 +1439,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"insta",
"nom",
@ -1459,7 +1459,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"criterion",
"serde_json",
@ -1577,7 +1577,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"arbitrary",
"clap",
@ -1663,12 +1663,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grenad"
version = "0.4.4"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5232b2d157b7bf63d7abe1b12177039e58db2f29e377517c0cdee1578cca4c93"
checksum = "6a007932af5475ebb5c63bef8812bb1c36f317983bb4ca663e9d6dd58d6a0f8c"
dependencies = [
"bytemuck",
"byteorder",
"rayon",
"tempfile",
]
@ -1891,7 +1892,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"anyhow",
"big_s",
@ -2088,7 +2089,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"criterion",
"serde_json",
@ -2500,7 +2501,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"insta",
"md5",
@ -2509,7 +2510,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"actix-cors",
"actix-http",
@ -2564,7 +2565,6 @@ dependencies = [
"platform-dirs",
"prometheus",
"puffin",
"puffin_http",
"rand",
"rayon",
"regex",
@ -2600,7 +2600,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"base64 0.21.2",
"enum-iterator",
@ -2619,7 +2619,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"actix-web",
"anyhow",
@ -2673,7 +2673,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"big_s",
"bimap",
@ -2867,9 +2867,9 @@ dependencies = [
[[package]]
name = "obkv"
version = "0.2.0"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385"
checksum = "6c459142426056c639ff88d053ebaaaeca0ee1411c94362892398ef4ccd81080"
[[package]]
name = "once_cell"
@ -2996,7 +2996,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "permissive-json-pointer"
version = "1.4.0"
version = "1.4.1"
dependencies = [
"big_s",
"serde_json",
@ -3194,7 +3194,7 @@ dependencies = [
"byteorder",
"hex",
"lazy_static",
"rustix 0.36.15",
"rustix 0.36.16",
]
[[package]]
@ -3237,18 +3237,6 @@ dependencies = [
"serde",
]
[[package]]
name = "puffin_http"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
dependencies = [
"anyhow",
"crossbeam-channel",
"log",
"puffin",
]
[[package]]
name = "quote"
version = "1.0.32"
@ -3479,9 +3467,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.36.15"
version = "0.36.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c37f1bd5ef1b5422177b7646cba67430579cfe2ace80f284fee876bca52ad941"
checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab"
dependencies = [
"bitflags 1.3.2",
"errno",
@ -4444,9 +4432,9 @@ dependencies = [
[[package]]
name = "webpki"
version = "0.22.1"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e"
checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
dependencies = [
"ring",
"untrusted",

Cargo.toml

@ -18,7 +18,7 @@ members = [
]
[workspace.package]
version = "1.4.0"
version = "1.4.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

PROFILING.md

@ -1,14 +1,14 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.

README.md

@ -25,6 +25,12 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
---
### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
---
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">


@ -6,9 +6,7 @@ use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::update::{
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::Index;
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
@ -266,17 +264,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
@ -613,17 +601,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
@ -875,22 +853,41 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
}
fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
let mut wtxn = index.write_txn().unwrap();
let indexer_config = IndexerConfig::default();
for ids in document_ids_to_delete {
let external_documents_ids = index.external_documents_ids();
// FIXME: for filters matching a lot of documents, this will allocate a huge vec of external docids (strings).
// Since what we have is an iterator, it would be better to delete in chunks
let external_to_internal: std::result::Result<Vec<_>, RoaringBitmap> =
external_documents_ids
.find_external_id_of(&wtxn, ids)
.unwrap()
.only_external_ids()
.collect();
let ids = external_to_internal.unwrap();
let config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
.unwrap();
(builder, _) = builder.remove_documents(ids).unwrap();
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}
fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut group = c.benchmark_group("indexing");
group.sample_size(BENCHMARK_ITERATION);
@ -1112,17 +1109,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
@ -1338,17 +1325,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});


@ -526,12 +526,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
}
"###);
@ -541,12 +541,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
}
"###);
@ -571,12 +571,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
}
"###);
@ -617,12 +617,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
}
"###);
@ -632,12 +632,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
}
"###);
@ -647,12 +647,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
}
"###);


@ -1,24 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@ -1,38 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}


@ -1,31 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry;
pub type Kind = updates::UpdateMeta;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
@ -107,8 +108,11 @@ impl V2Reader {
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
V2IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
index,
BufReader::new(
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
)
}))
}
@ -143,16 +147,41 @@ pub struct V2IndexReader {
}
impl V2IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let mut created_at = None;
let mut updated_at = None;
for line in tasks.lines() {
let task: Task = serde_json::from_str(&line?)?;
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
continue;
}
let new_created_at = match task.update.meta() {
Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
_ => None,
};
let new_updated_at = task.update.finished_at();
if created_at.is_none() || created_at > new_created_at {
created_at = new_created_at;
}
if updated_at.is_none() || updated_at < new_updated_at {
updated_at = new_updated_at;
}
}
let current_time = OffsetDateTime::now_utc();
let metadata = IndexMetadata {
uid: name,
uid: index_uuid.uid.clone(),
primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
created_at: created_at.unwrap_or(current_time),
updated_at: updated_at.unwrap_or(current_time),
};
let ret = V2IndexReader {
@ -248,12 +277,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
}
"###);
@ -263,12 +292,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
}
"###);
@ -293,12 +322,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
}
"###);
@ -340,12 +369,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
}
"###);
@ -355,12 +384,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
}
"###);
@ -370,12 +399,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
}
"###);


@ -227,4 +227,14 @@ impl UpdateStatus {
_ => None,
}
}
pub fn finished_at(&self) -> Option<OffsetDateTime> {
match self {
UpdateStatus::Processing(_) => None,
UpdateStatus::Enqueued(_) => None,
UpdateStatus::Processed(u) => Some(u.processed_at),
UpdateStatus::Aborted(_) => None,
UpdateStatus::Failed(u) => Some(u.failed_at),
}
}
}
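The new finished_at accessor is what feeds the created_at/updated_at fold in V2IndexReader::new above. As a reference, here is a minimal standalone sketch of that fold: the earliest finished creation-like task becomes created_at and the latest finished task becomes updated_at. Plain u64 timestamps and a boolean flag stand in for OffsetDateTime and the Kind check; both are simplifications, not the real types.

// Sketch only: `is_creation` stands in for the DocumentsAddition/Settings check,
// and u64 timestamps stand in for `OffsetDateTime`.
fn creation_and_update_dates(
    finished_tasks: impl IntoIterator<Item = (bool, u64)>,
) -> (Option<u64>, Option<u64>) {
    let (mut created_at, mut updated_at) = (None, None);
    for (is_creation, finished_at) in finished_tasks {
        // the earliest creation-like task determines `created_at`
        if is_creation && created_at.map_or(true, |c| finished_at < c) {
            created_at = Some(finished_at);
        }
        // the latest finished task determines `updated_at`
        if updated_at.map_or(true, |u| finished_at > u) {
            updated_at = Some(finished_at);
        }
    }
    (created_at, updated_at)
}

fn main() {
    let tasks = [(true, 10), (false, 25), (true, 12)];
    assert_eq!(creation_and_update_dates(tasks), (Some(10), Some(25)));
}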


@ -19,18 +19,18 @@ one indexing operation.
use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info};
use log::{debug, error, info, trace};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@ -43,7 +43,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
@ -199,6 +199,29 @@ impl Batch {
}
}
impl fmt::Display for Batch {
/// A human-readable label used when debugging the profiling reports.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let index_uid = self.index_uid();
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
}
}
}
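Below is a tiny self-contained sketch of the same idea: a Display implementation that builds the human-readable scope label later handed to puffin::profile_function!(batch.to_string()). The enum and its variants are illustrative placeholders, not the real Batch type.

use std::fmt;

// Illustrative stand-in for `Batch`, only to show the labelling pattern.
enum BatchLabel {
    IndexCreation { index_uid: String, task_ids: Vec<u32> },
    Dump { task_ids: Vec<u32> },
}

impl fmt::Display for BatchLabel {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            BatchLabel::IndexCreation { index_uid, task_ids } => {
                write!(f, "IndexCreation on {index_uid:?} from tasks: {task_ids:?}")
            }
            BatchLabel::Dump { task_ids } => write!(f, "Dump from tasks: {task_ids:?}"),
        }
    }
}

fn main() {
    let batch = BatchLabel::IndexCreation { index_uid: "movies".into(), task_ids: vec![0, 1] };
    // This string is what names the profiling scope.
    assert_eq!(batch.to_string(), "IndexCreation on \"movies\" from tasks: [0, 1]");
}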
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
@ -213,6 +236,30 @@ impl IndexOperation {
}
}
impl fmt::Display for IndexOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
IndexOperation::SettingsAndDocumentOperation { .. } => {
f.write_str("IndexOperation::SettingsAndDocumentOperation")
}
}
}
}
impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
@ -581,7 +628,7 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(format!("{:?}", batch));
puffin::profile_function!(batch.to_string());
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@ -848,7 +895,7 @@ impl IndexScheduler {
})?;
// 4. Dump experimental feature settings
let features = self.features()?.runtime_features();
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;
let dump_uid = started_at.format(format_description!(
@ -1143,7 +1190,7 @@ impl IndexScheduler {
index,
indexer_config,
config,
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1190,7 +1237,8 @@ impl IndexScheduler {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
// Invariant: remove_documents always returns Ok for the inner result
let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@ -1201,23 +1249,11 @@ impl IndexScheduler {
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
}
}
@ -1232,7 +1268,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
@ -1240,21 +1276,42 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
documents.iter().flatten().for_each(|id| {
builder.delete_external_id(id);
});
let indexer_config = self.index_mapper.indexer_config();
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let must_stop_processing = self.must_stop_processing.clone();
let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
index,
indexer_config,
config,
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
let document_ids = documents.iter().flatten().cloned().collect();
let (new_builder, user_result) = builder.remove_documents(document_ids)?;
builder = new_builder;
// Invariant: remove_documents always returns Ok for the inner result
let count = user_result.unwrap();
for (task, documents) in tasks.iter_mut().zip(documents) {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents.len(),
deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
deleted_documents: Some(count.min(documents.len() as u64)),
});
}
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
info!("document deletion done: {:?}", addition);
}
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
@ -1266,7 +1323,13 @@ impl IndexScheduler {
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
@ -1500,6 +1563,8 @@ impl IndexScheduler {
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<u64> {
let filter = Filter::from_json(filter)?;
@ -1510,9 +1575,41 @@ fn delete_document_by_filter<'a>(
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
delete_operation.delete_documents(&candidates);
delete_operation.execute().map(|result| result.deleted_documents)?
let external_documents_ids = index.external_documents_ids();
// FIXME: for filters matching a lot of documents, this will allocate a huge vec of external docids (strings).
// Since what we have is an iterator, it would be better to delete in chunks
let external_to_internal: std::result::Result<Vec<_>, RoaringBitmap> =
external_documents_ids
.find_external_id_of(wtxn, candidates)?
.only_external_ids()
.collect();
let document_ids = match external_to_internal {
Ok(external_ids) => external_ids,
Err(remaining_ids) => panic!("Couldn't find some external ids {:?}", remaining_ids),
};
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, user_result) = builder.remove_documents(document_ids)?;
builder = new_builder;
// Invariant: remove_documents always returns Ok for the inner result
let count = user_result.unwrap();
let _ = builder.execute()?;
count
} else {
0
})


@ -1,6 +1,8 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RwTxn};
use crate::error::FeatureNotEnabledError;
use crate::Result;
@ -9,20 +11,19 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)]
pub(crate) struct FeatureData {
runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
instance: InstanceTogglableFeatures,
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
}
#[derive(Debug, Clone, Copy)]
pub struct RoFeatures {
runtime: RuntimeTogglableFeatures,
instance: InstanceTogglableFeatures,
}
impl RoFeatures {
fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
let runtime = data.runtime_features(txn)?;
Ok(Self { runtime, instance: data.instance })
fn new(data: &FeatureData) -> Self {
let runtime = data.runtime_features();
Self { runtime }
}
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@ -43,13 +44,13 @@ impl RoFeatures {
}
pub fn check_metrics(&self) -> Result<()> {
if self.instance.metrics {
if self.runtime.metrics {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting metrics",
feature: "metrics",
issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
issue_link: "https://github.com/meilisearch/product/discussions/625",
}
.into())
}
@ -67,15 +68,36 @@ impl RoFeatures {
.into())
}
}
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
}
}
impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
Ok(Self { runtime: runtime_features, instance: instance_features })
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
..persisted_features
}));
Ok(Self { persisted: runtime_features_db, runtime })
}
pub fn put_runtime_features(
@ -83,16 +105,25 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is acquired and released in methods that don't call each other
// 2. there's a panic while the lock is held -> it is only used for an assignment here.
let mut toggled_features = self.runtime.write().unwrap();
*toggled_features = features;
Ok(())
}
fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
fn runtime_features(&self) -> RuntimeTogglableFeatures {
// sound to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is acquired and released in methods that don't call each other
// 2. there's a panic while the lock is held -> it is only used for copying the data here
*self.runtime.read().unwrap()
}
pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
RoFeatures::new(txn, self)
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
}
}
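A stripped-down sketch of the caching pattern introduced here: the runtime feature flags are kept behind an Arc<RwLock<_>> so reads no longer need an LMDB transaction, and writes refresh the in-memory copy after persisting. The persist closure below stands in for the heed put-and-commit, an assumption made only to keep the sketch self-contained.

use std::sync::{Arc, RwLock};

#[derive(Default, Clone, Copy)]
struct Features {
    metrics: bool,
    export_puffin_reports: bool,
}

#[derive(Clone)]
struct FeatureCache {
    runtime: Arc<RwLock<Features>>,
}

impl FeatureCache {
    // `persisted` would be read from the database at startup; `instance_metrics`
    // mirrors the `--experimental-enable-metrics` instance flag.
    fn new(persisted: Features, instance_metrics: bool) -> Self {
        let runtime = Features { metrics: instance_metrics || persisted.metrics, ..persisted };
        Self { runtime: Arc::new(RwLock::new(runtime)) }
    }

    fn put_runtime_features(&self, features: Features, persist: impl FnOnce(Features)) {
        persist(features); // write to the database first, then refresh the cache
        *self.runtime.write().unwrap() = features;
    }

    fn runtime_features(&self) -> Features {
        *self.runtime.read().unwrap()
    }
}

fn main() {
    let cache = FeatureCache::new(Features::default(), true);
    assert!(cache.runtime_features().metrics);
    cache.put_runtime_features(Features { metrics: false, export_puffin_reports: true }, |_| {});
    assert!(!cache.runtime_features().metrics);
}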


@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,


@ -33,6 +33,7 @@ pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@ -52,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
@ -314,6 +316,9 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@ -364,6 +369,7 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@ -457,6 +463,7 @@ impl IndexScheduler {
env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
@ -572,17 +579,46 @@ impl IndexScheduler {
run.wake_up.wait();
loop {
let puffin_enabled = run.features().check_puffin().is_ok();
puffin::set_scopes_on(puffin_enabled);
puffin::GlobalProfiler::lock().new_frame();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
log::error!("{}", e);
log::error!("{e}");
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
}
// Let's write the previous frame to disk but only if
// the user wanted to profile with puffin.
if puffin_enabled {
let mut frame_view = run.puffin_frame.lock();
if !frame_view.is_empty() {
let now = OffsetDateTime::now_utc();
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
log::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
log::error!("{e}");
}
if let Err(e) = file.sync_all() {
log::error!("{e}");
}
// We erase this frame view as it is no longer useful. We want to
// measure the new frames now that we exported the previous ones.
*frame_view = FrameView::default();
}
}
}
})
.unwrap();
@ -1062,8 +1098,6 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start);
}
puffin::GlobalProfiler::lock().new_frame();
self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1259,9 +1293,8 @@ impl IndexScheduler {
Ok(IndexStats { is_indexing, inner_stats: index_stats })
}
pub fn features(&self) -> Result<RoFeatures> {
let rtxn = self.read_txn()?;
self.features.features(rtxn)
pub fn features(&self) -> RoFeatures {
self.features.features()
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {


@ -324,7 +324,6 @@ impl ErrorCode for milli::Error {
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,


@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]


@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",


@ -114,10 +114,7 @@ pub fn create_app(
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
let app = app.wrap(middleware::RouteMetrics);
app.wrap(
Cors::default()
.send_wildcard()
@ -365,7 +362,7 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| log::debug!("update: {:?}", indexing_step),
|indexing_step| log::trace!("update: {:?}", indexing_step),
|| false,
)?;


@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
#[cfg(feature = "profile-with-puffin")]
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"


@ -3,8 +3,10 @@
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;
pub struct RouteMetrics;
@ -47,19 +49,27 @@ where
fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None;
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
// calling unwrap here is safe because the index scheduler is added to the app data while creating the actix app.
// also, the tests will fail if it is not present.
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
let features = index_scheduler.features();
if features.check_metrics().is_ok() {
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}
.inc();
}
};
let fut = self.service.call(req);


@ -29,12 +29,12 @@ async fn get_features(
>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features()?;
) -> HttpResponse {
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
debug!("returns: {:?}", features.runtime_features());
Ok(HttpResponse::Ok().json(features.runtime_features()))
HttpResponse::Ok().json(features.runtime_features())
}
#[derive(Debug, Deserr)]
@ -44,6 +44,10 @@ pub struct RuntimeTogglableFeatures {
pub score_details: Option<bool>,
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@ -55,26 +59,36 @@ async fn patch_features(
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// it renames fields to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add more fields in the future.
let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
new_features;
let meilisearch_types::features::RuntimeTogglableFeatures {
score_details,
vector_store,
metrics,
export_puffin_reports,
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);


@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.external_documents_ids()
.get(&txn, doc_id)?
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index


@ -68,7 +68,7 @@ pub async fn search(
}
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name, features)
})


@ -157,7 +157,7 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {
@ -192,7 +192,7 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {


@ -19,7 +19,7 @@ pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features()?.check_metrics()?;
index_scheduler.features().check_metrics()?;
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
let mut error = ResponseError::from(AuthenticationError::InvalidToken);


@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let features = index_scheduler.features()?;
let features = index_scheduler.features();
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code


@ -2,10 +2,12 @@ use std::collections::{HashMap, HashSet};
use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use time::{Duration, OffsetDateTime};
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@ -195,7 +197,9 @@ async fn access_authorized_master_key() {
#[actix_rt::test]
async fn access_authorized_restricted_index() {
let mut server = Server::new_auth().await;
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key letting only the needed action.


@ -202,6 +202,10 @@ impl Server {
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@ -221,7 +225,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
skip_index_budget: true,
..Parser::parse_from(None as Option<&str>)
},
experimental_enable_metrics: true,
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)
}
}


@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 4,
"deletedDocuments": 2,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
},
"error": null,


@ -1,4 +1,7 @@
use crate::common::Server;
use meilisearch::Opt;
use tempfile::TempDir;
use crate::common::{default_settings, Server};
use crate::json;
/// Feature name to test against.
@ -16,7 +19,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -26,7 +31,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -36,7 +43,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -47,7 +56,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -58,11 +69,73 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
}
#[actix_rt::test]
async fn experimental_feature_metrics() {
// instance flag for metrics enables metrics at startup
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let server = Server::new_with_options(enable_metrics).await.unwrap();
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"exportPuffinReports": false
}
"###);
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
// metrics are not returned in json format
// so the test server will return null
meili_snap::snapshot!(response, @"null");
// disabling metrics results in invalid request
let (response, code) = server.set_features(json!({"metrics": false})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"false");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enabling metrics via HTTP results in valid request
let (response, code) = server.set_features(json!({"metrics": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"true");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
}
#[actix_rt::test]
async fn errors() {
let server = Server::new().await;
@ -73,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"


@ -0,0 +1,63 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{"productId": 1, "shopId": 1},
{"productId": 2, "shopId": 1},
{"productId": 3, "shopId": 2},
{"productId": 4, "shopId": 2},
{"productId": 5, "shopId": 3},
{"productId": 6, "shopId": 3},
{"productId": 7, "shopId": 4},
{"productId": 8, "shopId": 4},
{"productId": 9, "shopId": 5},
{"productId": 10, "shopId": 5}
])
});
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
async fn distinct_search_with_offset_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(Value(response): Value) -> Vec<i64> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[1, 2]");
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[3, 4]");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @"[5]");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
}


@ -1,6 +1,7 @@
// This modules contains all the test concerning search. Each particular feature of the search
// should be tested in its own module to isolate tests and keep the tests readable.
mod distinct;
mod errors;
mod facet_search;
mod formatted;
@ -816,7 +817,7 @@ async fn experimental_feature_score_details() {
},
"proximity": {
"order": 2,
"score": 0.875
"score": 0.75
},
"attribute": {
"order": 3,


@ -26,8 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = [
"tempfile",
grenad = { version = "0.4.5", default-features = false, features = [
"rayon", "tempfile"
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
"lmdb", "read-txn-no-tls"


@ -1,4 +1,5 @@
use std::fs::File;
use std::io::BufReader;
use std::{io, str};
use obkv::KvReader;
@ -19,14 +20,14 @@ use crate::FieldId;
pub struct EnrichedDocumentsBatchReader<R> {
documents: DocumentsBatchReader<R>,
primary_key: String,
external_ids: grenad::ReaderCursor<File>,
external_ids: grenad::ReaderCursor<BufReader<File>>,
}
impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
pub fn new(
documents: DocumentsBatchReader<R>,
primary_key: String,
external_ids: grenad::Reader<File>,
external_ids: grenad::Reader<BufReader<File>>,
) -> Result<Self, Error> {
if documents.documents_count() as u64 == external_ids.len() {
Ok(EnrichedDocumentsBatchReader {
@ -75,7 +76,7 @@ pub struct EnrichedDocument<'a> {
pub struct EnrichedDocumentsBatchCursor<R> {
documents: DocumentsBatchCursor<R>,
primary_key: String,
external_ids: grenad::ReaderCursor<File>,
external_ids: grenad::ReaderCursor<BufReader<File>>,
}
impl<R> EnrichedDocumentsBatchCursor<R> {


@ -89,8 +89,6 @@ pub enum FieldIdMapMissingEntry {
#[derive(Error, Debug)]
pub enum UserError {
#[error("A soft deleted internal document id have been used: `{document_id}`.")]
AccessingSoftDeletedDocument { document_id: DocumentId },
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]


@ -1,159 +1,146 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::TryInto;
use std::{fmt, str};
use fst::map::IndexedValue;
use fst::{IntoStreamer, Streamer};
use heed::types::{OwnedType, Str};
use heed::{Database, RoIter, RoTxn, RwTxn};
use roaring::RoaringBitmap;
const DELETED_ID: u64 = u64::MAX;
use crate::{DocumentId, BEU32};
pub struct ExternalDocumentsIds<'a> {
pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
soft_deleted_docids: RoaringBitmap,
pub enum DocumentOperationKind {
Create,
Delete,
}
impl<'a> ExternalDocumentsIds<'a> {
pub fn new(
hard: fst::Map<Cow<'a, [u8]>>,
soft: fst::Map<Cow<'a, [u8]>>,
soft_deleted_docids: RoaringBitmap,
) -> ExternalDocumentsIds<'a> {
ExternalDocumentsIds { hard, soft, soft_deleted_docids }
}
pub struct DocumentOperation {
pub external_id: String,
pub internal_id: DocumentId,
pub kind: DocumentOperationKind,
}
pub fn into_static(self) -> ExternalDocumentsIds<'static> {
ExternalDocumentsIds {
hard: self.hard.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
soft: self.soft.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
soft_deleted_docids: self.soft_deleted_docids,
}
pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
impl ExternalDocumentsIds {
pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
ExternalDocumentsIds(db)
}
/// Returns `true` if hard and soft external documents lists are empty.
pub fn is_empty(&self) -> bool {
self.hard.is_empty() && self.soft.is_empty()
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
self.0.is_empty(rtxn).map_err(Into::into)
}
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
let external_id = external_id.as_ref();
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
Some(id) if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) => {
Some(id.try_into().unwrap())
}
_otherwise => None,
}
}
/// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
/// don't contain any soft deleted document id.
pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
let mut new_hard_builder = fst::MapBuilder::memory();
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
while let Some((external_id, docids)) = iter.next() {
// prefer selecting the ids from soft, always
let id = indexed_last_value(docids).unwrap();
if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
new_hard_builder.insert(external_id, id)?;
}
}
drop(iter);
// Delete soft map completely
self.soft = fst::Map::default().map_data(Cow::Owned)?;
// We save the new map as the new hard map.
self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
Ok(())
}
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
let union_op = self.soft.op().add(other).r#union();
let mut new_soft_builder = fst::MapBuilder::memory();
let mut iter = union_op.into_stream();
while let Some((external_id, marked_docids)) = iter.next() {
let id = indexed_last_value(marked_docids).unwrap();
new_soft_builder.insert(external_id, id)?;
}
drop(iter);
// We save the new map as the new soft map.
self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
self.merge_soft_into_hard()
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
}
/// An helper function to debug this type, returns an `HashMap` of both,
/// soft and hard fst maps, combined.
pub fn to_hash_map(&self) -> HashMap<String, u32> {
let mut map = HashMap::new();
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
while let Some((external_id, marked_docids)) = iter.next() {
let id = indexed_last_value(marked_docids).unwrap();
if id != DELETED_ID {
let external_id = str::from_utf8(external_id).unwrap();
map.insert(external_id.to_owned(), id.try_into().unwrap());
}
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
let mut map = HashMap::default();
for result in self.0.iter(rtxn)? {
let (external, internal) = result?;
map.insert(external.to_owned(), internal.get());
}
map
Ok(map)
}
/// Return an fst of the combined hard and soft deleted ID.
pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
if self.soft.is_empty() {
return Ok(Cow::Borrowed(&self.hard));
}
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
let mut new_hard_builder = fst::MapBuilder::memory();
while let Some((external_id, marked_docids)) = iter.next() {
let value = indexed_last_value(marked_docids).unwrap();
if value != DELETED_ID {
new_hard_builder.insert(external_id, value)?;
}
}
drop(iter);
Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
/// Looks for the internal ids in the passed bitmap, and returns an iterator over the mapping between
/// these internal ids and their external id.
///
/// The returned iterator has `Result<(String, DocumentId), RoaringBitmap>` as `Item`,
/// where the returned values can be:
/// - `Ok((external_id, internal_id))`: if a mapping was found
/// - `Err(remaining_ids)`: if the external ids for some of the requested internal ids weren't found.
/// In that case the returned bitmap contains the internal ids whose external ids were not found after traversing
/// the entire fst.
pub fn find_external_id_of<'t>(
&self,
rtxn: &'t RoTxn,
internal_ids: RoaringBitmap,
) -> heed::Result<ExternalToInternalOwnedIterator<'t>> {
self.0.iter(rtxn).map(|iter| ExternalToInternalOwnedIterator { iter, internal_ids })
}
fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
if self.soft.len() >= self.hard.len() / 2 {
self.hard = self.to_fst()?.into_owned();
self.soft = fst::Map::default().map_data(Cow::Owned)?;
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
///
/// If the list contains multiple operations on the same external id, then the result is unspecified.
///
/// # Panics
///
/// - If attempting to delete a document that doesn't exist
/// - If attempting to create a document that already exists
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
for DocumentOperation { external_id, internal_id, kind } in operations {
match kind {
DocumentOperationKind::Create => {
self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
}
DocumentOperationKind::Delete => {
if !self.0.delete(wtxn, &external_id)? {
panic!("Attempting to delete a non-existing document")
}
}
}
}
Ok(())
}
}
impl fmt::Debug for ExternalDocumentsIds<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
/// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
self.0.iter(rtxn)
}
}
impl Default for ExternalDocumentsIds<'static> {
fn default() -> Self {
ExternalDocumentsIds {
hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
soft_deleted_docids: RoaringBitmap::new(),
/// An iterator over mappings between requested internal ids and external ids.
///
/// See [`ExternalDocumentsIds::find_external_id_of`] for details.
pub struct ExternalToInternalOwnedIterator<'t> {
iter: RoIter<'t, Str, OwnedType<BEU32>>,
internal_ids: RoaringBitmap,
}
impl<'t> Iterator for ExternalToInternalOwnedIterator<'t> {
/// A result indicating if a mapping was found, or if the stream was exhausted without finding all internal ids.
type Item = Result<(&'t str, DocumentId), RoaringBitmap>;
fn next(&mut self) -> Option<Self::Item> {
// if all requested ids were found, we won't find any other, so short-circuit
if self.internal_ids.is_empty() {
return None;
}
loop {
let (external, internal) = match self.iter.next() {
Some(Ok((external, internal))) => (external, internal),
// TODO manage this better, remove panic
Some(Err(e)) => panic!("{}", e),
_ => {
// we exhausted the stream but we still have some internal ids to find
let remaining_ids = std::mem::take(&mut self.internal_ids);
return Some(Err(remaining_ids));
// note: next calls to `next` will return `None` since we replaced the internal_ids
// with the default empty bitmap
}
};
let internal = internal.get();
let was_contained = self.internal_ids.remove(internal);
if was_contained {
return Some(Ok((external, internal)));
}
}
}
}
/// Returns the value of the `IndexedValue` with the highest _index_.
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
impl<'t> ExternalToInternalOwnedIterator<'t> {
/// Returns the bitmap of internal ids whose external ids are yet to be found
pub fn remaining_internal_ids(&self) -> &RoaringBitmap {
&self.internal_ids
}
/// Consumes this iterator and returns an iterator over only the external ids, ignoring the internal ids.
///
/// Use this when you don't need the mapping between the external and the internal ids.
pub fn only_external_ids(self) -> impl Iterator<Item = Result<String, RoaringBitmap>> + 't {
self.map(|res| res.map(|(external, _internal)| external.to_owned()))
}
}
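A condensed sketch of the lookup pattern behind find_external_id_of and only_external_ids: scan the external-to-internal mapping once, stop as soon as every requested internal id is resolved, and report the ids that were never found. A BTreeMap and a BTreeSet stand in for the heed database and the RoaringBitmap, purely to keep the sketch self-contained.

use std::collections::{BTreeMap, BTreeSet};

// Sketch only: a BTreeMap stands in for the LMDB `Database<Str, OwnedType<BEU32>>`,
// and a BTreeSet stands in for the RoaringBitmap of requested internal ids.
fn find_external_ids_of(
    db: &BTreeMap<String, u32>,
    mut internal_ids: BTreeSet<u32>,
) -> Result<Vec<(String, u32)>, BTreeSet<u32>> {
    let mut found = Vec::new();
    for (external, &internal) in db {
        if internal_ids.is_empty() {
            break; // all requested ids were resolved, stop scanning early
        }
        if internal_ids.remove(&internal) {
            found.push((external.clone(), internal));
        }
    }
    if internal_ids.is_empty() {
        Ok(found)
    } else {
        // some internal ids had no external id: report them, like the iterator above does
        Err(internal_ids)
    }
}

fn main() {
    let db = BTreeMap::from([("doc-a".to_string(), 0), ("doc-b".to_string(), 1)]);
    assert_eq!(
        find_external_ids_of(&db, BTreeSet::from([1])),
        Ok(vec![("doc-b".to_string(), 1)])
    );
    assert_eq!(find_external_ids_of(&db, BTreeSet::from([7])), Err(BTreeSet::from([7])));
}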


@ -6,6 +6,7 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
/// This is the limit where using a byteorder became less size efficient
/// than using a direct roaring encoding, it is also the point where we are able
@ -99,6 +100,33 @@ impl CboRoaringBitmapCodec {
Ok(())
}
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {
// Deserialize the bitmap that is already there
let mut previous = Self::deserialize_from(previous)?;
// Remove the integers we no longer want from the previous bitmap
if let Some(value) = deladd.get(DelAdd::Deletion) {
previous -= Self::deserialize_from(value)?;
}
// Insert the new integers we want in the previous bitmap
if let Some(value) = deladd.get(DelAdd::Addition) {
previous |= Self::deserialize_from(value)?;
}
if previous.is_empty() {
return Ok(None);
}
Self::serialize_into(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
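The same del/add merge, sketched over plain sets to make the semantics explicit: start from the previous value, subtract the deletions, union in the additions, and return None when the result is empty so the caller can delete the key from the database. HashSet stands in for the roaring bitmaps and their serialized buffers.

use std::collections::HashSet;

// Sketch of the DelAdd merge semantics: `None` means "delete the entry from the DB".
fn merge_deladd(
    previous: &HashSet<u32>,
    deletions: Option<&HashSet<u32>>,
    additions: Option<&HashSet<u32>>,
) -> Option<HashSet<u32>> {
    let mut merged = previous.clone();
    if let Some(del) = deletions {
        merged.retain(|x| !del.contains(x)); // remove the integers we no longer want
    }
    if let Some(add) = additions {
        merged.extend(add.iter().copied()); // insert the new integers
    }
    if merged.is_empty() {
        None
    } else {
        Some(merged)
    }
}

fn main() {
    let previous: HashSet<u32> = HashSet::from([1, 2, 3]);
    let del = HashSet::from([1, 2, 3]);
    let add = HashSet::from([4]);
    assert_eq!(merge_deladd(&previous, Some(&del), Some(&add)), Some(HashSet::from([4])));
    assert_eq!(merge_deladd(&previous, Some(&del), None), None);
}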
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {

File diff suppressed because it is too large


@ -2,7 +2,7 @@ use std::cmp;
use crate::{relative_from_absolute_position, Position};
pub const MAX_DISTANCE: u32 = 8;
pub const MAX_DISTANCE: u32 = 4;
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
if lhs <= rhs {


@ -13,7 +13,7 @@ use crate::heed_codec::ByteSliceRefCodec;
/// The documents returned by the iterator are grouped by the facet values that
/// determined their rank. For example, given the documents:
///
/// ```ignore
/// ```text
/// 0: { "colour": ["blue", "green"] }
/// 1: { "colour": ["blue", "red"] }
/// 2: { "colour": ["orange", "red"] }
@ -22,7 +22,7 @@ use crate::heed_codec::ByteSliceRefCodec;
/// ```
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
/// over the following elements:
/// ```ignore
/// ```text
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
/// [3] // same for "green"
/// [2] // same for "orange"


@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
// and finally we delete all the soft_deleted_documents, again, only once at the very end
self.inner_evaluate(rtxn, index, &filterable_fields)
.map(|result| result - soft_deleted_documents)
}
fn evaluate_operator(


@ -46,18 +46,27 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
if let Some(distinct_fid) = distinct_fid {
let mut excluded = RoaringBitmap::new();
let mut results = vec![];
let mut skip = 0;
for docid in universe.iter() {
if results.len() >= from + length {
if results.len() >= length {
break;
}
if excluded.contains(docid) {
continue;
}
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
skip += 1;
if skip <= from {
continue;
}
results.push(docid);
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,


@ -1,6 +1,7 @@
#![allow(clippy::too_many_arguments)]
use super::ProximityCondition;
use crate::proximity::MAX_DISTANCE;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@ -35,7 +36,7 @@ pub fn build_edges(
}
let mut conditions = vec![];
for cost in right_ngram_max..(7 + right_ngram_max) {
for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
conditions.push((
cost as u32,
conditions_interner.insert(ProximityCondition::Uninit {
@ -47,7 +48,7 @@ pub fn build_edges(
}
conditions.push((
(7 + right_ngram_max) as u32,
((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
));


@ -273,7 +273,7 @@ fn test_proximity_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@ -282,11 +282,11 @@ fn test_proximity_simple() {
"\"the quickbrown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the very lazy dog\"",
"\"brown quick fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lazy. dog\"",
"\"dog the quick brown fox jumps over the lazy\"",
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
"\"brown quick fox jumps over the lazy dog\"",
"\"the. quick brown fox jumps over the lazy. dog\"",
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
]
"###);
}
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal of the summer\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@ -423,20 +423,20 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best winter meal\"",
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"this is the best meal of winter\"",
"\"this is the best winter meal\"",
"\"winter x y best\"",
"\"winter x best\"",
"\"winter best\"",
]
"###);
@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal of the winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
]
"###);


@ -68,8 +68,8 @@ fn test_trap_basic() {
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
Typo(
@ -82,8 +82,8 @@ fn test_trap_basic() {
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
Typo(


@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
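Note on the rank/max_rank pairs rewritten in these proximity snapshots: they all shrink by the same pattern, which is consistent with the maximum per-word-pair proximity cost dropping from 7 to 3 while the rank formula stays `pairs * max_cost + 1`. That formula is an inference from the snapshot numbers alone, not something stated in the diff; the hypothetical helper below only checks the arithmetic against a few of the values seen here.

// Hypothetical helper, not part of milli: assumes max_rank = pairs * max_cost + 1.
fn max_rank(word_pairs: u32, max_cost: u32) -> u32 {
    word_pairs * max_cost + 1
}

fn main() {
    // (old snapshot value with max_cost = 7, new snapshot value with max_cost = 3)
    assert_eq!((max_rank(1, 7), max_rank(1, 3)), (8, 4));
    assert_eq!((max_rank(3, 7), max_rank(3, 3)), (22, 10));
    assert_eq!((max_rank(8, 7), max_rank(8, 3)), (57, 25));
}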


@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
rank: 9,
max_rank: 10,
},
),
],
@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
rank: 5,
max_rank: 10,
},
),
],
@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
rank: 5,
max_rank: 10,
},
),
],


@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
@ -63,7 +39,31 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],


@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -63,7 +55,15 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],


@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -63,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -71,7 +63,15 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],


@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
@ -63,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -71,7 +47,31 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],


@ -6,8 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 1,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
@ -15,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -23,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -31,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -39,31 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],


@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],


@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],


@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],


@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 5,
max_rank: 8,
rank: 1,
max_rank: 4,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
],


@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 15,
rank: 4,
max_rank: 7,
},
),
],


@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
rank: 22,
max_rank: 22,
},
),
],
@ -24,132 +24,6 @@ expression: "format!(\"{document_scores:#?}\")"
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 49,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 49,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 48,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 41,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 40,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 36,
max_rank: 36,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 31,
max_rank: 36,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 22,
@ -160,14 +34,126 @@ expression: "format!(\"{document_scores:#?}\")"
[
Words(
Words {
matching_words: 4,
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 21,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 20,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 16,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 19,
max_rank: 19,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 16,
max_rank: 16,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 13,
max_rank: 16,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
},
),
],
@ -180,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],
@ -194,8 +180,22 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
},
),
],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],


@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
rank: 19,
max_rank: 19,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
rank: 19,
max_rank: 19,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 42,
max_rank: 43,
rank: 18,
max_rank: 19,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 42,
max_rank: 43,
rank: 18,
max_rank: 19,
},
),
],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 41,
max_rank: 43,
rank: 17,
max_rank: 19,
},
),
],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 34,
max_rank: 43,
rank: 14,
max_rank: 19,
},
),
],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 33,
max_rank: 43,
rank: 13,
max_rank: 19,
},
),
],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 36,
max_rank: 36,
rank: 16,
max_rank: 16,
},
),
],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 29,
max_rank: 29,
rank: 13,
max_rank: 13,
},
),
],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 24,
max_rank: 29,
rank: 10,
max_rank: 13,
},
),
],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],


@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 55,
max_rank: 57,
rank: 23,
max_rank: 25,
},
),
],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 52,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 51,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 48,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 47,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 57,
max_rank: 25,
},
),
],
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
rank: 22,
max_rank: 22,
},
),
],
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
rank: 19,
max_rank: 19,
},
),
],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 38,
max_rank: 43,
rank: 16,
max_rank: 19,
},
),
],
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 29,
max_rank: 29,
rank: 13,
max_rank: 13,
},
),
],
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],


@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 55,
max_rank: 57,
rank: 23,
max_rank: 25,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 52,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 47,
max_rank: 57,
rank: 18,
max_rank: 25,
},
),
],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 45,
max_rank: 57,
rank: 18,
max_rank: 25,
},
),
],
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 57,
max_rank: 25,
},
),
],
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 47,
max_rank: 50,
rank: 19,
max_rank: 22,
},
),
],
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 40,
max_rank: 43,
rank: 16,
max_rank: 19,
},
),
],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 43,
rank: 13,
max_rank: 19,
},
),
],
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 26,
max_rank: 29,
rank: 10,
max_rank: 13,
},
),
],
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 22,
rank: 7,
max_rank: 10,
},
),
],
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 22,
rank: 7,
max_rank: 10,
},
),
],
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 22,
rank: 7,
max_rank: 10,
},
),
],
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 13,
max_rank: 15,
rank: 5,
max_rank: 7,
},
),
],

View File

@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 55,
max_rank: 57,
rank: 23,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 52,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 51,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 48,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 47,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 57,
max_rank: 25,
},
),
],

View File

@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
),
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
Fid(
@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
),
Proximity(
Rank {
rank: 6,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
Fid(


@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
// 7 is better than 6 because of the proximity between "the" and its surrounding terms
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
// 10 is better than 9 because of the proximity between "quick" and "brown"
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
"\"the great quick brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the really lazy dog\"",
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
@ -427,7 +427,7 @@ fn test_words_tms_all() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@ -439,10 +439,10 @@ fn test_words_tms_all() {
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
]
"###);


@ -4,9 +4,8 @@ use std::path::Path;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, obkv_to_json, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@ -98,7 +97,6 @@ Create a snapshot test of the given database.
- `facet_id_string_docids`
- `documents_ids`
- `stop_words`
- `soft_deleted_documents_ids`
- `field_distribution`
- `fields_ids_map`
- `geo_faceted_documents_ids`
@ -308,12 +306,6 @@ pub fn snap_stop_words(index: &Index) -> String {
let snap = format!("{stop_words:?}");
snap
}
pub fn snap_soft_deleted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap();
display_bitmap(&soft_deleted_documents_ids)
}
pub fn snap_field_distributions(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let mut snap = String::new();
@ -340,50 +332,21 @@ pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
}
pub fn snap_external_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let ExternalDocumentsIds { soft, hard, .. } = index.external_documents_ids(&rtxn).unwrap();
let external_ids = index.external_documents_ids().to_hash_map(&rtxn).unwrap();
// ensure fixed order (not guaranteed by hashmap)
let mut external_ids: Vec<(String, u32)> = external_ids.into_iter().collect();
external_ids.sort_by(|(l, _), (r, _)| l.cmp(r));
let mut snap = String::new();
writeln!(&mut snap, "soft:").unwrap();
let stream_soft = soft.stream();
let soft_external_ids = stream_soft.into_str_vec().unwrap();
for (key, id) in soft_external_ids {
writeln!(&mut snap, "{key:<24} {id}").unwrap();
}
writeln!(&mut snap, "hard:").unwrap();
let stream_hard = hard.stream();
let hard_external_ids = stream_hard.into_str_vec().unwrap();
for (key, id) in hard_external_ids {
writeln!(&mut snap, "docids:").unwrap();
for (key, id) in external_ids {
writeln!(&mut snap, "{key:<24} {id}").unwrap();
}
snap
}
pub fn snap_number_faceted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut snap = String::new();
for field_id in fields_ids_map.ids() {
let number_faceted_documents_ids =
index.faceted_documents_ids(&rtxn, field_id, FacetType::Number).unwrap();
writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&number_faceted_documents_ids))
.unwrap();
}
snap
}
pub fn snap_string_faceted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut snap = String::new();
for field_id in fields_ids_map.ids() {
let string_faceted_documents_ids =
index.faceted_documents_ids(&rtxn, field_id, FacetType::String).unwrap();
writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&string_faceted_documents_ids))
.unwrap();
}
snap
}
pub fn snap_words_fst(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let words_fst = index.words_fst(&rtxn).unwrap();
@ -516,9 +479,6 @@ macro_rules! full_snap_of_db {
($index:ident, stop_words) => {{
$crate::snapshot_tests::snap_stop_words(&$index)
}};
($index:ident, soft_deleted_documents_ids) => {{
$crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index)
}};
($index:ident, field_distribution) => {{
$crate::snapshot_tests::snap_field_distributions(&$index)
}};
@ -531,12 +491,6 @@ macro_rules! full_snap_of_db {
($index:ident, external_documents_ids) => {{
$crate::snapshot_tests::snap_external_documents_ids(&$index)
}};
($index:ident, number_faceted_documents_ids) => {{
$crate::snapshot_tests::snap_number_faceted_documents_ids(&$index)
}};
($index:ident, string_faceted_documents_ids) => {{
$crate::snapshot_tests::snap_string_faceted_documents_ids(&$index)
}};
($index:ident, words_fst) => {{
$crate::snapshot_tests::snap_words_fst(&$index)
}};


@ -8,16 +8,11 @@ pub struct AvailableDocumentsIds {
}
impl AvailableDocumentsIds {
pub fn from_documents_ids(
docids: &RoaringBitmap,
soft_deleted_docids: &RoaringBitmap,
) -> AvailableDocumentsIds {
let used_docids = docids | soft_deleted_docids;
match used_docids.max() {
pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
match docids.max() {
Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id);
available -= used_docids;
available -= docids;
let iter = match last_id.checked_add(1) {
Some(id) => id..=u32::max_value(),
@ -50,7 +45,7 @@ mod tests {
#[test]
fn empty() {
let base = RoaringBitmap::new();
let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = 0..=u32::max_value();
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
@ -63,28 +58,8 @@ mod tests {
base.insert(100);
base.insert(405);
let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
#[test]
fn soft_deleted() {
let mut base = RoaringBitmap::new();
base.insert(0);
base.insert(10);
base.insert(100);
base.insert(405);
let mut soft_deleted = RoaringBitmap::new();
soft_deleted.insert(1);
soft_deleted.insert(11);
soft_deleted.insert(101);
soft_deleted.insert(406);
let left = AvailableDocumentsIds::from_documents_ids(&base, &soft_deleted);
let right =
(0..=u32::max_value()).filter(|&n| ![0, 1, 10, 11, 100, 101, 405, 406].contains(&n));
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
}
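With soft deletion removed, the iterator above only has to skip ids that are still present in the documents bitmap. A minimal standalone sketch of that behaviour, assuming only the `roaring` crate (it does not reproduce the real type's explicit chaining of `last_id + 1..=u32::MAX`, only the observable result):

use roaring::RoaringBitmap;

// Standalone sketch: yields every docid not present in `docids`, smallest first.
fn available_ids(docids: &RoaringBitmap) -> impl Iterator<Item = u32> + '_ {
    (0..=u32::MAX).filter(move |id| !docids.contains(*id))
}

fn main() {
    let mut used = RoaringBitmap::new();
    used.insert(0);
    used.insert(10);
    let mut free = available_ids(&used);
    assert_eq!(free.next(), Some(1)); // 0 is taken, so 1 is the first free id
    assert_eq!(free.next(), Some(2));
}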


@ -1,8 +1,7 @@
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::facet::FacetType;
use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
use crate::{FieldDistribution, Index, Result};
pub struct ClearDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -21,6 +20,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
let Index {
env: _env,
main: _main,
external_documents_ids,
word_docids,
exact_word_docids,
word_prefix_docids,
@ -51,36 +51,18 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
// We retrieve the number of documents ids that we are deleting.
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
// We clean some of the main engine datastructures.
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
self.index.put_soft_deleted_documents_ids(self.wtxn, &empty_roaring)?;
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
self.index.delete_geo_rtree(self.wtxn)?;
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
self.index.delete_vector_hnsw(self.wtxn)?;
// We clean all the faceted documents ids.
for field_id in faceted_fields {
self.index.put_faceted_documents_ids(
self.wtxn,
field_id,
FacetType::Number,
&empty_roaring,
)?;
self.index.put_faceted_documents_ids(
self.wtxn,
field_id,
FacetType::String,
&empty_roaring,
)?;
}
// Clear the other databases.
external_documents_ids.clear(self.wtxn)?;
word_docids.clear(self.wtxn)?;
exact_word_docids.clear(self.wtxn)?;
word_prefix_docids.clear(self.wtxn)?;
@ -140,7 +122,7 @@ mod tests {
assert!(index.words_fst(&rtxn).unwrap().is_empty());
assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
assert!(index.geo_rtree(&rtxn).unwrap().is_none());

File diff suppressed because it is too large


@ -1,9 +1,9 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesEncode, Error, RoTxn, RwTxn};
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@ -12,17 +12,15 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".
///
/// First, the new elements are inserted into the level 0 of the database. Then, the
/// higher levels are cleared and recomputed from the content of level 0.
///
/// Finally, the `faceted_documents_ids` value in the main database of `Index`
/// is updated to contain the new set of faceted documents.
pub struct FacetsUpdateBulk<'i> {
index: &'i Index,
group_size: u8,
@ -30,7 +28,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
new_data: Option<grenad::Reader<File>>,
delta_data: Option<grenad::Reader<BufReader<File>>>,
}
impl<'i> FacetsUpdateBulk<'i> {
@ -38,7 +36,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
new_data: grenad::Reader<File>,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@ -48,7 +46,7 @@ impl<'i> FacetsUpdateBulk<'i> {
group_size,
min_level_size,
facet_type,
new_data: Some(new_data),
delta_data: Some(delta_data),
}
}
@ -63,13 +61,13 @@ impl<'i> FacetsUpdateBulk<'i> {
group_size: FACET_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
new_data: None,
delta_data: None,
}
}
#[logging_timer::time("FacetsUpdateBulk::{}")]
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
let db = match facet_type {
FacetType::String => index
@ -80,12 +78,9 @@ impl<'i> FacetsUpdateBulk<'i> {
}
};
let inner = FacetsUpdateBulkInner { db, new_data, group_size, min_level_size };
let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size };
inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| {
index.put_faceted_documents_ids(wtxn, field_id, facet_type, &all_docids)?;
Ok(())
})?;
inner.update(wtxn, &field_ids)?;
Ok(())
}
@ -94,26 +89,19 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub new_data: Option<grenad::Reader<R>>,
pub delta_data: Option<grenad::Reader<R>>,
pub group_size: u8,
pub min_level_size: u8,
}
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
pub fn update(
mut self,
wtxn: &mut RwTxn,
field_ids: &[u16],
mut handle_all_docids: impl FnMut(&mut RwTxn, FieldId, RoaringBitmap) -> Result<()>,
) -> Result<()> {
pub fn update(mut self, wtxn: &mut RwTxn, field_ids: &[u16]) -> Result<()> {
self.update_level0(wtxn)?;
for &field_id in field_ids.iter() {
self.clear_levels(wtxn, field_id)?;
}
for &field_id in field_ids.iter() {
let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, wtxn)?;
handle_all_docids(wtxn, field_id, all_docids)?;
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?;
@ -133,20 +121,26 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
Ok(())
}
// TODO the new_data is a Reader<Obkv<Key, Obkv<DelAdd, RoaringBitmap>>>
fn update_level0(&mut self, wtxn: &mut RwTxn) -> Result<()> {
let new_data = match self.new_data.take() {
let delta_data = match self.delta_data.take() {
Some(x) => x,
None => return Ok(()),
};
if self.db.is_empty(wtxn)? {
let mut buffer = Vec::new();
let mut database = self.db.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
let mut cursor = new_data.into_cursor()?;
let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::new(value);
// DB is empty, it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else {
continue;
};
buffer.clear();
// the group size for level 0
buffer.push(1);
@ -158,11 +152,14 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
let mut buffer = Vec::new();
let database = self.db.remap_types::<ByteSlice, ByteSlice>();
let mut cursor = new_data.into_cursor()?;
let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::new(value);
// the value is a CboRoaringBitmap, but I still need to prepend the
// group size for level 0 (= 1) to it
buffer.clear();
@ -170,17 +167,27 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
// then we extend the buffer with the docids bitmap
match database.get(wtxn, key)? {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
CboRoaringBitmapCodec::merge_into(
&[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)],
&mut buffer,
)?;
CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
}
None => {
// it is safe to ignore the del in that case.
let Some(value) = value.get(DelAdd::Addition) else {
// won't put the key in DB as the value would be empty
continue;
};
buffer.extend_from_slice(value);
}
};
database.put(wtxn, key, &buffer)?;
let new_bitmap = &buffer[1..];
// if the new bitmap is empty, let's remove it
if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
database.delete(wtxn, key)?;
} else {
database.put(wtxn, key, &buffer)?;
}
}
}
Ok(())
@ -189,16 +196,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
&self,
field_id: FieldId,
txn: &RoTxn,
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
let mut all_docids = RoaringBitmap::new();
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
for bitmap in bitmaps {
all_docids |= bitmap;
}
Ok(())
})?;
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?;
Ok((subwriters, all_docids))
Ok(subwriters)
}
#[allow(clippy::type_complexity)]
fn read_level_0<'t>(
@ -261,7 +262,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
field_id: u16,
level: u8,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
) -> Result<Vec<grenad::Reader<File>>> {
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
if level == 0 {
self.read_level_0(rtxn, field_id, handle_group)?;
// Level 0 is already in the database
@ -492,7 +493,6 @@ mod tests {
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a");
db_snap!(index, number_faceted_documents_ids, "initial", @"01594fecbb316798ce3651d6730a4521");
}
#[test]
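The level-0 writer above now consumes del/add obkv values instead of plain bitmaps: deletions are subtracted, additions are unioned, and a key whose resulting bitmap ends up empty is removed from the database instead of being stored. A hedged sketch of that per-key merge, written against decoded `RoaringBitmap`s rather than the Cbo-encoded buffers and the real `merge_deladd_into` helper:

use roaring::RoaringBitmap;

// Returns the new bitmap for a facet key, or None when the key should be
// deleted because nothing references it anymore.
fn merge_deladd(
    previous: Option<RoaringBitmap>,
    deletion: Option<&RoaringBitmap>,
    addition: Option<&RoaringBitmap>,
) -> Option<RoaringBitmap> {
    let mut bitmap = previous.unwrap_or_default();
    if let Some(del) = deletion {
        bitmap -= del;
    }
    if let Some(add) = addition {
        bitmap |= add;
    }
    if bitmap.is_empty() {
        None
    } else {
        Some(bitmap)
    }
}

On an empty database the deletion side is a no-op, which is why the code above can skip keys that only carry a Del entry when level 0 is written from scratch.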


@ -1,360 +0,0 @@
use std::collections::{HashMap, HashSet};
use heed::RwTxn;
use log::debug;
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner};
use crate::{FieldId, Index, Result};
/// A builder used to remove elements from the `facet_id_string_docids` or `facet_id_f64_docids` databases.
///
/// Depending on the number of removed elements and the existing size of the database, we use either
/// a bulk delete method or an incremental delete method.
pub struct FacetsDelete<'i, 'b> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
docids_to_delete: &'b RoaringBitmap,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
}
impl<'i, 'b> FacetsDelete<'i, 'b> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
docids_to_delete: &'b RoaringBitmap,
) -> Self {
let database = match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
Self {
index,
database,
facet_type,
affected_facet_values,
docids_to_delete,
group_size: FACET_GROUP_SIZE,
max_group_size: FACET_MAX_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
}
}
pub fn execute(self, wtxn: &mut RwTxn) -> Result<()> {
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
for (field_id, affected_facet_values) in self.affected_facet_values {
// This is an incorrect condition, since we assume that the length of the database is equal
// to the number of facet values for the given field_id. It means that in some cases, we might
// wrongly choose the incremental indexer over the bulk indexer. But the only case where that could
// really be a performance problem is when we fully delete a large ratio of all facet values for
// each field id. This would almost never happen. Still, to be overly cautious, I have added a
// 2x penalty to the incremental indexer. That is, instead of assuming a 70x worst-case performance
// penalty to the incremental indexer, we assume a 150x worst-case performance penalty instead.
if affected_facet_values.len() >= (self.database.len(wtxn)? / 150) {
// Bulk delete
let mut modified = false;
for facet_value in affected_facet_values {
let key =
FacetGroupKey { field_id, level: 0, left_bound: facet_value.as_slice() };
let mut old = self.database.get(wtxn, &key)?.unwrap();
let previous_len = old.bitmap.len();
old.bitmap -= self.docids_to_delete;
if old.bitmap.is_empty() {
modified = true;
self.database.delete(wtxn, &key)?;
} else if old.bitmap.len() != previous_len {
modified = true;
self.database.put(wtxn, &key, &old)?;
}
}
if modified {
let builder = FacetsUpdateBulk::new_not_updating_level_0(
self.index,
vec![field_id],
self.facet_type,
);
builder.execute(wtxn)?;
}
} else {
// Incremental
let inc = FacetsUpdateIncrementalInner {
db: self.database,
group_size: self.group_size,
min_level_size: self.min_level_size,
max_group_size: self.max_group_size,
};
for facet_value in affected_facet_values {
inc.delete(wtxn, field_id, facet_value.as_slice(), self.docids_to_delete)?;
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::iter::FromIterator;
use big_s::S;
use maplit::hashset;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use roaring::RoaringBitmap;
use crate::db_snap;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::ordered_string;
use crate::update::{DeleteDocuments, DeletionStrategy};
#[test]
fn delete_mixed_incremental_and_bulk() {
// The point of this test is to create an index populated with documents
// containing different filterable attributes. Then, we delete a bunch of documents
// such that a mix of the incremental and bulk indexer is used (depending on the field id)
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_filterable_fields(
hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
);
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"label": i / 10,
"colour": i / 100,
"timestamp": i / 2,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, 1, @"550cd138d6fe31ccdd42cd5392fbd576");
db_snap!(index, number_faceted_documents_ids, 1, @"9a0ea88e7c9dcf6dc0ef0b601736ffcf");
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
// - the "label" part will be updated incrementally, since #affected_facet_value = 10 which is < 13
// - the "colour" part will also be updated incrementally, since #affected_values = 1 which is < 13
// - the "timestamp" part will be updated in bulk, since #affected_values = 50 which is > 13
// This has to be verified manually by inserting breakpoint/adding print statements to the code when running the test
builder.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, @"[]");
db_snap!(index, facet_id_f64_docids, 2, @"d4d5f14e7f1e1f09b86821a0b6defcc6");
db_snap!(index, number_faceted_documents_ids, 2, @"3570e0ac0fdb21be9ebe433f59264b56");
}
// Same test as above but working with string values for the facets
#[test]
fn delete_mixed_incremental_and_bulk_string() {
// The point of this test is to create an index populated with documents
// containing different filterable attributes. Then, we delete a bunch of documents
// such that a mix of the incremental and bulk indexer is used (depending on the field id)
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_filterable_fields(
hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
);
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"label": ordered_string(i / 10),
"colour": ordered_string(i / 100),
"timestamp": ordered_string(i / 2),
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
// Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022)
db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503");
db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5");
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
// - the "label" part will be updated incrementally, since #affected_facet_value = 10 which is < 13
// - the "colour" part will also be updated incrementally, since #affected_values = 1 which is < 13
// - the "timestamp" part will be updated in bulk, since #affected_values = 50 which is > 13
// This has to be verified manually by inserting breakpoint/adding print statements to the code when running the test
builder.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, @"[]");
db_snap!(index, facet_id_string_docids, 2, @"7f9c00b29e04d58c1821202a5dda0ebc");
db_snap!(index, string_faceted_documents_ids, 2, @"504152afa5c94fd4e515dcdfa4c7161f");
}
#[test]
fn delete_almost_all_incrementally_string() {
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_filterable_fields(
hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
);
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"label": ordered_string(i / 10),
"colour": ordered_string(i / 100),
"timestamp": ordered_string(i / 2),
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
// Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022)
db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503");
db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5");
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let mut docids_to_delete = (0..1000).collect::<Vec<_>>();
docids_to_delete.shuffle(&mut rng);
for docid in docids_to_delete.into_iter().take(990) {
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter([docid]));
builder.execute().unwrap();
wtxn.commit().unwrap();
}
db_snap!(index, soft_deleted_documents_ids, @"[]");
db_snap!(index, facet_id_string_docids, 2, @"ece56086e76d50e661fb2b58475b9f7d");
db_snap!(index, string_faceted_documents_ids, 2, @r###"
0 []
1 [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ]
2 [292, 324, 358, 381, 493, 839, 852, ]
3 [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ]
"###);
}
}
#[allow(unused)]
#[cfg(test)]
mod comparison_bench {
use std::iter::once;
use rand::Rng;
use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::facet::test_helpers::FacetIndex;
// This is a simple test to get an intuition on the relative speed
// of the incremental vs. bulk indexer.
//
// The benchmark shows the worst-case scenario for the incremental indexer, since
// each facet value contains only one document ID.
//
// In that scenario, it appears that the incremental indexer is about 70 times slower than the
// bulk indexer.
// #[test]
fn benchmark_facet_indexing_delete() {
let mut r = rand::thread_rng();
for i in 1..=20 {
let size = 50_000 * i;
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..size {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, i as f64), once(i).collect()));
}
let timer = std::time::Instant::now();
index.bulk_insert(&mut txn, &[0], elements.iter());
let time_spent = timer.elapsed().as_millis();
println!("bulk {size} : {time_spent}ms");
txn.commit().unwrap();
for nbr_doc in [1, 100, 1000, 10_000] {
let mut txn = index.env.write_txn().unwrap();
let timer = std::time::Instant::now();
//
// delete one document
//
for _ in 0..nbr_doc {
let deleted_u32 = r.gen::<u32>() % size;
let deleted_f64 = deleted_u32 as f64;
index.delete_single_docid(&mut txn, 0, &deleted_f64, deleted_u32)
}
let time_spent = timer.elapsed().as_millis();
println!(" delete {nbr_doc} : {time_spent}ms");
txn.abort().unwrap();
}
}
}
}


@ -1,8 +1,9 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use obkv::KvReader;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
@ -11,8 +12,9 @@ use crate::heed_codec::facet::{
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
use crate::{CboRoaringBitmapCodec, Index, Result};
enum InsertionResult {
InPlace,
@ -27,27 +29,21 @@ enum DeletionResult {
/// Algorithm to incrementally insert and delete elements into the
/// `facet_id_(string/f64)_docids` databases.
///
/// The `faceted_documents_ids` value in the main database of `Index`
/// is also updated to contain the new set of faceted documents.
pub struct FacetsUpdateIncremental<'i> {
index: &'i Index,
pub struct FacetsUpdateIncremental {
inner: FacetsUpdateIncrementalInner,
facet_type: FacetType,
new_data: grenad::Reader<File>,
delta_data: grenad::Reader<BufReader<File>>,
}
impl<'i> FacetsUpdateIncremental<'i> {
impl FacetsUpdateIncremental {
pub fn new(
index: &'i Index,
index: &Index,
facet_type: FacetType,
new_data: grenad::Reader<File>,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,
) -> Self {
FacetsUpdateIncremental {
index,
inner: FacetsUpdateIncrementalInner {
db: match facet_type {
FacetType::String => index
@ -61,31 +57,41 @@ impl<'i> FacetsUpdateIncremental<'i> {
max_group_size,
min_level_size,
},
facet_type,
new_data,
delta_data,
}
}
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
let mut cursor = self.new_data.into_cursor()?;
pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
let mut cursor = self.delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
continue;
}
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
*new_faceted_docids.entry(key.field_id).or_default() |= docids;
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
}
if let Some(docids_to_add) = docids_to_add {
let docids_to_add = docids_to_add?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
}
}
for (field_id, new_docids) in new_faceted_docids {
let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
docids |= new_docids;
self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?;
}
Ok(())
}
}
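Each `delta_data` entry above may carry a Deletion bitmap, an Addition bitmap, or both, and `execute` applies the deletion before the addition. A small sketch of what that ordering implies, using plain `roaring` bitmaps instead of the incremental tree updater:

use roaring::RoaringBitmap;

fn main() {
    let mut stored: RoaringBitmap = (0..3).collect(); // {0, 1, 2}
    let deletion: RoaringBitmap = [1u32, 2].iter().copied().collect();
    let addition: RoaringBitmap = [2u32, 7].iter().copied().collect();

    stored -= &deletion; // {0}
    stored |= &addition; // {0, 2, 7}

    assert!(stored.contains(2));  // present on both sides: the addition wins
    assert!(!stored.contains(1)); // only deleted: gone
}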


@ -14,7 +14,7 @@ The databases must be able to return results for queries such as:
The algorithms that implement these queries are found in the `src/search/facet` folder.
To make these queries fast to compute, the database adopts a tree structure:
```ignore
```text
┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐ │ "ab" (2) │ "gaf" (2) │ "woz" (1) │
│Level 2│ │ │ │ │
@ -41,7 +41,7 @@ These documents all contain a facet value that is contained within `ab .. gaf`.
In the database, each node is represented by a key/value pair encoded as a [`FacetGroupKey`] and a
[`FacetGroupValue`], which have the following format:
```ignore
```text
FacetGroupKey:
- field id : u16
- level : u8
@ -78,6 +78,7 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::BufReader;
use std::iter::FromIterator;
use charabia::normalizer::{Normalize, NormalizerOption};
@ -97,7 +98,6 @@ use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
pub mod bulk;
pub mod delete;
pub mod incremental;
/// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
@ -108,14 +108,17 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
new_data: grenad::Reader<File>,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
}
impl<'i> FacetsUpdate<'i> {
// TODO grenad::Reader<Key, Obkv<DelAdd, RoaringBitmap>>
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
pub fn new(
index: &'i Index,
facet_type: FacetType,
delta_data: grenad::Reader<BufReader<File>>,
) -> Self {
let database = match facet_type {
FacetType::String => index
.facet_id_string_docids
@ -131,26 +134,26 @@ impl<'i> FacetsUpdate<'i> {
max_group_size: FACET_MAX_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
new_data,
delta_data,
}
}
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
if self.new_data.is_empty() {
if self.delta_data.is_empty() {
return Ok(());
}
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
if self.delta_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
self.facet_type,
self.new_data,
self.delta_data,
self.group_size,
self.min_level_size,
);
@ -159,7 +162,7 @@ impl<'i> FacetsUpdate<'i> {
let incremental_update = FacetsUpdateIncremental::new(
self.index,
self.facet_type,
self.new_data,
self.delta_data,
self.group_size,
self.min_level_size,
self.max_group_size,
@ -275,6 +278,7 @@ pub(crate) mod test_helpers {
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
@ -451,20 +455,22 @@ pub(crate) mod test_helpers {
let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
writer.insert(&key, &value).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
}
writer.finish().unwrap();
let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
let update = FacetsUpdateBulkInner {
db: self.content,
new_data: Some(reader),
delta_data: Some(reader),
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
};
update.update(wtxn, field_ids, |_, _, _| Ok(())).unwrap();
update.update(wtxn, field_ids).unwrap();
}
pub fn verify_structure_validity(&self, txn: &RoTxn, field_id: u16) {
@ -552,101 +558,6 @@ pub(crate) mod test_helpers {
}
}
#[cfg(test)]
mod tests {
use big_s::S;
use maplit::hashset;
use crate::db_snap;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::update::DeletionStrategy;
#[test]
fn replace_all_identical_soft_deletion_then_hard_deletion() {
let mut index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("size") });
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"size": i % 250,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "initial", @"777e0e221d778764b472c512617eeb3b");
db_snap!(index, number_faceted_documents_ids, "initial", @"bd916ef32b05fd5c3c4c518708f431a9");
db_snap!(index, soft_deleted_documents_ids, "initial", @"[]");
let mut documents = vec![];
for i in 0..999 {
documents.push(
serde_json::json! {
{
"id": i,
"size": i % 250,
"other": 0,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "replaced_1_soft", @"abba175d7bed727d0efadaef85a4388f");
db_snap!(index, number_faceted_documents_ids, "replaced_1_soft", @"de76488bd05ad94c6452d725acf1bd06");
db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123");
// Then replace the last document while disabling soft_deletion
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let mut documents = vec![];
for i in 999..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"size": i % 250,
"other": 0,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "replaced_2_hard", @"029e27a46d09c574ae949aa4289b45e6");
db_snap!(index, number_faceted_documents_ids, "replaced_2_hard", @"60b19824f136affe6b240a7200779028");
db_snap!(index, soft_deleted_documents_ids, "replaced_2_hard", @"[]");
}
}
#[allow(unused)]
#[cfg(test)]
mod comparison_bench {

View File

@ -1,4 +1,4 @@
use std::io::{Read, Seek};
use std::io::{BufWriter, Read, Seek};
use std::result::Result as StdResult;
use std::{fmt, iter};
@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
// The primary key *field id* that has already been set for this index or the one

View File

@ -1,6 +1,7 @@
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::BufReader;
use std::{io, mem, str};
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
@ -29,7 +30,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
allowed_separators: Option<&[&str]>,
dictionary: Option<&[&str]>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
let max_positions_per_attributes = max_positions_per_attributes
@ -55,7 +56,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let mut value_buffer = Vec::new();
// initialize tokenizer.
let mut builder = tokenizer_builder(stop_words, dictionary, allowed_separators, None);
let mut builder = tokenizer_builder(stop_words, allowed_separators, dictionary, None);
let tokenizer = builder.build();
// iterate over documents.
@ -114,6 +115,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let (add_obkv, add_script_language_word_count) = add?;
// merge deletions and additions.
// transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
value_buffer.clear();
del_add_from_two_obkvs(
KvReader::<FieldId>::new(del_obkv),
@ -121,8 +123,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
&mut value_buffer,
)?;
// write them into the sorter.
let obkv = KvReader::<FieldId>::new(value);
// write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
let obkv = KvReader::<FieldId>::new(&value_buffer);
for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
@ -150,8 +152,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
}
}
// the returned sorter is serialized as: key: (DocId, FieldId), value: KV<DelAdd, KV<u16, String>>.
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (documents_ids, reader, script_language_docids))
.map(|reader| (reader, script_language_docids))
}
/// Check if any searchable fields of a document changed.
@ -195,7 +198,7 @@ fn tokenizer_builder<'a>(
}
if let Some(script_language) = script_language {
tokenizer_builder.allow_list(&script_language);
tokenizer_builder.allow_list(script_language);
}
tokenizer_builder
@ -203,6 +206,7 @@ fn tokenizer_builder<'a>(
/// Extract the words of a document mapped with their positions,
/// ensuring no Language detection mistakes were made.
#[allow(clippy::too_many_arguments)] // FIXME: consider grouping arguments in a struct
fn lang_safe_tokens_from_document<'a>(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
@ -217,9 +221,9 @@ fn lang_safe_tokens_from_document<'a>(
let mut script_language_word_count = HashMap::new();
tokens_from_document(
&obkv,
obkv,
searchable_fields,
&tokenizer,
tokenizer,
max_positions_per_attributes,
del_add,
buffers,
@ -244,8 +248,8 @@ fn lang_safe_tokens_from_document<'a>(
// build a new temporary tokenizer including the allow list.
let mut builder = tokenizer_builder(
stop_words,
dictionary,
allowed_separators,
dictionary,
Some(&script_language),
);
let tokenizer = builder.build();
@ -254,7 +258,7 @@ fn lang_safe_tokens_from_document<'a>(
// rerun the extraction.
tokens_from_document(
&obkv,
obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
@ -265,6 +269,7 @@ fn lang_safe_tokens_from_document<'a>(
}
}
// returns a (KV<FieldId, KV<u16, String>>, HashMap<Script, Vec<(Language, usize)>>)
Ok((&buffers.obkv_buffer, script_language_word_count))
}
@ -330,6 +335,7 @@ fn tokens_from_document<'a>(
}
}
// returns a KV<FieldId, KV<u16, String>>
Ok(document_writer.into_inner().map(|v| v.as_slice())?)
}

View File

@ -1,5 +1,5 @@
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use heed::{BytesDecode, BytesEncode};
@ -20,7 +20,7 @@ use crate::Result;
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
fid_docid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@ -1,4 +1,5 @@
use std::fs::File;
use std::io::BufReader;
use std::{io, str};
use heed::BytesEncode;
@ -18,7 +19,7 @@ use crate::{FieldId, Result};
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
@ -29,11 +29,11 @@ const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
/// The extracted facet values stored in grenad files by type.
pub struct ExtractedFacetValues {
pub fid_docid_facet_numbers_chunk: grenad::Reader<File>,
pub fid_docid_facet_strings_chunk: grenad::Reader<File>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
pub fid_docid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
pub fid_docid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
}
/// Extracts the facet values of each faceted field of each document.
@ -102,11 +102,11 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let del_add_obkv = obkv::KvReader::new(field_bytes);
let del_value = match del_add_obkv.get(DelAdd::Deletion) {
Some(bytes) => from_slice(bytes).map_err(InternalError::SerdeJson)?,
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};
let add_value = match del_add_obkv.get(DelAdd::Addition) {
Some(bytes) => from_slice(bytes).map_err(InternalError::SerdeJson)?,
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};

View File

@ -1,14 +1,15 @@
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::Result;
const MAX_COUNTED_WORDS: usize = 30;
@ -22,14 +23,14 @@ const MAX_COUNTED_WORDS: usize = 30;
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -37,18 +38,52 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
);
let mut key_buffer = Vec::new();
let mut value_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let word_count = KvReaderU16::new(&value).iter().take(MAX_COUNTED_WORDS + 1).count();
if word_count <= MAX_COUNTED_WORDS {
key_buffer.clear();
key_buffer.extend_from_slice(fid_bytes);
key_buffer.push(word_count as u8);
fid_word_count_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
let del_add_reader = KvReaderDelAdd::new(value);
let deletion = del_add_reader
// get deleted words
.get(DelAdd::Deletion)
// count deleted words
.map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count())
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
let addition = del_add_reader
// get added words
.get(DelAdd::Addition)
// count added words
.map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count())
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
if deletion != addition {
// Insert the deleted word count in the sorter if it exists.
if let Some(word_count) = deletion {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
key_buffer.clear();
key_buffer.extend_from_slice(fid_bytes);
key_buffer.push(word_count as u8);
fid_word_count_docids_sorter
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
// Insert the added word count in the sorter if it exists.
if let Some(word_count) = addition {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key_buffer.clear();
key_buffer.extend_from_slice(fid_bytes);
key_buffer.push(word_count as u8);
fid_word_count_docids_sorter
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
}
}
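The extractor above only touches the sorter when the capped word count actually changes between the deleted and the added version of a field. A standalone sketch of that rule (the `capped_count` helper is illustrative and not code from the diff):
```rust
// Illustrative only: replicate the capped word-count rule from the extractor above.
const MAX_COUNTED_WORDS: usize = 30;

// Hypothetical helper: count words, discarding counts above the cap.
fn capped_count(words: &[&str]) -> Option<usize> {
    Some(words.iter().take(MAX_COUNTED_WORDS + 1).count())
        .filter(|&count| count <= MAX_COUNTED_WORDS)
}

fn main() {
    let deletion = capped_count(&["the", "quick", "fox"]); // Some(3)
    let addition = capped_count(&["the", "quick", "brown", "fox"]); // Some(4)
    if deletion != addition {
        // The extractor would write a DelAdd::Deletion entry under (fid, 3)
        // and a DelAdd::Addition entry under (fid, 4); equal counts write nothing.
        println!("del = {deletion:?}, add = {addition:?}");
    }
}
```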

View File

@ -1,11 +1,12 @@
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use concat_arrays::concat_arrays;
use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::{FieldId, InternalError, Result};
@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
indexer: GrenadParameters,
primary_key_id: FieldId,
(lat_fid, lng_fid): (FieldId, FieldId),
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(
@ -30,39 +31,71 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
// since we only needs the primary key when we throw an error we create this getter to
// lazily get it when needed
// since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed
let document_id = || -> Value {
let document_id = obkv.get(primary_key_id).unwrap();
serde_json::from_slice(document_id).unwrap()
};
// first we get the two fields
let lat = obkv.get(lat_fid);
let lng = obkv.get(lng_fid);
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
(Some(lat), Some(lng)) => {
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
let deladd_lng_obkv = KvReaderDelAdd::new(lng);
if let Some((lat, lng)) = lat.zip(lng) {
// then we extract the values
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
// then we extract the values
let del_lat_lng = deladd_lat_obkv
.get(DelAdd::Deletion)
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let add_lat_lng = deladd_lat_obkv
.get(DelAdd::Addition)
.zip(deladd_lng_obkv.get(DelAdd::Addition))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
writer.insert(docid_bytes, bytes)?;
} else if lat.is_none() && lng.is_some() {
return Err(GeoError::MissingLatitude { document_id: document_id() })?;
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
}
}
(None, Some(_)) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
(Some(_), None) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => (),
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)
}
/// Extract the finite floats lat and lng from two byte slices.
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok([lat, lng])
}
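Both branches above pack the coordinates into the same 16-byte payload. A standalone sketch of that packing, using `concat_arrays` the same way the extractor does (illustrative, not code from the diff):
```rust
// Illustrative only: the 16-byte geo payload is the native-endian f64 latitude
// followed by the native-endian f64 longitude.
use concat_arrays::concat_arrays;

fn main() {
    let (lat, lng) = (48.8566_f64, 2.3522_f64);
    #[allow(clippy::drop_non_drop)]
    let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
    assert_eq!(f64::from_ne_bytes(bytes[..8].try_into().unwrap()), lat);
    assert_eq!(f64::from_ne_bytes(bytes[8..].try_into().unwrap()), lng);
}
```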

View File

@ -1,13 +1,24 @@
use std::cmp::Ordering;
use std::convert::TryFrom;
use std::fs::File;
use std::io;
use std::io::{self, BufReader, BufWriter};
use std::mem::size_of;
use std::str::from_utf8;
use bytemuck::cast_slice;
use grenad::Writer;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
use serde_json::{from_slice, Value};
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::UserError;
use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::try_split_at;
use crate::{DocumentId, FieldId, InternalError, Result, VectorOrArrayOfVectors};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
@ -16,9 +27,8 @@ use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
vectors_fid: FieldId,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(
@ -27,43 +37,112 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
tempfile::tempfile()?,
);
let mut key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
while let Some((key, value)) = cursor.move_on_next()? {
// this must always be serialized as (docid, external_docid);
let (docid_bytes, external_id_bytes) =
try_split_at(key, std::mem::size_of::<DocumentId>()).unwrap();
debug_assert!(from_utf8(external_id_bytes).is_ok());
let obkv = obkv::KvReader::new(value);
key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes);
// since we only needs the primary key when we throw an error we create this getter to
// lazily get it when needed
let document_id = || -> Value {
let document_id = obkv.get(primary_key_id).unwrap();
from_slice(document_id).unwrap()
};
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
// first we retrieve the _vectors field
if let Some(vectors) = obkv.get(vectors_fid) {
// extract the vectors
let vectors = match from_slice(vectors) {
Ok(vectors) => VectorOrArrayOfVectors::into_array_of_vectors(vectors),
Err(_) => {
return Err(UserError::InvalidVectorsType {
document_id: document_id(),
value: from_slice(vectors).map_err(InternalError::SerdeJson)?,
}
.into())
}
};
if let Some(value) = obkv.get(vectors_fid) {
let vectors_obkv = KvReaderDelAdd::new(value);
if let Some(vectors) = vectors {
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
let index = u16::try_from(i).unwrap();
let mut key = docid_bytes.to_vec();
key.extend_from_slice(&index.to_be_bytes());
let bytes = cast_slice(&vector);
writer.insert(key, bytes)?;
}
}
// then we extract the values
let del_vectors = vectors_obkv
.get(DelAdd::Deletion)
.map(|vectors| extract_vectors(vectors, document_id))
.transpose()?
.flatten();
let add_vectors = vectors_obkv
.get(DelAdd::Addition)
.map(|vectors| extract_vectors(vectors, document_id))
.transpose()?
.flatten();
// and we finally push the unique vectors into the writer
push_vectors_diff(
&mut writer,
&mut key_buffer,
del_vectors.unwrap_or_default(),
add_vectors.unwrap_or_default(),
)?;
}
// else => the `_vectors` object was `null`, there is nothing to do
}
writer_into_reader(writer)
}
/// Computes the diff between the Del and Add vectors and
/// only writes the parts that differ into the writer.
fn push_vectors_diff(
writer: &mut Writer<BufWriter<File>>,
key_buffer: &mut Vec<u8>,
mut del_vectors: Vec<Vec<f32>>,
mut add_vectors: Vec<Vec<f32>>,
) -> Result<()> {
// We sort and dedup the vectors
del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
let merged_vectors_iter =
itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
// insert vectors into the writer
for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) {
// Generate the key by appending the unique index to it.
key_buffer.truncate(TRUNCATE_SIZE);
let index = u16::try_from(i).unwrap();
key_buffer.extend_from_slice(&index.to_be_bytes());
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left(vector) => {
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those vectors.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
let bytes = obkv.into_inner()?;
writer.insert(&key_buffer, bytes)?;
}
EitherOrBoth::Right(vector) => {
// We insert only the Add part of the Obkv to inform
// that we only want to add all those vectors.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
let bytes = obkv.into_inner()?;
writer.insert(&key_buffer, bytes)?;
}
}
}
Ok(())
}
/// Compares two vectors by using the OrderedFloat helper.
fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
}
/// Extracts the vectors from a JSON value.
fn extract_vectors(value: &[u8], document_id: impl Fn() -> Value) -> Result<Option<Vec<Vec<f32>>>> {
match from_slice(value) {
Ok(vectors) => Ok(VectorOrArrayOfVectors::into_array_of_vectors(vectors)),
Err(_) => Err(UserError::InvalidVectorsType {
document_id: document_id(),
value: from_slice(value).map_err(InternalError::SerdeJson)?,
}
.into()),
}
}
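The Del/Add diffing in `push_vectors_diff` above reduces to a three-way merge over two sorted, deduplicated lists: entries present on both sides are skipped, left-only entries become `Deletion` obkvs, and right-only entries become `Addition` obkvs. A standalone sketch of that pattern on toy vectors, assuming only `itertools` and `ordered_float` (the milli-specific writer and key handling are left out):
```rust
// Illustrative only: same merge_join_by pattern as push_vectors_diff above.
use std::cmp::Ordering;

use itertools::{merge_join_by, EitherOrBoth};
use ordered_float::OrderedFloat;

fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
    a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
}

fn main() {
    // Both inputs must already be sorted and deduplicated, as in the extractor.
    let del = vec![vec![0.0f32, 1.0], vec![2.0, 3.0]];
    let add = vec![vec![2.0f32, 3.0], vec![4.0, 5.0]];
    for eob in merge_join_by(del, add, |d, a| compare_vectors(d, a)) {
        match eob {
            EitherOrBoth::Both(_, _) => (),                             // unchanged: nothing written
            EitherOrBoth::Left(v) => println!("Deletion entry {v:?}"),  // only in del
            EitherOrBoth::Right(v) => println!("Addition entry {v:?}"), // only in add
        }
    }
}
```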

View File

@ -1,6 +1,6 @@
use std::collections::{BTreeSet, HashSet};
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use heed::BytesDecode;
use obkv::KvReaderU16;
@ -28,7 +28,11 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
exact_attributes: &HashSet<FieldId>,
) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> {
) -> Result<(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
@ -53,17 +57,17 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::new(&value);
let del_add_reader = KvReaderDelAdd::new(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::new(&deletion).iter() {
for (_pos, word) in KvReaderU16::new(deletion).iter() {
del_words.insert(word.to_vec());
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::new(&addition).iter() {
for (_pos, word) in KvReaderU16::new(addition).iter() {
add_words.insert(word.to_vec());
}
}
@ -118,9 +122,9 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// every word contained in an attribute set to exact must be pushed in the exact_words list.
if exact_attributes.contains(&fid) {
exact_word_docids_sorter.insert(word.as_bytes(), &value)?;
exact_word_docids_sorter.insert(word.as_bytes(), value)?;
} else {
word_docids_sorter.insert(word.as_bytes(), &value)?;
word_docids_sorter.insert(word.as_bytes(), value)?;
}
}
@ -165,7 +169,7 @@ fn words_into_sorter(
};
key_buffer.clear();
key_buffer.extend_from_slice(&word_bytes);
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;

View File

@ -1,5 +1,6 @@
use std::collections::{BTreeMap, VecDeque};
use std::fs::File;
use std::io::BufReader;
use std::{cmp, io};
use obkv::KvReaderU16;
@ -22,13 +23,12 @@ use crate::{DocumentId, Result};
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.into_iter()
.map(|_| {
create_sorter(
grenad::SortAlgorithm::Unstable,
@ -74,7 +74,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
let (del, add): (Result<_>, Result<_>) = rayon::join(
|| {
// deletions
if let Some(deletion) = KvReaderDelAdd::new(&value).get(DelAdd::Deletion) {
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.get(0).map_or(false, |(_w, p)| {
@ -103,7 +103,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
},
|| {
// additions
if let Some(addition) = KvReaderDelAdd::new(&value).get(DelAdd::Addition) {
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.get(0).map_or(false, |(_w, p)| {
@ -169,7 +169,7 @@ fn document_word_positions_into_sorter(
document_id: DocumentId,
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
word_pair_proximity_docids_sorters: &mut Vec<grenad::Sorter<MergeFn>>,
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@ -200,7 +200,7 @@ fn document_word_positions_into_sorter(
};
key_buffer.clear();
key_buffer.push(*prox as u8);
key_buffer.push(*prox);
key_buffer.extend_from_slice(w1.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(w2.as_bytes());

View File

@ -1,6 +1,6 @@
use std::collections::BTreeSet;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use obkv::KvReaderU16;
@ -22,7 +22,7 @@ use crate::{bucketed_position, DocumentId, Result};
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
@ -60,7 +60,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(&value);
let del_add_reader = KvReaderDelAdd::new(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {

View File

@ -11,6 +11,7 @@ mod extract_word_position_docids;
use std::collections::HashSet;
use std::fs::File;
use std::io::BufReader;
use crossbeam_channel::Sender;
use log::debug;
@ -27,8 +28,8 @@ use self::extract_word_docids::extract_word_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{
as_cloneable_grenad, merge_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
MergeableReader,
as_cloneable_grenad, merge_deladd_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters,
MergeFn, MergeableReader,
};
use super::{helpers, TypedChunk};
use crate::{FieldId, Result};
@ -37,8 +38,8 @@ use crate::{FieldId, Result};
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: Option<HashSet<FieldId>>,
@ -62,7 +63,6 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
vectors_field_id,
primary_key_id,
)
})
.collect::<Result<()>>()?;
@ -107,7 +107,7 @@ pub(crate) fn data_from_obkv_documents(
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-exists-docids");
match facet_exists_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
}
@ -123,7 +123,7 @@ pub(crate) fn data_from_obkv_documents(
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-null-docids");
match facet_is_null_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
}
@ -139,7 +139,7 @@ pub(crate) fn data_from_obkv_documents(
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-empty-docids");
match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
}
@ -150,36 +150,40 @@ pub(crate) fn data_from_obkv_documents(
});
}
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::WordPairProximityDocids,
"word-pair-proximity-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
extract_fid_word_count_docids,
merge_cbo_roaring_bitmaps,
TypedChunk::FieldIdWordcountDocids,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::FieldIdWordCountDocids,
"field-id-wordcount-docids",
);
spawn_extraction_task::<
_,
_,
Vec<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)>,
Vec<(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)>,
>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
|(word_docids_reader, exact_word_docids_reader, word_fid_docids_reader)| {
TypedChunk::WordDocids {
word_docids_reader,
@ -190,32 +194,32 @@ pub(crate) fn data_from_obkv_documents(
"word-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
extract_word_position_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::WordPositionDocids,
"word-position-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
fid_docid_facet_strings_chunks,
indexer,
lmdb_writer_sx.clone(),
extract_facet_string_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::FieldIdFacetStringDocids,
"field-id-facet-string-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
fid_docid_facet_numbers_chunks,
indexer,
lmdb_writer_sx,
extract_facet_number_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::FieldIdFacetNumberDocids,
"field-id-facet-number-docids",
);
@ -265,11 +269,10 @@ fn spawn_extraction_task<FE, FS, M>(
/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<File>>,
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
primary_key_id: FieldId,
) -> Result<()> {
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
@ -278,12 +281,7 @@ fn send_original_documents_data(
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let result = extract_vector_points(documents_chunk_cloned, indexer, vectors_field_id);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
@ -307,7 +305,7 @@ fn send_original_documents_data(
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn send_and_extract_flattened_documents_data(
flattened_documents_chunk: Result<grenad::Reader<File>>,
flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: &Option<HashSet<FieldId>>,
@ -324,7 +322,10 @@ fn send_and_extract_flattened_documents_data(
grenad::Reader<CursorClonableMmap>,
(
grenad::Reader<CursorClonableMmap>,
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
(
grenad::Reader<BufReader<File>>,
(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
),
),
),
)> {
@ -347,7 +348,7 @@ fn send_and_extract_flattened_documents_data(
let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {
let (documents_ids, docid_word_positions_chunk, script_language_pair) =
let (docid_word_positions_chunk, script_language_pair) =
extract_docid_word_positions(
flattened_documents_chunk.clone(),
indexer,
@ -358,9 +359,6 @@ fn send_and_extract_flattened_documents_data(
max_positions_per_attributes,
)?;
// send documents_ids to DB writer
let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids)));
// send docid_word_positions_chunk to DB writer
let docid_word_positions_chunk =
unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };

View File

@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::{self, Seek};
use std::io::{self, BufReader, BufWriter, Seek};
use std::time::Instant;
use grenad::{CompressionType, Sorter};
@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
typ: grenad::CompressionType,
level: Option<u32>,
file: R,
) -> grenad::Writer<R> {
) -> grenad::Writer<BufWriter<R>> {
let mut builder = grenad::Writer::builder();
builder.compression_type(typ);
if let Some(level) = level {
builder.compression_level(level);
}
builder.build(file)
builder.build(BufWriter::new(file))
}
pub fn create_sorter(
@ -47,13 +47,14 @@ pub fn create_sorter(
builder.allow_realloc(false);
}
builder.sort_algorithm(sort_algorithm);
builder.sort_in_parallel(true);
builder.build()
}
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(
indexer.chunk_compression_type,
@ -65,16 +66,18 @@ pub fn sorter_into_reader(
writer_into_reader(writer)
}
pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
let mut file = writer.into_inner()?;
pub fn writer_into_reader(
writer: grenad::Writer<BufWriter<File>>,
) -> Result<grenad::Reader<BufReader<File>>> {
let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
file.rewind()?;
grenad::Reader::new(file).map_err(Into::into)
grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
}
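With `create_writer` now wrapping the file in a `BufWriter`, `writer_into_reader` has to flush and unwrap that buffer before re-reading the file. A standalone sketch of that flush-and-unwrap step, assuming `tempfile` as used elsewhere in this module (illustrative, not code from the diff):
```rust
// Illustrative only: into_inner() flushes the BufWriter and hands back the File;
// the IntoInnerError must be converted back into a plain io::Error.
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Seek, Write};

fn buffered_roundtrip() -> io::Result<BufReader<File>> {
    let mut writer = BufWriter::new(tempfile::tempfile()?);
    writer.write_all(b"some grenad bytes")?;
    let mut file = writer.into_inner().map_err(|err| err.into_error())?;
    file.rewind()?;
    Ok(BufReader::new(file))
}
```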
pub unsafe fn as_cloneable_grenad(
reader: &grenad::Reader<File>,
reader: &grenad::Reader<BufReader<File>>,
) -> Result<grenad::Reader<CursorClonableMmap>> {
let file = reader.get_ref();
let file = reader.get_ref().get_ref();
let mmap = memmap2::Mmap::map(file)?;
let cursor = io::Cursor::new(ClonableMmap::from(mmap));
let reader = grenad::Reader::new(cursor)?;
@ -90,8 +93,8 @@ where
fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
}
impl MergeableReader for Vec<grenad::Reader<File>> {
type Output = grenad::Reader<File>;
impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
type Output = grenad::Reader<BufReader<File>>;
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut merger = MergerBuilder::new(merge_fn);
@ -100,8 +103,8 @@ impl MergeableReader for Vec<grenad::Reader<File>> {
}
}
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
type Output = (grenad::Reader<File>, grenad::Reader<File>);
impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut m1 = MergerBuilder::new(merge_fn);
@ -114,8 +117,18 @@ impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
}
}
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> {
type Output = (grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>);
impl MergeableReader
for Vec<(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)>
{
type Output = (
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
);
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut m1 = MergerBuilder::new(merge_fn);
@ -142,7 +155,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
Ok(())
}
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
let merger = self.0.build();
let mut writer = create_writer(
params.chunk_compression_type,
@ -193,7 +206,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
reader: grenad::Reader<R>,
indexer: GrenadParameters,
documents_chunk_size: usize,
) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
let mut continue_reading = true;
let mut cursor = reader.into_cursor()?;
@ -210,11 +223,13 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
);
while let Some((document_id, obkv)) = cursor.move_on_next()? {
obkv_documents.insert(document_id, obkv)?;
current_chunk_size += document_id.len() as u64 + obkv.len() as u64;
if !obkv.is_empty() {
obkv_documents.insert(document_id, obkv)?;
current_chunk_size += document_id.len() as u64 + obkv.len() as u64;
if current_chunk_size >= documents_chunk_size as u64 {
return writer_into_reader(obkv_documents).map(Some);
if current_chunk_size >= documents_chunk_size as u64 {
return writer_into_reader(obkv_documents).map(Some);
}
}
}

View File

@ -157,7 +157,7 @@ fn inner_merge_del_add_obkvs<'a>(
let mut acc = newest[1..].to_vec();
let mut buffer = Vec::new();
// reverse iter from the most recent to the oldest.
for current in obkvs.into_iter().rev() {
for current in obkvs.iter().rev() {
// if in the previous iteration there was a complete deletion,
// stop the merge process.
if acc_operation_type == Operation::Deletion as u8 {

View File

@ -35,8 +35,8 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids,
WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::{CboRoaringBitmapCodec, Index, Result};
@ -89,7 +89,6 @@ pub struct IndexDocumentsConfig {
pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod,
pub deletion_strategy: DeletionStrategy,
pub autogenerate_docids: bool,
}
@ -181,6 +180,7 @@ where
// Early return when there is no document to add
if to_delete.is_empty() {
// Maintains Invariant: remove documents actually always returns Ok for the inner result
return Ok((self, Ok(0)));
}
@ -193,6 +193,7 @@ where
self.deleted_documents += deleted_documents;
// Maintains Invariant: remove documents actually always returns Ok for the inner result
Ok((self, Ok(deleted_documents)))
}
@ -200,7 +201,7 @@ where
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
puffin::profile_function!();
if self.added_documents == 0 {
if self.added_documents == 0 && self.deleted_documents == 0 {
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
}
@ -244,9 +245,6 @@ where
primary_key,
fields_ids_map,
field_distribution,
new_external_documents_ids,
new_documents_ids,
replaced_documents_ids,
documents_count,
original_documents,
flattened_documents,
@ -370,29 +368,12 @@ where
let _ = lmdb_writer_sx.send(Err(e));
}
// needs to be droped to avoid channel waiting lock.
// needs to be dropped to avoid channel waiting lock.
drop(lmdb_writer_sx)
});
// We delete the documents that this document addition replaces. This way we are
// able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() {
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
deletion_builder.strategy(self.config.deletion_strategy);
debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_result = deletion_builder.execute_inner()?;
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
}
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
let index_is_empty = index_documents_ids.is_empty();
let index_is_empty = self.index.number_of_documents(self.wtxn)? == 0;
let mut final_documents_ids = RoaringBitmap::new();
let mut word_pair_proximity_docids = None;
let mut word_position_docids = None;
let mut word_fid_docids = None;
let mut word_docids = None;
let mut exact_word_docids = None;
let mut databases_seen = 0;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
@ -405,38 +386,9 @@ where
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
let typed_chunk = match result? {
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => {
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
word_docids = Some(cloneable_chunk);
let cloneable_chunk =
unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
exact_word_docids = Some(cloneable_chunk);
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
word_fid_docids = Some(cloneable_chunk);
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
}
}
TypedChunk::WordPairProximityDocids(chunk) => {
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
word_pair_proximity_docids = Some(cloneable_chunk);
TypedChunk::WordPairProximityDocids(chunk)
}
TypedChunk::WordPositionDocids(chunk) => {
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
word_position_docids = Some(cloneable_chunk);
TypedChunk::WordPositionDocids(chunk)
}
otherwise => otherwise,
};
let typed_chunk = result?;
// FIXME: return newly added as well as newly deleted documents
let (docids, is_merged_database) =
write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
if !docids.is_empty() {
@ -466,15 +418,6 @@ where
// We write the primary key field id into the main database
self.index.put_primary_key(self.wtxn, &primary_key)?;
// We write the external documents ids into the main database.
let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?;
external_documents_ids.insert_ids(&new_external_documents_ids)?;
let external_documents_ids = external_documents_ids.into_static();
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
let all_documents_ids = index_documents_ids | new_documents_ids;
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
// TODO: reactivate prefix DB with diff-indexing
// self.execute_prefix_databases(
// word_docids,
@ -484,7 +427,7 @@ where
// word_fid_docids,
// )?;
Ok(all_documents_ids.len())
self.index.number_of_documents(self.wtxn)
}
#[logging_timer::time("IndexDocuments::{}")]
@ -718,14 +661,15 @@ fn execute_word_prefix_docids(
#[cfg(test)]
mod tests {
use big_s::S;
use fst::IntoStreamer;
use heed::RwTxn;
use maplit::hashset;
use super::*;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::search::TermsMatchingStrategy;
use crate::update::DeleteDocuments;
use crate::{db_snap, BEU16};
use crate::{db_snap, Filter, Search, BEU16};
#[test]
fn simple_document_replacement() {
@ -816,11 +760,10 @@ mod tests {
assert_eq!(count, 1);
// Check that we get only one document from the database.
// Since the document has been deleted and re-inserted, its internal docid has been incremented to 1
let docs = index.documents(&rtxn, Some(1)).unwrap();
let docs = index.documents(&rtxn, Some(0)).unwrap();
assert_eq!(docs.len(), 1);
let (id, doc) = docs[0];
assert_eq!(id, 1);
assert_eq!(id, 0);
// Check that this document is equal to the last one sent.
let mut doc_iter = doc.iter();
@ -881,7 +824,7 @@ mod tests {
assert_eq!(count, 3);
// the document 0 has been deleted and reinserted with the id 3
let docs = index.documents(&rtxn, vec![1, 2, 3]).unwrap();
let docs = index.documents(&rtxn, vec![1, 2, 0]).unwrap();
let kevin_position =
docs.iter().position(|(_, d)| d.get(0).unwrap() == br#""updated kevin""#).unwrap();
assert_eq!(kevin_position, 2);
@ -1027,7 +970,6 @@ mod tests {
assert_eq!(count, 6);
db_snap!(index, word_docids, "updated");
db_snap!(index, soft_deleted_documents_ids, "updated", @"[0, 1, 4, ]");
drop(rtxn);
}
@ -1130,17 +1072,15 @@ mod tests {
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]))
.unwrap();
let mut wtxn = index.write_txn().unwrap();
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
// Delete some of the documents, but not all of them.
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_external_id("30");
builder.execute().unwrap();
index.delete_document("30");
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_none());
wtxn.commit().unwrap();
let txn = index.read_txn().unwrap();
assert_eq!(index.primary_key(&txn).unwrap(), Some("objectId"));
let external_documents_ids = index.external_documents_ids();
assert!(external_documents_ids.get(&txn, "30").unwrap().is_none());
index
.add_documents(documents!([
@ -1149,8 +1089,8 @@ mod tests {
.unwrap();
let wtxn = index.write_txn().unwrap();
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_some());
let external_documents_ids = index.external_documents_ids();
assert!(external_documents_ids.get(&wtxn, "30").unwrap().is_some());
wtxn.commit().unwrap();
index
@ -1444,8 +1384,10 @@ mod tests {
index.add_documents(documents!({ "a" : { "b" : { "c" : 1 }}})).unwrap();
let rtxn = index.read_txn().unwrap();
let external_documents_ids = index.external_documents_ids(&rtxn).unwrap();
assert!(external_documents_ids.get("1").is_some());
let all_documents_count = index.all_documents(&rtxn).unwrap().count();
assert_eq!(all_documents_count, 1);
let external_documents_ids = index.external_documents_ids();
assert!(external_documents_ids.get(&rtxn, "1").unwrap().is_some());
}
#[test]
@ -1499,12 +1441,6 @@ mod tests {
3 2 second second
3 3 third third
"###);
db_snap!(index, string_faceted_documents_ids, @r###"
0 []
1 []
2 []
3 [0, 1, 2, 3, ]
"###);
let rtxn = index.read_txn().unwrap();
@ -1528,12 +1464,6 @@ mod tests {
db_snap!(index, facet_id_string_docids, @"");
db_snap!(index, field_id_docid_facet_strings, @"");
db_snap!(index, string_faceted_documents_ids, @r###"
0 []
1 []
2 []
3 [0, 1, 2, 3, ]
"###);
let rtxn = index.read_txn().unwrap();
@ -1560,12 +1490,6 @@ mod tests {
3 2 second second
3 3 third third
"###);
db_snap!(index, string_faceted_documents_ids, @r###"
0 []
1 []
2 []
3 [0, 1, 2, 3, ]
"###);
let rtxn = index.read_txn().unwrap();
@ -1728,7 +1652,7 @@ mod tests {
let wtxn = index.read_txn().unwrap();
let map = index.external_documents_ids(&wtxn).unwrap().to_hash_map();
let map = index.external_documents_ids().to_hash_map(&wtxn).unwrap();
let ids = map.values().collect::<HashSet<_>>();
assert_eq!(ids.len(), map.len());
@ -2540,17 +2464,8 @@ mod tests {
db_snap!(index, word_fid_docids, 2, @"a48d3f88db33f94bc23110a673ea49e4");
db_snap!(index, word_position_docids, 2, @"3c9e66c6768ae2cf42b46b2c46e46a83");
let mut wtxn = index.write_txn().unwrap();
// Delete some of the documents, but not all of them.
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_external_id("0");
builder.delete_external_id("3");
let result = builder.execute().unwrap();
println!("{result:?}");
wtxn.commit().unwrap();
index.delete_documents(vec!["0".into(), "3".into()]);
db_snap!(index, word_fid_docids, 3, @"4c2e2a1832e5802796edc1638136d933");
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
@ -2605,8 +2520,7 @@ mod tests {
),
]
*/
let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let index = TempIndex::new();
// START OF BATCH
@ -2646,8 +2560,7 @@ mod tests {
{"id":1,"doggo":"bernese"}
"###);
db_snap!(index, external_documents_ids, @r###"
soft:
hard:
docids:
1 0
"###);
@ -2692,13 +2605,10 @@ mod tests {
"###);
db_snap!(index, external_documents_ids, @r###"
soft:
hard:
docids:
0 1
"###);
db_snap!(index, soft_deleted_documents_ids, @"[]");
// BATCH 3
println!("--- ENTERING BATCH 3");
@ -2740,4 +2650,537 @@ mod tests {
let res = index.search(&rtxn).execute().unwrap();
index.documents(&rtxn, res.documents_ids).unwrap();
}
fn delete_documents<'t>(
wtxn: &mut RwTxn<'t, '_>,
index: &'t TempIndex,
external_ids: &[&str],
) -> Vec<u32> {
let external_document_ids = index.external_documents_ids();
let ids_to_delete: Vec<u32> = external_ids
.iter()
.map(|id| external_document_ids.get(wtxn, id).unwrap().unwrap())
.collect();
// Delete some documents.
index.delete_documents_using_wtxn(
wtxn,
external_ids.iter().map(ToString::to_string).collect(),
);
ids_to_delete
}
#[test]
fn delete_documents_with_numbers_as_primary_key() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } },
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
]),
)
.unwrap();
// delete those documents; their ids are sequential, therefore 0, 1, and 2.
index.delete_documents_using_wtxn(&mut wtxn, vec![S("0"), S("1"), S("2")]);
wtxn.commit().unwrap();
// All these snapshots should be empty since the database was cleared
db_snap!(index, documents_ids);
db_snap!(index, word_docids);
db_snap!(index, word_pair_proximity_docids);
db_snap!(index, facet_id_exists_docids);
let rtxn = index.read_txn().unwrap();
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
}
#[test]
fn delete_documents_with_strange_primary_key() {
let index = TempIndex::new();
index
.update_settings(|settings| settings.set_searchable_fields(vec!["name".to_string()]))
.unwrap();
let mut wtxn = index.write_txn().unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "mysuperid": 0, "name": "kevin" },
{ "mysuperid": 1, "name": "kevina" },
{ "mysuperid": 2, "name": "benoit" }
]),
)
.unwrap();
wtxn.commit().unwrap();
let mut wtxn = index.write_txn().unwrap();
// Delete some of the documents, but not all of them.
index.delete_documents_using_wtxn(&mut wtxn, vec![S("0"), S("1")]);
wtxn.commit().unwrap();
db_snap!(index, documents_ids);
db_snap!(index, word_docids);
db_snap!(index, word_pair_proximity_docids);
}
#[test]
fn filtered_placeholder_search_should_not_return_deleted_documents() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
settings.set_filterable_fields(hashset! { S("label"), S("label2") });
})
.unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "docid": "1_4", "label": ["sign"] },
{ "docid": "1_5", "label": ["letter"] },
{ "docid": "1_7", "label": ["abstract","cartoon","design","pattern"] },
{ "docid": "1_36", "label": ["drawing","painting","pattern"] },
{ "docid": "1_37", "label": ["art","drawing","outdoor"] },
{ "docid": "1_38", "label": ["aquarium","art","drawing"] },
{ "docid": "1_39", "label": ["abstract"] },
{ "docid": "1_40", "label": ["cartoon"] },
{ "docid": "1_41", "label": ["art","drawing"] },
{ "docid": "1_42", "label": ["art","pattern"] },
{ "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
{ "docid": "1_44", "label": ["drawing"] },
{ "docid": "1_45", "label": ["art"] },
{ "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
{ "docid": "1_47", "label": ["abstract","pattern"] },
{ "docid": "1_52", "label": ["abstract","cartoon"] },
{ "docid": "1_57", "label": ["abstract","drawing","pattern"] },
{ "docid": "1_58", "label": ["abstract","art","cartoon"] },
{ "docid": "1_68", "label": ["design"] },
{ "docid": "1_69", "label": ["geometry"] },
{ "docid": "1_70", "label2": ["geometry", 1.2] },
{ "docid": "1_71", "label2": ["design", 2.2] },
{ "docid": "1_72", "label2": ["geometry", 1.2] }
]),
)
.unwrap();
delete_documents(&mut wtxn, &index, &["1_4", "1_70", "1_72"]);
// Placeholder search with filter
let filter = Filter::from_str("label = sign").unwrap().unwrap();
let results = index.search(&wtxn).filter(filter).execute().unwrap();
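// Expected empty: "1_4" was the only document labeled "sign", and it was just deleted.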
assert!(results.documents_ids.is_empty());
wtxn.commit().unwrap();
db_snap!(index, word_docids);
db_snap!(index, facet_id_f64_docids);
db_snap!(index, word_pair_proximity_docids);
db_snap!(index, facet_id_exists_docids);
db_snap!(index, facet_id_string_docids);
}
#[test]
fn placeholder_search_should_not_return_deleted_documents() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
})
.unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "docid": "1_4", "label": ["sign"] },
{ "docid": "1_5", "label": ["letter"] },
{ "docid": "1_7", "label": ["abstract","cartoon","design","pattern"] },
{ "docid": "1_36", "label": ["drawing","painting","pattern"] },
{ "docid": "1_37", "label": ["art","drawing","outdoor"] },
{ "docid": "1_38", "label": ["aquarium","art","drawing"] },
{ "docid": "1_39", "label": ["abstract"] },
{ "docid": "1_40", "label": ["cartoon"] },
{ "docid": "1_41", "label": ["art","drawing"] },
{ "docid": "1_42", "label": ["art","pattern"] },
{ "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
{ "docid": "1_44", "label": ["drawing"] },
{ "docid": "1_45", "label": ["art"] },
{ "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
{ "docid": "1_47", "label": ["abstract","pattern"] },
{ "docid": "1_52", "label": ["abstract","cartoon"] },
{ "docid": "1_57", "label": ["abstract","drawing","pattern"] },
{ "docid": "1_58", "label": ["abstract","art","cartoon"] },
{ "docid": "1_68", "label": ["design"] },
{ "docid": "1_69", "label": ["geometry"] },
{ "docid": "1_70", "label2": ["geometry", 1.2] },
{ "docid": "1_71", "label2": ["design", 2.2] },
{ "docid": "1_72", "label2": ["geometry", 1.2] }
]),
)
.unwrap();
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1_4"]);
// Placeholder search
let results = index.search(&wtxn).execute().unwrap();
assert!(!results.documents_ids.is_empty());
for id in results.documents_ids.iter() {
assert!(
!deleted_internal_ids.contains(id),
"The document {} was supposed to be deleted",
id
);
}
wtxn.commit().unwrap();
}
#[test]
fn search_should_not_return_deleted_documents() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
})
.unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "docid": "1_4", "label": ["sign"] },
{ "docid": "1_5", "label": ["letter"] },
{ "docid": "1_7", "label": ["abstract","cartoon","design","pattern"] },
{ "docid": "1_36", "label": ["drawing","painting","pattern"] },
{ "docid": "1_37", "label": ["art","drawing","outdoor"] },
{ "docid": "1_38", "label": ["aquarium","art","drawing"] },
{ "docid": "1_39", "label": ["abstract"] },
{ "docid": "1_40", "label": ["cartoon"] },
{ "docid": "1_41", "label": ["art","drawing"] },
{ "docid": "1_42", "label": ["art","pattern"] },
{ "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
{ "docid": "1_44", "label": ["drawing"] },
{ "docid": "1_45", "label": ["art"] },
{ "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
{ "docid": "1_47", "label": ["abstract","pattern"] },
{ "docid": "1_52", "label": ["abstract","cartoon"] },
{ "docid": "1_57", "label": ["abstract","drawing","pattern"] },
{ "docid": "1_58", "label": ["abstract","art","cartoon"] },
{ "docid": "1_68", "label": ["design"] },
{ "docid": "1_69", "label": ["geometry"] },
{ "docid": "1_70", "label2": ["geometry", 1.2] },
{ "docid": "1_71", "label2": ["design", 2.2] },
{ "docid": "1_72", "label2": ["geometry", 1.2] }
]),
)
.unwrap();
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1_7", "1_52"]);
// search for abstract
let results = index.search(&wtxn).query("abstract").execute().unwrap();
assert!(!results.documents_ids.is_empty());
for id in results.documents_ids.iter() {
assert!(
!deleted_internal_ids.contains(id),
"The document {} was supposed to be deleted",
id
);
}
wtxn.commit().unwrap();
}
#[test]
fn geo_filtered_placeholder_search_should_not_return_deleted_documents() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("id"));
settings.set_filterable_fields(hashset!(S("_geo")));
settings.set_sortable_fields(hashset!(S("_geo")));
})
.unwrap();
index.add_documents_using_wtxn(&mut wtxn, documents!([
{ "id": "1", "city": "Lille", "_geo": { "lat": 50.6299, "lng": 3.0569 } },
{ "id": "2", "city": "Mons-en-Barœul", "_geo": { "lat": 50.6415, "lng": 3.1106 } },
{ "id": "3", "city": "Hellemmes", "_geo": { "lat": 50.6312, "lng": 3.1106 } },
{ "id": "4", "city": "Villeneuve-d'Ascq", "_geo": { "lat": 50.6224, "lng": 3.1476 } },
{ "id": "5", "city": "Hem", "_geo": { "lat": 50.6552, "lng": 3.1897 } },
{ "id": "6", "city": "Roubaix", "_geo": { "lat": 50.6924, "lng": 3.1763 } },
{ "id": "7", "city": "Tourcoing", "_geo": { "lat": 50.7263, "lng": 3.1541 } },
{ "id": "8", "city": "Mouscron", "_geo": { "lat": 50.7453, "lng": 3.2206 } },
{ "id": "9", "city": "Tournai", "_geo": { "lat": 50.6053, "lng": 3.3758 } },
{ "id": "10", "city": "Ghent", "_geo": { "lat": 51.0537, "lng": 3.6957 } },
{ "id": "11", "city": "Brussels", "_geo": { "lat": 50.8466, "lng": 4.3370 } },
{ "id": "12", "city": "Charleroi", "_geo": { "lat": 50.4095, "lng": 4.4347 } },
{ "id": "13", "city": "Mons", "_geo": { "lat": 50.4502, "lng": 3.9623 } },
{ "id": "14", "city": "Valenciennes", "_geo": { "lat": 50.3518, "lng": 3.5326 } },
{ "id": "15", "city": "Arras", "_geo": { "lat": 50.2844, "lng": 2.7637 } },
{ "id": "16", "city": "Cambrai", "_geo": { "lat": 50.1793, "lng": 3.2189 } },
{ "id": "17", "city": "Bapaume", "_geo": { "lat": 50.1112, "lng": 2.8547 } },
{ "id": "18", "city": "Amiens", "_geo": { "lat": 49.9314, "lng": 2.2710 } },
{ "id": "19", "city": "Compiègne", "_geo": { "lat": 49.4449, "lng": 2.7913 } },
{ "id": "20", "city": "Paris", "_geo": { "lat": 48.9021, "lng": 2.3708 } }
])).unwrap();
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &external_ids_to_delete);
// Placeholder search with geo filter
let filter = Filter::from_str("_geoRadius(50.6924, 3.1763, 20000)").unwrap().unwrap();
let results = index.search(&wtxn).filter(filter).execute().unwrap();
assert!(!results.documents_ids.is_empty());
for id in results.documents_ids.iter() {
assert!(
!deleted_internal_ids.contains(id),
"The document {} was supposed to be deleted",
id
);
}
wtxn.commit().unwrap();
db_snap!(index, facet_id_f64_docids);
db_snap!(index, facet_id_string_docids);
}
#[test]
fn get_documents_should_not_return_deleted_documents() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
})
.unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "docid": "1_4", "label": ["sign"] },
{ "docid": "1_5", "label": ["letter"] },
{ "docid": "1_7", "label": ["abstract","cartoon","design","pattern"] },
{ "docid": "1_36", "label": ["drawing","painting","pattern"] },
{ "docid": "1_37", "label": ["art","drawing","outdoor"] },
{ "docid": "1_38", "label": ["aquarium","art","drawing"] },
{ "docid": "1_39", "label": ["abstract"] },
{ "docid": "1_40", "label": ["cartoon"] },
{ "docid": "1_41", "label": ["art","drawing"] },
{ "docid": "1_42", "label": ["art","pattern"] },
{ "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
{ "docid": "1_44", "label": ["drawing"] },
{ "docid": "1_45", "label": ["art"] },
{ "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
{ "docid": "1_47", "label": ["abstract","pattern"] },
{ "docid": "1_52", "label": ["abstract","cartoon"] },
{ "docid": "1_57", "label": ["abstract","drawing","pattern"] },
{ "docid": "1_58", "label": ["abstract","art","cartoon"] },
{ "docid": "1_68", "label": ["design"] },
{ "docid": "1_69", "label": ["geometry"] },
{ "docid": "1_70", "label2": ["geometry", 1.2] },
{ "docid": "1_71", "label2": ["design", 2.2] },
{ "docid": "1_72", "label2": ["geometry", 1.2] }
]),
)
.unwrap();
let deleted_external_ids = ["1_7", "1_52"];
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &deleted_external_ids);
// list all documents
let results = index.all_documents(&wtxn).unwrap();
for result in results {
let (id, _) = result.unwrap();
assert!(
!deleted_internal_ids.contains(&id),
"The document {} was supposed to be deleted",
id
);
}
// list internal document ids
let results = index.documents_ids(&wtxn).unwrap();
for id in results {
assert!(
!deleted_internal_ids.contains(&id),
"The document {} was supposed to be deleted",
id
);
}
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
// get internal docids from deleted external document ids
let results = index.external_documents_ids();
for id in deleted_external_ids {
assert!(
results.get(&rtxn, id).unwrap().is_none(),
"The document {} was supposed to be deleted",
id
);
}
drop(rtxn);
}
#[test]
fn stats_should_not_return_deleted_documents() {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
})
.unwrap();
index.add_documents_using_wtxn(&mut wtxn, documents!([
{ "docid": "1_4", "label": ["sign"]},
{ "docid": "1_5", "label": ["letter"]},
{ "docid": "1_7", "label": ["abstract","cartoon","design","pattern"], "title": "Mickey Mouse"},
{ "docid": "1_36", "label": ["drawing","painting","pattern"]},
{ "docid": "1_37", "label": ["art","drawing","outdoor"]},
{ "docid": "1_38", "label": ["aquarium","art","drawing"], "title": "Nemo"},
{ "docid": "1_39", "label": ["abstract"]},
{ "docid": "1_40", "label": ["cartoon"]},
{ "docid": "1_41", "label": ["art","drawing"]},
{ "docid": "1_42", "label": ["art","pattern"]},
{ "docid": "1_43", "label": ["abstract","art","drawing","pattern"], "number": 32i32},
{ "docid": "1_44", "label": ["drawing"], "number": 44i32},
{ "docid": "1_45", "label": ["art"]},
{ "docid": "1_46", "label": ["abstract","colorfulness","pattern"]},
{ "docid": "1_47", "label": ["abstract","pattern"]},
{ "docid": "1_52", "label": ["abstract","cartoon"]},
{ "docid": "1_57", "label": ["abstract","drawing","pattern"]},
{ "docid": "1_58", "label": ["abstract","art","cartoon"]},
{ "docid": "1_68", "label": ["design"]},
{ "docid": "1_69", "label": ["geometry"]}
])).unwrap();
delete_documents(&mut wtxn, &index, &["1_7", "1_52"]);
// count internal documents
let results = index.number_of_documents(&wtxn).unwrap();
assert_eq!(18, results);
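// Field distribution after deleting "1_7" and "1_52": "label" was on all 20 documents (18 remain), "title" only on "1_7" and "1_38" (1 remains), "number" only on "1_43" and "1_44" (both remain).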
// count field distribution
let results = index.field_distribution(&wtxn).unwrap();
assert_eq!(Some(&18), results.get("label"));
assert_eq!(Some(&1), results.get("title"));
assert_eq!(Some(&2), results.get("number"));
wtxn.commit().unwrap();
}
#[test]
fn stored_detected_script_and_language_should_not_return_deleted_documents() {
use charabia::{Language, Script};
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "id": "0", "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": "1", "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
{ "id": "2", "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
{ "id": "3", "title": "関西国際空港限定トートバッグ すもももももももものうち" },
{ "id": "4", "title": "ภาษาไทยง่ายนิดเดียว" },
{ "id": "5", "title": "The quick 在尊嚴和權利上一律平等。" },
]))
.unwrap();
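// Documents "1" and "5" both contain Chinese text, so they are the ones expected under (Script::Cj, Language::Cmn).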
let key_cmn = (Script::Cj, Language::Cmn);
let cj_cmn_docs =
index.script_language_documents_ids(&wtxn, &key_cmn).unwrap().unwrap_or_default();
let mut expected_cj_cmn_docids = RoaringBitmap::new();
expected_cj_cmn_docids.push(1);
expected_cj_cmn_docids.push(5);
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
delete_documents(&mut wtxn, &index, &["1"]);
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
let cj_cmn_docs =
index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap_or_default();
let mut expected_cj_cmn_docids = RoaringBitmap::new();
expected_cj_cmn_docids.push(5);
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[test]
fn delete_words_exact_attributes() {
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_primary_key(S("id"));
settings.set_searchable_fields(vec![S("text"), S("exact")]);
settings.set_exact_attributes(vec![S("exact")].into_iter().collect());
})
.unwrap();
index
.add_documents(documents!([
{ "id": 0, "text": "hello" },
{ "id": 1, "exact": "hello"}
]))
.unwrap();
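// The same word lands in two databases: "hello" from the searchable "text" field goes to word_docids (doc 0), while "hello" from the exact attribute goes to exact_word_docids (doc 1).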
db_snap!(index, word_docids, 1, @r###"
hello [0, ]
"###);
db_snap!(index, exact_word_docids, 1, @r###"
hello [1, ]
"###);
db_snap!(index, words_fst, 1, @"300000000000000001084cfcfc2ce1000000016000000090ea47f");
let mut wtxn = index.write_txn().unwrap();
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1"]);
wtxn.commit().unwrap();
db_snap!(index, word_docids, 2, @r###"
hello [0, ]
"###);
db_snap!(index, exact_word_docids, 2, @"");
db_snap!(index, words_fst, 2, @"300000000000000001084cfcfc2ce1000000016000000090ea47f");
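// exact_word_docids is now empty, but "hello" stays in the words FST because document 0 still contains it in its regular field.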
insta::assert_snapshot!(format!("{deleted_internal_ids:?}"), @"[1]");
let txn = index.read_txn().unwrap();
let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###);
let mut s = Search::new(&txn, &index);
s.query("hello");
let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
}
}

View File

@@ -0,0 +1,4 @@
---
source: milli/src/update/index_documents/mod.rs
---
[]

View File

@@ -0,0 +1,4 @@
---
source: milli/src/update/index_documents/mod.rs
---

View File

@@ -0,0 +1,4 @@
---
source: milli/src/update/index_documents/mod.rs
---

View File

@@ -0,0 +1,4 @@
---
source: milli/src/update/index_documents/mod.rs
---

View File

@@ -0,0 +1,4 @@
---
source: milli/src/update/index_documents/mod.rs
---
[2, ]

View File

@@ -0,0 +1,5 @@
---
source: milli/src/update/index_documents/mod.rs
---
benoit [2, ]

View File

@@ -0,0 +1,4 @@
---
source: milli/src/update/index_documents/mod.rs
---

View File

@@ -1,5 +1,5 @@
---
source: milli/src/update/delete_documents.rs
source: milli/src/update/index_documents/mod.rs
---
1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ]
2 [21, ]

View File

@@ -0,0 +1,5 @@
---
source: milli/src/update/index_documents/mod.rs
---
2 0 2.2 1 [21, ]

View File

@@ -1,5 +1,5 @@
---
source: milli/src/update/delete_documents.rs
source: milli/src/update/index_documents/mod.rs
---
1 0 abstract 1 [2, 6, 10, 13, 14, 15, 16, 17, ]
1 0 aquarium 1 [5, ]

Some files were not shown because too many files have changed in this diff.