Compare commits


69 Commits

Author SHA1 Message Date
2614e7d9ca Merge #4174
4174: Fix warnings r=dureuill a=irevoire

Fix all the warnings found in the CI: https://github.com/meilisearch/meilisearch/actions/runs/6622576021/job/17988323623

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-30 10:12:54 +00:00
e7244aa485 fix warnings 2023-10-30 11:00:46 +01:00
9cacc82307 Merge #4169
4169: update charabia r=curquiza a=ManyTheFish

Update Charabia to v0.8.5 and add the new khmer tokenizer

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-10-26 17:21:30 +00:00
4c6fddb1cb update charabia 2023-10-26 17:01:10 +02:00
ca52021079 Merge #4154
4154: Update version for the next release (v1.5.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-23 12:00:50 +00:00
ee6f79d60b Update version for the next release (v1.5.0) in Cargo.toml 2023-10-23 11:49:07 +00:00
e4c24ca6a3 Merge #4151
4151: Bring back changes from v1.4.2 into `release-v1.5.0` r=dureuill a=curquiza

This brings the fixes from v1.4.2 into the v1.5.0 release.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2023-10-23 10:11:11 +00:00
2bae9550c8 Add explanatory comment 2023-10-23 12:06:28 +02:00
32c78ac8b1 add/update tests when search with distinct attribute & pagination with no ranking 2023-10-23 12:06:27 +02:00
5fe7c4545a compute all candidates correctly when skipping 2023-10-23 12:02:45 +02:00
2042229927 Update version for the next release (v1.4.2) in Cargo.toml 2023-10-23 12:02:45 +02:00
eae9eab181 Merge #4126
4126: Make the experimental route /metrics activatable via HTTP r=dureuill a=braddotcoffee

# Pull Request

## Related issue
Closes #4086

## What does this PR do?
- [x] Make `/metrics` available via HTTP as described in #4086 
- [x] Users can still launch Meilisearch using the `--experimental-enable-metrics` flag.
- [x] If the `--experimental-enable-metrics` flag is passed at launch, a call to the `GET /experimental-features` route right after startup will show `"metrics": true` even if the user has not called the `PATCH /experimental-features` route yet.
- [x] Even if the `--experimental-enable-metrics` flag is present at launch, calling the `PATCH /experimental-features` route with `"metrics": false` disables the experimental feature.
- [x] Update the spec
    - I was unable to find docs in this repository to update about the `/experimental-features` endpoint. I'll happily update if you point me in the right direction!
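Below is a minimal, hypothetical sketch (not part of this PR) of exercising the behaviour above over HTTP with the `reqwest` crate (`blocking` and `json` features enabled); the local address and master key are placeholders, not values from the PR:

```rust
// Hypothetical example, not part of the PR: toggle the `metrics` experimental
// feature at runtime and then hit `/metrics`. Address and key are placeholders.
use reqwest::blocking::Client;
use serde_json::json;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // Enable metrics over HTTP, even if `--experimental-enable-metrics` was not passed.
    let features = client
        .patch("http://localhost:7700/experimental-features")
        .bearer_auth("MASTER_KEY")
        .json(&json!({ "metrics": true }))
        .send()?;
    println!("features: {}", features.text()?);

    // `/metrics` should now answer 200 instead of a `feature_not_enabled` error.
    let metrics = client
        .get("http://localhost:7700/metrics")
        .bearer_auth("MASTER_KEY")
        .send()?;
    println!("GET /metrics -> {}", metrics.status());
    Ok(())
}
```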

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: bwbonanno <bradfordbonanno@gmail.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-10-23 08:51:37 +00:00
cf8dad1ca0 index_scheduler.features() is no longer fallible 2023-10-23 10:38:56 +02:00
dd619913da Use RwLock to never persist cli state to db 2023-10-19 12:45:57 -07:00
9b55ff16e9 Merge #4134
4134: Bump rustix from 0.36.15 to 0.36.16 r=Kerollmops a=dependabot[bot]

Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.36.15 to 0.36.16.
<details>
<summary>Commits</summary>
<ul>
<li><a href="6534992521"><code>6534992</code></a> chore: Release rustix version 0.36.16</li>
<li><a href="4928cf7a38"><code>4928cf7</code></a> Disable riscv64 testing.</li>
<li><a href="8cc159c4c3"><code>8cc159c</code></a> Fix the <code>test_ttyname_ok</code> test when /dev/stdin is inaccessable. (<a href="https://redirect.github.com/bytecodealliance/rustix/issues/821">#821</a>)</li>
<li><a href="6dc7ba9478"><code>6dc7ba9</code></a> Downgrade dependencies and disable tests to compile under Rust 1.48.</li>
<li><a href="ded8986e7e"><code>ded8986</code></a> Disable MIPS in CI. (<a href="https://redirect.github.com/bytecodealliance/rustix/issues/793">#793</a>)</li>
<li><a href="739f9c3ba0"><code>739f9c3</code></a> Fixes for <code>Dir</code> on macOS, FreeBSD, and WASI.</li>
<li><a href="87481a97f4"><code>87481a9</code></a> Merge pull request from GHSA-c827-hfw6-qwvm</li>
<li>See full diff in <a href="https://github.com/bytecodealliance/rustix/compare/v0.36.15...v0.36.16">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rustix&package-manager=cargo&previous-version=0.36.15&new-version=0.36.16)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-19 08:01:36 +00:00
e761db582f Bump rustix from 0.36.15 to 0.36.16
Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.36.15 to 0.36.16.
- [Release notes](https://github.com/bytecodealliance/rustix/releases)
- [Commits](https://github.com/bytecodealliance/rustix/compare/v0.36.15...v0.36.16)

---
updated-dependencies:
- dependency-name: rustix
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-18 18:42:12 +00:00
d8c649b3cd Return recoverable error if we fail to retrieve metrics state 2023-10-18 08:28:24 -07:00
5e0485d8dd Merge #4131
4131: Reduce proximity range from 7 to 3 r=Kerollmops a=ManyTheFish

## Summary
This PR aims to reduce the impact of the proximity databases on the indexing time and on the database size by reducing the maximum distance between two words to be indexed in the proximity database.

## Stats

### Impact on database size and indexing time
![Impact on datasets](https://github.com/meilisearch/meilisearch/assets/6482087/28ed3d96-bdde-41c1-bdac-e90c1b1dbb23)

### Impact on search relevancy

<details>

| dataset_name | host_name        | Relevancy rate (Precision) | completion_rate  25.00% | completion_rate 50.00% | completion_rate 75.00% | completion_rate 100.00% |
|--------------|------------------|------------------------------------|-----------------|-----------------|-----------------|-----------------|
| FBIS         | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |           5.56% |
| FBIS         | 1_4_0            | percentile-75 |           0.00% |          12.50% |          35.00% |          45.00% |
| FBIS         | 1_4_0            | percentile-90 |          20.00% |          40.00% |                 |         100.00% |
| FBIS         | 1_4_0            | average       |           5.78% |          11.16% |          21.90% |          26.29% |
| FBIS         | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |           5.56% |
| FBIS         | reduce_proximity | percentile-75 |           0.00% |          15.00% |          35.00% |          40.00% |
| FBIS         | reduce_proximity | percentile-90 |          20.00% |          40.00% |          85.00% |         100.00% |
| FBIS         | reduce_proximity | average       |           5.55% |          11.34% |          21.75% |          26.14% |
| FR94         | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-50 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-75 |           0.00% |           5.00% |          15.00% |          42.11% |
| FR94         | 1_4_0            | percentile-90 |          15.00% |          54.55% |         100.00% |         100.00% |
| FR94         | 1_4_0            | average       |           5.95% |          12.07% |          18.70% |          25.57% |
| FR94         | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-50 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-75 |           0.00% |           5.00% |          15.00% |          42.11% |
| FR94         | reduce_proximity | percentile-90 |          15.00% |          54.55% |         100.00% |         100.00% |
| FR94         | reduce_proximity | average       |           5.79% |          12.00% |          18.70% |          25.53% |
| FT           | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |          10.00% |
| FT           | 1_4_0            | percentile-75 |           0.00% |          15.00% |          30.00% |          40.00% |
| FT           | 1_4_0            | percentile-90 |          20.00% |          50.00% |          65.00% |         100.00% |
| FT           | 1_4_0            | average       |           5.08% |          12.58% |          20.00% |          25.49% |
| FT           | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |          10.00% |
| FT           | reduce_proximity | percentile-75 |           0.00% |          15.00% |          30.00% |          40.00% |
| FT           | reduce_proximity | percentile-90 |          10.00% |          45.00% |          60.00% |         100.00% |
| FT           | reduce_proximity | average       |           5.01% |          12.64% |          20.10% |          25.53% |
| LAT          | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |           5.00% |
| LAT          | 1_4_0            | percentile-75 |           5.00% |          15.00% |          30.00% |          30.00% |
| LAT          | 1_4_0            | percentile-90 |          15.00% |          45.00% |          60.00% |          80.00% |
| LAT          | 1_4_0            | average       |           4.80% |          11.80% |          17.88% |          21.62% |
| LAT          | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |           5.00% |
| LAT          | reduce_proximity | percentile-75 |           0.00% |          11.11% |          25.00% |          35.00% |
| LAT          | reduce_proximity | percentile-90 |          15.00% |          45.00% |          55.00% |          80.00% |
| LAT          | reduce_proximity | average       |           4.43% |          11.23% |          17.32% |          21.45% |

</details>

### Impact on Search time

| dataset_name | host_name        |      25.00% |      50.00% |      75.00% |     100.00% | Average     |
|--------------|------------------|------------:|------------:|------------:|------------:|-------------|
| FBIS         | 1_4_0            |        3.45 | 7.446666667 | 9.773489933 | 9.620300752 | 7.572614338 |
| FBIS         | reduce_proximity | 2.983333333 | 5.316666667 | 6.911073826 | 7.637218045 | 5.712072968 |
| FR94         | 1_4_0            | 2.236666667 |        4.45 | 5.523489933 | 4.560150376 | 4.192576744 |
| FR94         | reduce_proximity |        2.09 | 3.991666667 | 4.981543624 | 4.266917293 | 3.832531896 |
| FT           | 1_4_0            | 5.956666667 | 9.656666667 | 13.86912752 | 10.83270677 |  10.0787919 |
| FT           | reduce_proximity |        4.51 | 5.981666667 | 7.701342282 | 6.766917293 |  6.23998156 |
| LAT          | 1_4_0            | 5.856666667 | 9.233333333 | 12.98322148 | 10.78759398 | 9.715203865 |
| LAT          | reduce_proximity |        6.91 | 6.706666667 | 8.463087248 | 8.265037594 | 7.586197877 |

## Technical approach

- Ensure the MAX_DISTANCE constant is used everywhere needed
- Reduce the MAX_DISTANCE from 8 to 4
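For intuition only, here is a toy sketch of what that bound means in practice; the names are illustrative and not milli's actual API, but the effect is the same: word pairs whose positions differ by `MAX_DISTANCE` or more never reach the proximity database.

```rust
// Illustrative only: a toy version of proximity extraction. Halving MAX_DISTANCE
// roughly halves the number of (word, word, proximity) entries produced.
const MAX_DISTANCE: u32 = 4; // was 8 (i.e. max recorded proximity 7) before this PR

fn proximity_pairs(words: &[(&str, u32)]) -> Vec<(String, String, u32)> {
    let mut pairs = Vec::new();
    for (i, &(w1, p1)) in words.iter().enumerate() {
        for &(w2, p2) in &words[i + 1..] {
            let proximity = p2.saturating_sub(p1);
            // Pairs farther apart than MAX_DISTANCE - 1 are no longer indexed.
            if proximity < MAX_DISTANCE {
                pairs.push((w1.to_string(), w2.to_string(), proximity));
            }
        }
    }
    pairs
}

fn main() {
    let words = [("the", 0), ("quick", 1), ("brown", 2), ("fox", 3), ("jumps", 4)];
    for (w1, w2, d) in proximity_pairs(&words) {
        println!("{w1} - {w2}: proximity {d}");
    }
}
```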

## Related

TBD

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-10-18 14:56:08 +00:00
27eec21415 Fix tests 2023-10-18 16:03:22 +02:00
2b3adef796 Use index_scheduler from configured app_data in middleware 2023-10-17 08:17:13 -07:00
956cfc5487 Add runtime check to metrics middleware 2023-10-16 13:48:57 -07:00
12fc878640 Merge remote-tracking branch 'origin/main' into enable-metrics-http 2023-10-16 13:48:01 -07:00
0a2e8b92a9 Merge #4129
4129: Add webinar banner in README r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-10-16 17:35:48 +00:00
c7a3f80de6 Merge #4073
4073: Simplify Puffin report exports r=ManyTheFish a=Kerollmops

This PR changes how we export Puffin reports by directly writing them to disk when the `exportPuffinReports` [experimental feature is enabled](https://www.meilisearch.com/docs/learn/experimental/overview) on the `/experimental-features` route. It also adds more puffin logging to the deletion phase and the grenad helpers. The puffin reports are identified by the date and time at which they are exported.
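For reference, a minimal sketch of the export scheme described above (a date-stamped `.puffin` file written from a frame view); it assumes the `puffin` crate with its `serialization` feature plus `time` and `anyhow`, and omits the error handling the scheduler loop actually performs:

```rust
// Sketch only: write one Puffin frame view to a date-stamped `.puffin` file,
// mirroring the naming scheme mentioned above.
use std::fs::File;
use time::OffsetDateTime;

fn export_puffin_report(frame_view: &puffin::FrameView) -> anyhow::Result<()> {
    // Reports are identified by the date and time at which they are exported.
    let now = OffsetDateTime::now_utc();
    let mut file = File::create(format!("{}.puffin", now))?;
    frame_view.save_to_writer(&mut file)?;
    // Make sure the report actually reaches the disk before moving on.
    file.sync_all()?;
    Ok(())
}
```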

## Todo List
 - [x] Change the CLI flag to be an API experimental option.
 - [x] Create [a PRD for this experimental feature (private)](https://www.notion.so/meilisearch/Export-Puffin-Reports-091df151e71c4edfb7d72f4bf995b3ea).
 - [x] Create and complete [a product discussion](https://github.com/meilisearch/product/discussions/693) (copy/paste PROFILING markdown?).
 - [x] Update the _PROFILING.md_ markdown file instructions.
 - [x] Change the debug logs of the processing operation (visible in puffin viewer).

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-10-16 15:48:15 +00:00
029d4de043 Add webinar banner in README 2023-10-16 14:38:10 +02:00
549f1bcccf Merge #4125
4125: Rename benchmark CI file to find it easily in the manifest list r=Kerollmops a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-10-16 11:38:28 +00:00
689ec7c7ad Make the experimental route /metrics activatable via HTTP 2023-10-13 22:12:54 +00:00
3655d4bdca Move the puffin file export logic into the run function 2023-10-13 13:11:30 +02:00
055ca3935b Update index-scheduler/src/batch.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-13 13:11:30 +02:00
1b8871a585 Make cargo insta happy 2023-10-13 13:11:30 +02:00
bf8fac6676 Fix the tests 2023-10-13 13:11:30 +02:00
f2a9e1ebbb Improve the debugging experience in the puffin reports 2023-10-13 13:11:30 +02:00
c45c6cf54c Update the PROFILING.md file 2023-10-13 13:11:30 +02:00
513e61e9a3 Remove the experimental CLI flag 2023-10-13 13:11:29 +02:00
90a626bf80 Use the runtime feature to enable puffin report exporting 2023-10-13 13:11:29 +02:00
0d4acf2daa Fix the metrics product URL 2023-10-13 13:11:29 +02:00
58db8d85ec Add the exportPuffinReports option to the runtime features route 2023-10-13 13:11:29 +02:00
62dfd09dc6 Add more puffin logs to the deletion functions 2023-10-13 13:11:09 +02:00
656dadabea Expose an experimental flag to write the puffin reports to disk 2023-10-13 13:11:09 +02:00
c5f7893fbb Remove the puffin http dependency 2023-10-13 13:11:08 +02:00
8cf2ccf168 Rename benchmark CI file to find it easily in the manifest list 2023-10-12 18:41:26 +02:00
0913373a5e Merge #4122
4122: Bring back changes from `release-v1.4.1` into `main` r=Kerollmops a=curquiza



Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-12 15:57:47 +00:00
1a7f1282af Fix test to use new common Value type 2023-10-12 17:37:04 +02:00
bc747aac3a Cut the first 8 characters 2023-10-12 15:04:37 +02:00
be92376ab3 Fix originating commit branch 2023-10-12 13:51:41 +02:00
cf7e355735 Fix originating commit command 2023-10-12 13:12:53 +02:00
5f09d89ad1 Fetch the whole git history when cloning 2023-10-12 12:25:26 +02:00
6ecb26a3f8 Add more info on the commenting CI command 2023-10-12 11:54:56 +02:00
76c6f554d6 Merge #4101
4101: Bump webpki from 0.22.1 to 0.22.2 r=curquiza a=dependabot[bot]

Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.1 to 0.22.2.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/briansmith/webpki/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=webpki&package-manager=cargo&previous-version=0.22.1&new-version=0.22.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-12 08:46:04 +00:00
f343ef5f2f Merge #4108
4108: Fix bug where search with distinct attribute and no ranking returns offset+limit hits r=curquiza a=vivek-26

# Pull Request

## Related issue
Fixes #4078 

## What does this PR do?
This PR:
- Fixes a bug where a search with a distinct attribute and no ranking returns offset+limit hits (see the sketch after this list).
- Adds unit and integration tests.
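A toy illustration of the intended behaviour (not the actual milli fix): the offset must be applied after deduplicating on the distinct attribute, and documents skipped by the offset still count toward the candidate set. The `distinct_page` helper below is hypothetical.

```rust
use std::collections::HashSet;

/// Toy pagination over (document id, distinct value) pairs.
fn distinct_page(hits: &[(u32, &str)], offset: usize, limit: usize) -> Vec<u32> {
    let mut seen = HashSet::new();
    let mut page = Vec::new();
    let mut skipped = 0;
    for &(id, distinct) in hits {
        if !seen.insert(distinct) {
            continue; // another document already represents this distinct value
        }
        if skipped < offset {
            skipped += 1; // documents skipped by the offset still count as candidates
            continue;
        }
        if page.len() < limit {
            page.push(id);
        }
    }
    page
}

fn main() {
    // Three documents share product_id "123456"; only one of them is a candidate.
    let hits = [(1, "123456"), (2, "123456"), (3, "123456"), (4, "789012"), (6, "456789")];
    assert_eq!(distinct_page(&hits, 0, 2), vec![1, 4]);
    // The buggy behaviour collected offset + limit hits here instead of `limit`.
    assert_eq!(distinct_page(&hits, 1, 2), vec![4, 6]);
    println!("distinct pagination behaves as expected");
}
```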

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-10-12 07:51:29 +00:00
96982a768a Triggers for every type of issue_comment 2023-10-11 23:18:29 +02:00
fca78fbc46 Merge #4082
4082: Update sprint_issue.md r=curquiza a=curquiza

Following internal recent discussions

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-10-11 15:12:38 +00:00
67a678cfb6 Merge #4089
4089: Use a bufreader and bufwriter every time there is a grenad<file> r=curquiza a=irevoire

# Pull Request
Wrap all the files we give to a grenad in a `BufReader` or `BufWriter`.

The dump import I tried in the issue went from 2h to 10 minutes on my machine.

I also ran a bunch of benchmarks on my machine, and we're faster by a few seconds everywhere but nothing huge.

-----

The one thing I'm worried about is code that used to get the inner file out of a grenad and then read from it right away, without seeking back to the beginning of the file or reopening it.
Since we now use a `BufReader`, such a read would return bytes one buffer further along and probably completely corrupt what we were supposed to read.

From what I see, it looks like it works, but I may have missed something, I don't know much about this part of the codebase.

This issue should not arise with the `BufWriter`, though: if we are unable to write the content of the buffer, I made sure that the `BufWriter`'s `into_inner` returns an internal error.
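A minimal, self-contained sketch of both points (it assumes the `tempfile` crate and is not Meilisearch code):

```rust
use std::io::{BufWriter, Read, Seek, SeekFrom, Write};

fn main() -> std::io::Result<()> {
    // Write through a buffered writer, as the grenad files now do.
    let file = tempfile::tempfile()?;
    let mut writer = BufWriter::new(file);
    writer.write_all(b"grenad data")?;

    // `into_inner` flushes the buffer and fails loudly if that flush fails,
    // instead of silently dropping the buffered bytes.
    let mut file = writer.into_inner().map_err(|e| e.into_error())?;

    // After writing, the cursor sits at the end of the file: reading right away
    // without a seek (or a reopen) would return nothing, not the content.
    file.seek(SeekFrom::Start(0))?;
    let mut contents = String::new();
    file.read_to_string(&mut contents)?;
    assert_eq!(contents, "grenad data");
    Ok(())
}
```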

## Related issue
Fixes #4087


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-11 14:27:00 +00:00
d1331d8abf add integration test for distinct search with no ranking 2023-10-11 19:12:56 +05:30
19ba129165 add unit test for distinct search with no ranking 2023-10-11 19:02:27 +05:30
d4da06ff47 fix bug where distinct search with no ranking returns offset+limit hits 2023-10-11 19:02:16 +05:30
3e0471edae Only trigger CI on created or edited comments 2023-10-11 15:15:15 +02:00
432df03c4c Use the correct base filename in the comment bench CI 2023-10-11 14:57:03 +02:00
11958016dd Force a small if to avoid triggering the CI every time 2023-10-11 14:27:51 +02:00
63c250a04d Do not use the GITHUB_REF variable 2023-10-11 13:05:54 +02:00
06d8cd5b72 Make sure that we checkout on the right branch 2023-10-11 12:02:44 +02:00
c0f2724c2d get rid of the newly introduced error code in favor of an io::Error 2023-10-10 15:12:23 +02:00
d772073dfa use a bufreader every time there is a grenad<file> 2023-10-10 15:00:30 +02:00
8fe8ddea79 Merge #4112
4112: Update version for the next release (v1.4.1) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-10 09:05:10 +00:00
8a95bf28e5 Update version for the next release (v1.4.1) in Cargo.toml 2023-10-10 09:01:45 +00:00
43989fe2e4 Reduce proximity range from 7 to 3 2023-10-03 12:16:48 +02:00
c668a29ed5 Bump webpki from 0.22.1 to 0.22.2
Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.1 to 0.22.2.
- [Commits](https://github.com/briansmith/webpki/commits)

---
updated-dependencies:
- dependency-name: webpki
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-02 21:53:45 +00:00
b10eeb0e41 Update .github/ISSUE_TEMPLATE/sprint_issue.md 2023-09-26 16:47:04 +02:00
4a8515e9fc Update sprint_issue.md 2023-09-26 16:46:18 +02:00
98 changed files with 2696 additions and 2315 deletions

View File

@ -7,19 +7,17 @@ assignees: ''
---
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product team resources: [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP
## Motivation
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Write a quick description of the usage if the usage has already been defined-->
Refer to the final spec to know the details and the final decisions about the usage.
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
## TODO

View File

@ -8,11 +8,11 @@ env:
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/benchmark')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
@ -27,14 +27,25 @@ jobs:
reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
@ -76,9 +87,11 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
export base=$(git log --pretty=%p -n 1)
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
echo 'Here are your benchmarks diff 👊' >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base ${{ steps.file.outputs.basename }}.json >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt
gh pr comment ${GITHUB_REF#refs/heads/} --body-file body.txt
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt

Cargo.lock (generated, 834 changed lines): file diff suppressed because it is too large.

View File

@ -18,7 +18,7 @@ members = [
]
[workspace.package]
version = "1.4.0"
version = "1.5.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@ -1,14 +1,14 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.

View File

@ -25,6 +25,12 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
---
### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
---
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">

View File

@ -19,6 +19,7 @@ one indexing operation.
use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
@ -199,6 +200,29 @@ impl Batch {
}
}
impl fmt::Display for Batch {
/// A text used when we debug the profiling reports.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let index_uid = self.index_uid();
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
@ -213,6 +237,30 @@ impl IndexOperation {
}
}
impl fmt::Display for IndexOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
IndexOperation::SettingsAndDocumentOperation { .. } => {
f.write_str("IndexOperation::SettingsAndDocumentOperation")
}
}
}
}
impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
@ -581,7 +629,7 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(format!("{:?}", batch));
puffin::profile_function!(batch.to_string());
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@ -848,7 +896,7 @@ impl IndexScheduler {
})?;
// 4. Dump experimental feature settings
let features = self.features()?.runtime_features();
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;
let dump_uid = started_at.format(format_description!(

View File

@ -1,6 +1,8 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RwTxn};
use crate::error::FeatureNotEnabledError;
use crate::Result;
@ -9,20 +11,19 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)]
pub(crate) struct FeatureData {
runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
instance: InstanceTogglableFeatures,
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
}
#[derive(Debug, Clone, Copy)]
pub struct RoFeatures {
runtime: RuntimeTogglableFeatures,
instance: InstanceTogglableFeatures,
}
impl RoFeatures {
fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
let runtime = data.runtime_features(txn)?;
Ok(Self { runtime, instance: data.instance })
fn new(data: &FeatureData) -> Self {
let runtime = data.runtime_features();
Self { runtime }
}
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@ -43,13 +44,13 @@ impl RoFeatures {
}
pub fn check_metrics(&self) -> Result<()> {
if self.instance.metrics {
if self.runtime.metrics {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting metrics",
feature: "metrics",
issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
issue_link: "https://github.com/meilisearch/product/discussions/625",
}
.into())
}
@ -67,15 +68,36 @@ impl RoFeatures {
.into())
}
}
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
}
}
impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
Ok(Self { runtime: runtime_features, instance: instance_features })
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
..persisted_features
}));
Ok(Self { persisted: runtime_features_db, runtime })
}
pub fn put_runtime_features(
@ -83,16 +105,25 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for an assignment here.
let mut toggled_features = self.runtime.write().unwrap();
*toggled_features = features;
Ok(())
}
fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
fn runtime_features(&self) -> RuntimeTogglableFeatures {
// sound to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for copying the data here
*self.runtime.read().unwrap()
}
pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
RoFeatures::new(txn, self)
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
}
}

View File

@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,

View File

@ -33,6 +33,7 @@ pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@ -52,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
@ -314,6 +316,9 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@ -364,6 +369,7 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@ -457,6 +463,7 @@ impl IndexScheduler {
env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
@ -572,17 +579,46 @@ impl IndexScheduler {
run.wake_up.wait();
loop {
let puffin_enabled = run.features().check_puffin().is_ok();
puffin::set_scopes_on(puffin_enabled);
puffin::GlobalProfiler::lock().new_frame();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
log::error!("{}", e);
log::error!("{e}");
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
}
// Let's write the previous frame to disk but only if
// the user wanted to profile with puffin.
if puffin_enabled {
let mut frame_view = run.puffin_frame.lock();
if !frame_view.is_empty() {
let now = OffsetDateTime::now_utc();
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
log::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
log::error!("{e}");
}
if let Err(e) = file.sync_all() {
log::error!("{e}");
}
// We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones.
*frame_view = FrameView::default();
}
}
}
})
.unwrap();
@ -1062,8 +1098,6 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start);
}
puffin::GlobalProfiler::lock().new_frame();
self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1259,9 +1293,8 @@ impl IndexScheduler {
Ok(IndexStats { is_indexing, inner_stats: index_stats })
}
pub fn features(&self) -> Result<RoFeatures> {
let rtxn = self.read_txn()?;
self.features.features(rtxn)
pub fn features(&self) -> RoFeatures {
self.features.features()
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {

View File

@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

View File

@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
@ -152,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"

View File

@ -114,10 +114,7 @@ pub fn create_app(
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
let app = app.wrap(middleware::RouteMetrics);
app.wrap(
Cors::default()
.send_wildcard()

View File

@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
#[cfg(feature = "profile-with-puffin")]
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"

View File

@ -3,8 +3,10 @@
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;
pub struct RouteMetrics;
@ -47,19 +49,27 @@ where
fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None;
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
// calling unwrap here is safe because index scheduler is added to app data while creating actix app.
// also, the tests will fail if this is not present.
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
let features = index_scheduler.features();
if features.check_metrics().is_ok() {
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}
.inc();
}
};
let fut = self.service.call(req);

View File

@ -29,12 +29,12 @@ async fn get_features(
>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features()?;
) -> HttpResponse {
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
debug!("returns: {:?}", features.runtime_features());
Ok(HttpResponse::Ok().json(features.runtime_features()))
HttpResponse::Ok().json(features.runtime_features())
}
#[derive(Debug, Deserr)]
@ -44,6 +44,10 @@ pub struct RuntimeTogglableFeatures {
pub score_details: Option<bool>,
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@ -55,26 +59,36 @@ async fn patch_features(
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
new_features;
let meilisearch_types::features::RuntimeTogglableFeatures {
score_details,
vector_store,
metrics,
export_puffin_reports,
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);

View File

@ -68,7 +68,7 @@ pub async fn search(
}
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name, features)
})

View File

@ -157,7 +157,7 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {
@ -192,7 +192,7 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features()?;
let features = index_scheduler.features();
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {

View File

@ -19,7 +19,7 @@ pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features()?.check_metrics()?;
index_scheduler.features().check_metrics()?;
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
let mut error = ResponseError::from(AuthenticationError::InvalidToken);

View File

@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let features = index_scheduler.features()?;
let features = index_scheduler.features();
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code

View File

@ -2,10 +2,12 @@ use std::collections::{HashMap, HashSet};
use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use time::{Duration, OffsetDateTime};
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@ -195,7 +197,9 @@ async fn access_authorized_master_key() {
#[actix_rt::test]
async fn access_authorized_restricted_index() {
let mut server = Server::new_auth().await;
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key letting only the needed action.

View File

@ -5,9 +5,11 @@ pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]

View File

@ -202,6 +202,10 @@ impl Server {
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@ -221,7 +225,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
skip_index_budget: true,
..Parser::parse_from(None as Option<&str>)
},
experimental_enable_metrics: true,
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)
}
}

View File

@ -1,4 +1,7 @@
use crate::common::Server;
use meilisearch::Opt;
use tempfile::TempDir;
use crate::common::{default_settings, Server};
use crate::json;
/// Feature name to test against.
@ -16,7 +19,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -26,7 +31,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -36,7 +43,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -47,7 +56,9 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
@ -58,11 +69,73 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
}
"###);
}
#[actix_rt::test]
async fn experimental_feature_metrics() {
// instance flag for metrics enables metrics at startup
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let server = Server::new_with_options(enable_metrics).await.unwrap();
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"exportPuffinReports": false
}
"###);
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
// metrics are not returned in json format
// so the test server will return null
meili_snap::snapshot!(response, @"null");
// disabling metrics results in invalid request
let (response, code) = server.set_features(json!({"metrics": false})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"false");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enabling metrics via HTTP results in valid request
let (response, code) = server.set_features(json!({"metrics": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"true");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
}
#[actix_rt::test]
async fn errors() {
let server = Server::new().await;
@ -73,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -0,0 +1,241 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Brown"
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Black"
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Blue"
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Red"
},
{
"id": 5,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Blue"
},
{
"id": 6,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "Black"
},
{
"id": 7,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "White"
},
{
"id": 8,
"description": "Hoodie",
"brand": "Puma",
"product_id": "987654",
"color": "Gray"
},
{
"id": 9,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Green"
},
{
"id": 10,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Red"
},
{
"id": 11,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Blue"
},
{
"id": 12,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Indigo"
},
{
"id": 13,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Black"
},
{
"id": 14,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Stone Wash"
}
])
});
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
async fn distinct_search_with_offset_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["estimatedTotalHits"] , @"11");
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["estimatedTotalHits"], @"10");
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
}
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
#[actix_rt::test]
async fn distinct_search_with_pagination_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"0");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["page"], @"3");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"4");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6");
}
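Both totals above follow from the fixture: the 14 documents collapse into 6 distinct `product_id` values, so `totalHits` is 6 and `totalPages` is the ceiling of 6 over `hitsPerPage`. A purely illustrative sketch of that arithmetic:
// Illustrative only: the 6 distinct product_ids behind the totalHits/totalPages values above.
let distinct_product_ids = ["123456", "789012", "456789", "987654", "234567", "345678"];
let total_hits = distinct_product_ids.len();                          // 6
let hits_per_page = 2usize;
let total_pages = (total_hits + hits_per_page - 1) / hits_per_page;   // ceil(6 / 2) = 3
assert_eq!((total_hits, total_pages), (6, 3));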

View File

@ -1,6 +1,7 @@
// This module contains all the tests concerning search. Each particular feature of the search
// should be tested in its own module to isolate tests and keep the tests readable.
mod distinct;
mod errors;
mod facet_search;
mod formatted;
@ -816,7 +817,7 @@ async fn experimental_feature_score_details() {
},
"proximity": {
"order": 2,
"score": 0.875
"score": 0.75
},
"attribute": {
"order": 3,

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.3", default-features = false }
charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
@ -79,11 +79,10 @@ big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@ -107,3 +106,6 @@ thai = ["charabia/thai"]
# allow greek specialized tokenization
greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]

View File

@ -1,4 +1,5 @@
use std::fs::File;
use std::io::BufReader;
use std::{io, str};
use obkv::KvReader;
@ -19,14 +20,14 @@ use crate::FieldId;
pub struct EnrichedDocumentsBatchReader<R> {
documents: DocumentsBatchReader<R>,
primary_key: String,
external_ids: grenad::ReaderCursor<File>,
external_ids: grenad::ReaderCursor<BufReader<File>>,
}
impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
pub fn new(
documents: DocumentsBatchReader<R>,
primary_key: String,
external_ids: grenad::Reader<File>,
external_ids: grenad::Reader<BufReader<File>>,
) -> Result<Self, Error> {
if documents.documents_count() as u64 == external_ids.len() {
Ok(EnrichedDocumentsBatchReader {
@ -75,7 +76,7 @@ pub struct EnrichedDocument<'a> {
pub struct EnrichedDocumentsBatchCursor<R> {
documents: DocumentsBatchCursor<R>,
primary_key: String,
external_ids: grenad::ReaderCursor<File>,
external_ids: grenad::ReaderCursor<BufReader<File>>,
}
impl<R> EnrichedDocumentsBatchCursor<R> {

View File

@ -60,16 +60,12 @@ impl CboRoaringBitmapCodec {
/// If the length of the merged values is under the threshold, the values are
/// serialized directly into the buffer; otherwise a RoaringBitmap is created
/// from the values and serialized into the buffer.
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
where
I: IntoIterator<Item = A>,
A: AsRef<[u8]>,
{
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
let mut roaring = RoaringBitmap::new();
let mut vec = Vec::new();
for bytes in slices {
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
@ -89,7 +85,7 @@ impl CboRoaringBitmapCodec {
}
} else {
// We can unwrap safely because the vector was sorted above.
let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
roaring.serialize_into(buffer)?;
}
} else {
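The signature change above narrows the input from any iterator of byte slices to a slice of `Cow<[u8]>`; a minimal call-site sketch under that assumption, where `bytes_a` and `bytes_b` are hypothetical buffers holding already-encoded values:
use std::borrow::Cow;
// Hypothetical call site for the new signature.
let slices: Vec<Cow<[u8]>> = vec![Cow::Borrowed(&bytes_a[..]), Cow::Borrowed(&bytes_b[..])];
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(&slices, &mut buffer)?;
// `buffer` now holds either the raw u32s or a serialized RoaringBitmap,
// depending on the THRESHOLD described in the doc comment above.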

View File

@ -119,16 +119,16 @@ pub struct Index {
pub(crate) main: PolyDatabase,
/// A word and all the documents ids containing the word.
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
pub word_docids: Database<Str, RoaringBitmapCodec>,
/// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,
pub exact_word_docids: Database<Str, RoaringBitmapCodec>,
/// A prefix of word and all the documents ids containing this prefix.
pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub word_prefix_docids: Database<Str, RoaringBitmapCodec>,
/// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,

View File

@ -2,7 +2,7 @@ use std::cmp;
use crate::{relative_from_absolute_position, Position};
pub const MAX_DISTANCE: u32 = 8;
pub const MAX_DISTANCE: u32 = 4;
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
if lhs <= rhs {
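Halving MAX_DISTANCE from 8 to 4 is what rewrites every proximity rank in the snapshot files further down. The updated max_rank values appear to follow (adjacent term pairs) x (MAX_DISTANCE - 1) + 1; this is an inference from the diff, not a documented formula:
// Inferred from the snapshot updates in this diff, not from documented behaviour:
// each of the (terms - 1) adjacent pairs can contribute up to MAX_DISTANCE - 1,
// on top of a base rank of 1.
const MAX_DISTANCE: u32 = 4;
fn approx_max_proximity_rank(terms: u32) -> u32 {
    terms.saturating_sub(1) * (MAX_DISTANCE - 1) + 1
}
// approx_max_proximity_rank(9) == 25, while the same formula with the old value of 8
// gives 57, matching the 57 -> 25 and 8 -> 4 max_rank rewrites in the snapshots below.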

View File

@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::RoaringBitmap;
use serde_json::Value;

View File

@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};

View File

@ -53,11 +53,22 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
if excluded.contains(docid) {
continue;
}
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
results.push(docid);
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
// Drain the skipped elements from the results.
// This **must** be done **after** writing all the results into `all_candidates`, to ensure
// that e.g. estimatedTotalHits stays correct.
if results.len() >= from {
results.drain(..from);
} else {
results.clear();
}
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,
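The ordering matters because `all_candidates` feeds the hit-count estimate while `results` only keeps the requested page; a small illustrative sketch with made-up values:
// Made-up values, illustrating why the drain happens only after the extend.
let mut results: Vec<u32> = vec![0, 1, 2, 3];   // distinct docids collected so far
let from = 2;                                   // documents skipped by the offset
let mut all_candidates: Vec<u32> = Vec::new();  // stands in for the RoaringBitmap
all_candidates.extend(results.iter().copied()); // the estimate still counts all four
if results.len() >= from { results.drain(..from); } else { results.clear(); }
assert_eq!(all_candidates.len(), 4);            // e.g. estimatedTotalHits stays correct
assert_eq!(results, vec![2, 3]);                // only the requested page is returned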

View File

@ -11,7 +11,9 @@ use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext};
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
};
/// A cache storing pointers to values in the LMDB databases.
///
@ -166,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
@ -180,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
word: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
@ -228,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@ -242,7 +244,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),

View File

@ -29,7 +29,7 @@ use std::hash::Hash;
pub use cheapest_paths::PathVisitor;
pub use condition_docids_cache::ConditionDocIdsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use exactness::{ExactnessCondition, ExactnessGraph};
pub use exactness::ExactnessGraph;
pub use fid::{FidCondition, FidGraph};
pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};

View File

@ -1,6 +1,7 @@
#![allow(clippy::too_many_arguments)]
use super::ProximityCondition;
use crate::proximity::MAX_DISTANCE;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@ -35,7 +36,7 @@ pub fn build_edges(
}
let mut conditions = vec![];
for cost in right_ngram_max..(7 + right_ngram_max) {
for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
conditions.push((
cost as u32,
conditions_interner.insert(ProximityCondition::Uninit {
@ -47,7 +48,7 @@ pub fn build_edges(
}
conditions.push((
(7 + right_ngram_max) as u32,
((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
));

View File

@ -273,7 +273,7 @@ fn test_proximity_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@ -282,11 +282,11 @@ fn test_proximity_simple() {
"\"the quickbrown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the very lazy dog\"",
"\"brown quick fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lazy. dog\"",
"\"dog the quick brown fox jumps over the lazy\"",
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
"\"brown quick fox jumps over the lazy dog\"",
"\"the. quick brown fox jumps over the lazy. dog\"",
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
]
"###);
}
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal of the summer\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal of the winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
]
"###);

View File

@ -68,8 +68,8 @@ fn test_trap_basic() {
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
Typo(
@ -82,8 +82,8 @@ fn test_trap_basic() {
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
Typo(

View File

@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],

View File

@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
rank: 9,
max_rank: 25,
},
),
],
@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
rank: 9,
max_rank: 10,
},
),
],
@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
rank: 5,
max_rank: 10,
},
),
],
@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
rank: 5,
max_rank: 10,
},
),
],

View File

@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
@ -63,7 +39,31 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],

View File

@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -63,7 +55,15 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],

View File

@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
],
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -63,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -71,7 +63,15 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],

View File

@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
@ -63,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -71,7 +47,31 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],

View File

@ -7,7 +7,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -15,7 +15,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -47,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -55,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -63,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],

View File

@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],

View File

@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],

View File

@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 8,
max_rank: 4,
},
),
],

View File

@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 5,
max_rank: 8,
rank: 1,
max_rank: 4,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
],

View File

@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 15,
rank: 4,
max_rank: 7,
},
),
],

View File

@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
rank: 22,
max_rank: 22,
},
),
],
@ -24,132 +24,6 @@ expression: "format!(\"{document_scores:#?}\")"
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 49,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 49,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 48,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 41,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 40,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 36,
max_rank: 36,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 31,
max_rank: 36,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 22,
@ -160,14 +34,126 @@ expression: "format!(\"{document_scores:#?}\")"
[
Words(
Words {
matching_words: 4,
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 21,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 20,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 16,
max_rank: 22,
},
),
],
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 19,
max_rank: 19,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 16,
max_rank: 16,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 13,
max_rank: 16,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
},
),
],
@ -180,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],
@ -194,8 +180,22 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
},
),
],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 8,
max_rank: 8,
rank: 4,
max_rank: 4,
},
),
],

View File

@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
rank: 19,
max_rank: 19,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
rank: 19,
max_rank: 19,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 42,
max_rank: 43,
rank: 18,
max_rank: 19,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 42,
max_rank: 43,
rank: 18,
max_rank: 19,
},
),
],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 41,
max_rank: 43,
rank: 17,
max_rank: 19,
},
),
],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 34,
max_rank: 43,
rank: 14,
max_rank: 19,
},
),
],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 33,
max_rank: 43,
rank: 13,
max_rank: 19,
},
),
],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 36,
max_rank: 36,
rank: 16,
max_rank: 16,
},
),
],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 29,
max_rank: 29,
rank: 13,
max_rank: 13,
},
),
],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 24,
max_rank: 29,
rank: 10,
max_rank: 13,
},
),
],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],

View File

@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 55,
max_rank: 57,
rank: 23,
max_rank: 25,
},
),
],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 52,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 51,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 48,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 47,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 57,
max_rank: 25,
},
),
],
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
rank: 22,
max_rank: 22,
},
),
],
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
rank: 19,
max_rank: 19,
},
),
],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 38,
max_rank: 43,
rank: 16,
max_rank: 19,
},
),
],
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 29,
max_rank: 29,
rank: 13,
max_rank: 13,
},
),
],
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 10,
max_rank: 10,
},
),
],
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 15,
max_rank: 15,
rank: 7,
max_rank: 7,
},
),
],

View File

@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 55,
max_rank: 57,
rank: 23,
max_rank: 25,
},
),
],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 52,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 47,
max_rank: 57,
rank: 18,
max_rank: 25,
},
),
],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 45,
max_rank: 57,
rank: 18,
max_rank: 25,
},
),
],
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 57,
max_rank: 25,
},
),
],
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 47,
max_rank: 50,
rank: 19,
max_rank: 22,
},
),
],
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 40,
max_rank: 43,
rank: 16,
max_rank: 19,
},
),
],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 35,
max_rank: 43,
rank: 13,
max_rank: 19,
},
),
],
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 26,
max_rank: 29,
rank: 10,
max_rank: 13,
},
),
],
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 22,
rank: 7,
max_rank: 10,
},
),
],
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 22,
rank: 7,
max_rank: 10,
},
),
],
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 22,
rank: 7,
max_rank: 10,
},
),
],
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 13,
max_rank: 15,
rank: 5,
max_rank: 7,
},
),
],

View File

@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 57,
max_rank: 57,
rank: 25,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 56,
max_rank: 57,
rank: 24,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 55,
max_rank: 57,
rank: 23,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 54,
max_rank: 57,
rank: 22,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 53,
max_rank: 57,
rank: 21,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 52,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 51,
max_rank: 57,
rank: 20,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 48,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
[
Proximity(
Rank {
rank: 47,
max_rank: 57,
rank: 19,
max_rank: 25,
},
),
],
@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 57,
max_rank: 25,
},
),
],

View File

@ -13,7 +13,6 @@ This module tests the `sort` ranking rule:
use big_s::S;
use maplit::hashset;
use meili_snap::insta;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;

View File

@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
),
Proximity(
Rank {
rank: 7,
max_rank: 8,
rank: 3,
max_rank: 4,
},
),
Fid(
@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
),
Proximity(
Rank {
rank: 6,
max_rank: 8,
rank: 2,
max_rank: 4,
},
),
Fid(

View File

@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
// 7 is better than 6 because of the proximity between "the" and its surrounding terms
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
// 10 is better than 9 because of the proximity between "quick" and "brown"
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
"\"the great quick brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the really lazy dog\"",
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
@ -427,7 +427,7 @@ fn test_words_tms_all() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@ -439,10 +439,10 @@ fn test_words_tms_all() {
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
]
"###);

View File

@ -1,104 +0,0 @@
use obkv::Key;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>;
/// DelAdd defines the new value to add to the database and the old value to delete from it.
///
/// It's used in an OBKV to be serialized in grenad files.
#[repr(u8)]
#[derive(Clone, Copy, PartialOrd, PartialEq, Debug)]
pub enum DelAdd {
Deletion = 0,
Addition = 1,
}
impl Key for DelAdd {
const BYTES_SIZE: usize = std::mem::size_of::<DelAdd>();
type BYTES = [u8; Self::BYTES_SIZE];
fn to_be_bytes(&self) -> Self::BYTES {
u8::to_be_bytes(*self as u8)
}
fn from_be_bytes(array: Self::BYTES) -> Self {
match u8::from_be_bytes(array) {
0 => Self::Deletion,
1 => Self::Addition,
otherwise => unreachable!("DelAdd has only 2 variants, unknown variant: {}", otherwise),
}
}
}
/// Creates a Kv<K, Kv<DelAdd, value>> from Kv<K, value>
///
/// If deletion is `true`, the value will be inserted behind a DelAdd::Deletion key.
/// If addition is `true`, the value will be inserted behind a DelAdd::Addition key.
/// If both deletion and addition are `true`, the value will be inserted behind both keys.
pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
reader: obkv::KvReader<K>,
deletion: bool,
addition: bool,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for (key, value) in reader.iter() {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
if deletion {
value_writer.insert(DelAdd::Deletion, value)?;
}
if addition {
value_writer.insert(DelAdd::Addition, value)?;
}
value_writer.finish()?;
writer.insert(key, &value_buffer)?;
}
writer.finish()
}
/// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value>
///
/// putting each deletion obkv's keys under a DelAdd::Deletion
/// and putting each addition obkv's keys under a DelAdd::Addition
pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
deletion: obkv::KvReader<K>,
addition: obkv::KvReader<K>,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for eob in merge_join_by(deletion.iter(), addition.iter(), |(b, _), (u, _)| b.cmp(u)) {
value_buffer.clear();
match eob {
Left((k, v)) => {
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Deletion, v).unwrap();
writer.insert(k, value_writer.into_inner()?).unwrap();
}
Right((k, v)) => {
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Addition, v).unwrap();
writer.insert(k, value_writer.into_inner()?).unwrap();
}
Both((k, deletion), (_, addition)) => {
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
value_writer.insert(DelAdd::Addition, addition).unwrap();
writer.insert(k, value_writer.into_inner()?).unwrap();
}
}
}
writer.finish()
}
pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd) -> bool {
del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition)
}
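A minimal caller sketch for the helpers above; the `reader` obkv and `first_value_bytes` are assumed inputs, not taken from the diff:
// Assumed caller: put every value of an existing obkv behind DelAdd::Addition,
// e.g. for a freshly added document that has nothing to delete.
let mut buffer = Vec::new();
into_del_add_obkv(reader, /* deletion */ false, /* addition */ true, &mut buffer)?;
// Each value in the produced obkv is itself an obkv keyed by DelAdd; reading one back:
let del_add = KvReaderDelAdd::new(first_value_bytes);
assert!(!is_noop_del_add_obkv(del_add)); // Addition is set, Deletion is absent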

View File

@ -16,7 +16,9 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::FieldDocIdFacetCodec;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::index::Hnsw;
use crate::{ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, BEU32};
use crate::{
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32,
};
pub struct DeleteDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -106,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
self.delete_document(docid);
Some(docid)
}
pub fn execute(self) -> Result<DocumentDeletionResult> {
puffin::profile_function!();
pub fn execute(self) -> Result<DocumentDeletionResult> {
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
self.execute_inner()?;
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
}
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
puffin::profile_function!();
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
// We retrieve the current documents ids that are in the database.
@ -474,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
+ for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
{
puffin::profile_function!();
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
@ -493,9 +499,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
fn remove_from_word_prefix_docids(
txn: &mut heed::RwTxn,
db: &Database<Str, CboRoaringBitmapCodec>,
db: &Database<Str, RoaringBitmapCodec>,
to_remove: &RoaringBitmap,
) -> Result<fst::Set<Vec<u8>>> {
puffin::profile_function!();
let mut prefixes_to_delete = fst::SetBuilder::memory();
// We iterate over the word prefix docids database and remove the deleted documents ids
@ -521,11 +529,13 @@ fn remove_from_word_prefix_docids(
fn remove_from_word_docids(
txn: &mut heed::RwTxn,
db: &heed::Database<Str, CboRoaringBitmapCodec>,
db: &heed::Database<Str, RoaringBitmapCodec>,
to_remove: &RoaringBitmap,
words_to_keep: &mut BTreeSet<String>,
words_to_remove: &mut BTreeSet<String>,
) -> Result<()> {
puffin::profile_function!();
// We create an iterator to be able to get the content and delete the word docids.
// It's faster to acquire a cursor to get and then delete or put, as it traverses
// the LMDB B-Tree only once instead of twice.
@ -557,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value(
field_id: FieldId,
to_remove: &RoaringBitmap,
) -> heed::Result<HashSet<Vec<u8>>> {
puffin::profile_function!();
let db = match facet_type {
FacetType::String => {
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
@ -592,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>(
where
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
{
puffin::profile_function!();
let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;

View File

@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
@ -30,7 +31,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
new_data: Option<grenad::Reader<File>>,
new_data: Option<grenad::Reader<BufReader<File>>>,
}
impl<'i> FacetsUpdateBulk<'i> {
@ -38,7 +39,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
new_data: grenad::Reader<File>,
new_data: grenad::Reader<BufReader<File>>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@ -132,8 +133,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
self.db.delete_range(wtxn, &range).map(drop)?;
Ok(())
}
// TODO the new_data is a Reader<Obkv<Key, Obkv<DelAdd, RoaringBitmap>>>
fn update_level0(&mut self, wtxn: &mut RwTxn) -> Result<()> {
let new_data = match self.new_data.take() {
Some(x) => x,
@ -189,7 +188,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
&self,
field_id: FieldId,
txn: &RoTxn,
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
let mut all_docids = RoaringBitmap::new();
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
for bitmap in bitmaps {
@ -261,7 +260,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
field_id: u16,
level: u8,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
) -> Result<Vec<grenad::Reader<File>>> {
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
if level == 0 {
self.read_level_0(rtxn, field_id, handle_group)?;
// Level 0 is already in the database

View File

@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
@ -34,14 +35,14 @@ pub struct FacetsUpdateIncremental<'i> {
index: &'i Index,
inner: FacetsUpdateIncrementalInner,
facet_type: FacetType,
new_data: grenad::Reader<File>,
new_data: grenad::Reader<BufReader<File>>,
}
impl<'i> FacetsUpdateIncremental<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
new_data: grenad::Reader<File>,
new_data: grenad::Reader<BufReader<File>>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,

View File

@ -78,6 +78,7 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::BufReader;
use std::iter::FromIterator;
use charabia::normalizer::{Normalize, NormalizerOption};
@ -108,14 +109,17 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
new_data: grenad::Reader<File>,
new_data: grenad::Reader<BufReader<File>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
}
impl<'i> FacetsUpdate<'i> {
// TODO grenad::Reader<Key, Obkv<DelAdd, RoaringBitmap>>
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
pub fn new(
index: &'i Index,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
) -> Self {
let database = match facet_type {
FacetType::String => index
.facet_id_string_docids

View File

@ -1,4 +1,4 @@
use std::io::{Read, Seek};
use std::io::{BufWriter, Read, Seek};
use std::result::Result as StdResult;
use std::{fmt, iter};
@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
// The primary key *field id* that has already been set for this index or the one

View File

@ -1,19 +1,22 @@
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::BufReader;
use std::{io, mem, str};
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use obkv::{KvReader, KvWriterU16};
use obkv::KvReader;
use roaring::RoaringBitmap;
use serde_json::Value;
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
use super::helpers::{concat_u32s_array, create_sorter, sorter_into_reader, GrenadParameters};
use crate::error::{InternalError, SerializationError};
use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
use crate::update::index_documents::MergeFn;
use crate::{
absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
/// Extracts the word and the positions where this word appears and
/// prefixes it with the document id.
@ -29,160 +32,25 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
allowed_separators: Option<&[&str]>,
dictionary: Option<&[&str]>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
// initialize destination values.
let mut documents_ids = RoaringBitmap::new();
let mut script_language_docids = HashMap::new();
let mut docid_word_positions_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_latest_obkv,
concat_u32s_array,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
// initialize buffers.
let mut del_buffers = Buffers::default();
let mut add_buffers = Buffers::default();
let mut key_buffer = Vec::new();
let mut value_buffer = Vec::new();
// initialize tokenizer.
let mut builder = tokenizer_builder(stop_words, dictionary, allowed_separators, None);
let tokenizer = builder.build();
// iterate over documents.
let mut cursor = obkv_documents.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !searchable_fields_changed(&KvReader::<FieldId>::new(value), searchable_fields) {
continue;
}
documents_ids.push(document_id);
// Update key buffer prefix.
key_buffer.clear();
key_buffer.extend_from_slice(&document_id.to_be_bytes());
// Tokenize deletions and additions in 2 different threads.
let (del, add): (Result<_>, Result<_>) = rayon::join(
|| {
// deletions
lang_safe_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
stop_words,
allowed_separators,
dictionary,
max_positions_per_attributes,
DelAdd::Deletion,
&mut del_buffers,
)
},
|| {
// additions
lang_safe_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
stop_words,
allowed_separators,
dictionary,
max_positions_per_attributes,
DelAdd::Addition,
&mut add_buffers,
)
},
);
let (del_obkv, del_script_language_word_count) = del?;
let (add_obkv, add_script_language_word_count) = add?;
// merge deletions and additions.
value_buffer.clear();
del_add_from_two_obkvs(
KvReader::<FieldId>::new(del_obkv),
KvReader::<FieldId>::new(add_obkv),
&mut value_buffer,
)?;
// write them into the sorter.
let obkv = KvReader::<FieldId>::new(value);
for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
docid_word_positions_sorter.insert(&key_buffer, value)?;
}
// update script_language_docids deletions.
for (script, languages_frequency) in del_script_language_word_count {
for (language, _) in languages_frequency {
let entry = script_language_docids
.entry((script, language))
.or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
entry.0.push(document_id);
}
}
// update script_language_docids additions.
for (script, languages_frequency) in add_script_language_word_count {
for (language, _) in languages_frequency {
let entry = script_language_docids
.entry((script, language))
.or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
entry.1.push(document_id);
}
}
}
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (documents_ids, reader, script_language_docids))
}
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
) -> bool {
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
let del_add = KvReaderDelAdd::new(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
// if both contain a value and the values are the same, check the next field.
(Some(del), Some(add)) if del == add => (),
// otherwise the fields are different, return true.
_otherwise => return true,
}
}
}
false
}
/// Factorize tokenizer building.
fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<&[u8]>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
script_language: Option<&'a HashMap<Script, Vec<Language>>>,
) -> TokenizerBuilder<'a, &'a [u8]> {
let mut buffers = Buffers::default();
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
@ -193,144 +61,130 @@ fn tokenizer_builder<'a>(
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}
let tokenizer = tokenizer_builder.build();
if let Some(script_language) = script_language {
tokenizer_builder.allow_list(&script_language);
}
let mut cursor = obkv_documents.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
tokenizer_builder
}
documents_ids.push(document_id);
buffers.key_buffer.clear();
buffers.key_buffer.extend_from_slice(&document_id.to_be_bytes());
/// Extracts words mapped with their positions in a document,
/// ensuring no language detection mistakes were made.
fn lang_safe_tokens_from_document<'a>(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
tokenizer: &Tokenizer,
stop_words: Option<&fst::Set<&[u8]>>,
allowed_separators: Option<&[&str]>,
dictionary: Option<&[&str]>,
max_positions_per_attributes: u32,
del_add: DelAdd,
buffers: &'a mut Buffers,
) -> Result<(&'a [u8], HashMap<Script, Vec<(Language, usize)>>)> {
let mut script_language_word_count = HashMap::new();
let mut script_language_word_count = HashMap::new();
tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
del_add,
buffers,
&mut script_language_word_count,
)?;
extract_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
&mut buffers,
&mut script_language_word_count,
&mut docid_word_positions_sorter,
)?;
// if we detect a potential mistake in the language detection,
// we rerun the extraction, forcing the tokenizer to detect the most frequently detected languages.
// context: https://github.com/meilisearch/meilisearch/issues/3565
if script_language_word_count
.values()
.map(Vec::as_slice)
.any(potential_language_detection_error)
{
// build an allow list with the most frequent detected languages in the document.
let script_language: HashMap<_, _> =
script_language_word_count.iter().filter_map(most_frequent_languages).collect();
// if we detect a potential mistake in the language detection,
// we rerun the extraction, forcing the tokenizer to detect the most frequently detected languages.
// context: https://github.com/meilisearch/meilisearch/issues/3565
if script_language_word_count
.values()
.map(Vec::as_slice)
.any(potential_language_detection_error)
{
// build an allow list with the most frequent detected languages in the document.
let script_language: HashMap<_, _> =
script_language_word_count.iter().filter_map(most_frequent_languages).collect();
// if the allow list is empty, meaning that no Language is considered frequent,
// then we don't rerun the extraction.
if !script_language.is_empty() {
// build a new temporary tokenizer including the allow list.
let mut builder = tokenizer_builder(
stop_words,
dictionary,
allowed_separators,
Some(&script_language),
);
let tokenizer = builder.build();
// if the allow list is empty, meaning that no Language is considered frequent,
// then we don't rerun the extraction.
if !script_language.is_empty() {
// build a new temporary tokenizer including the allow list.
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
tokenizer_builder.allow_list(&script_language);
let tokenizer = tokenizer_builder.build();
script_language_word_count.clear();
script_language_word_count.clear();
// rerun the extraction.
tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
del_add,
buffers,
&mut script_language_word_count,
)?;
// rerun the extraction.
extract_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
&mut buffers,
&mut script_language_word_count,
&mut docid_word_positions_sorter,
)?;
}
}
for (script, languages_frequency) in script_language_word_count {
for (language, _) in languages_frequency {
let entry = script_language_docids
.entry((script, language))
.or_insert_with(RoaringBitmap::new);
entry.push(document_id);
}
}
}
Ok((&buffers.obkv_buffer, script_language_word_count))
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (documents_ids, reader, script_language_docids))
}
/// Extracts words mapped with their positions in a document.
fn tokens_from_document<'a>(
fn extract_tokens_from_document(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
tokenizer: &Tokenizer,
max_positions_per_attributes: u32,
del_add: DelAdd,
buffers: &'a mut Buffers,
buffers: &mut Buffers,
script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
) -> Result<&'a [u8]> {
buffers.obkv_buffer.clear();
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
docid_word_positions_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
for (field_id, field_bytes) in obkv.iter() {
// if the field is searchable.
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
// parse json.
let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
let value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
let tokens = process_tokens(tokenizer.tokenize(field))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
// prepare the writing destination.
buffers.obkv_positions_buffer.clear();
let mut writer = KvWriterU16::new(&mut buffers.obkv_positions_buffer);
// convert json into a unique string.
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
// create an iterator of tokens with their positions.
let tokens = process_tokens(tokenizer.tokenize(field))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
for (index, token) in tokens {
// if a language has been detected for the token, we update the counter.
if let Some(language) = token.language {
let script = token.script;
let entry =
script_language_word_count.entry(script).or_insert_with(Vec::new);
match entry.iter_mut().find(|(l, _)| *l == language) {
Some((_, n)) => *n += 1,
None => entry.push((language, 1)),
}
}
// keep a word only if it is not empty and fits in an LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
let position: u16 = index
.try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
writer.insert(position, token.as_bytes())?;
for (index, token) in tokens {
// if a language has been detected for the token, we update the counter.
if let Some(language) = token.language {
let script = token.script;
let entry =
script_language_word_count.entry(script).or_insert_with(Vec::new);
match entry.iter_mut().find(|(l, _)| *l == language) {
Some((_, n)) => *n += 1,
None => entry.push((language, 1)),
}
}
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
buffers.key_buffer.truncate(mem::size_of::<u32>());
buffers.key_buffer.extend_from_slice(token.as_bytes());
// write positions into document.
let positions = writer.into_inner()?;
document_writer.insert(field_id, positions)?;
let position: u16 = index
.try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let position = absolute_from_relative_position(field_id, position);
docid_word_positions_sorter
.insert(&buffers.key_buffer, position.to_ne_bytes())?;
}
}
}
}
}
Ok(document_writer.into_inner().map(|v| v.as_slice())?)
Ok(())
}
/// Transform a JSON value into a string that can be indexed.
@ -433,10 +287,10 @@ fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)
#[derive(Default)]
struct Buffers {
// the key buffer is the concatenation of the internal document id with the field id.
// The buffer has to be completely cleared between documents,
// and the field id part must be cleared between each field.
key_buffer: Vec<u8>,
// the field buffer used for each field's deserialization; it must be cleared between each field.
field_buffer: String,
// buffer used to store the value data containing an obkv.
obkv_buffer: Vec<u8>,
// buffer used to store the value data containing an obkv of tokens with their positions.
obkv_positions_buffer: Vec<u8>,
}

View File
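One side of the diff above stores a single absolute `u32` position per word occurrence and converts it back and forth with `absolute_from_relative_position` / `relative_from_absolute_position`. A sketch of the packing those helper names suggest, assuming the field id lives in the upper 16 bits and the in-field position in the lower 16; the real helpers may differ in details such as position bucketing:

```rust
type FieldId = u16;

// Pack the field id in the high 16 bits and the relative position in the low 16 bits.
fn absolute_from_relative_position(field_id: FieldId, relative: u16) -> u32 {
    (field_id as u32) << 16 | relative as u32
}

// Split an absolute position back into (field id, relative position).
fn relative_from_absolute_position(absolute: u32) -> (FieldId, u16) {
    ((absolute >> 16) as FieldId, (absolute & 0xFFFF) as u16)
}

fn main() {
    let absolute = absolute_from_relative_position(3, 42);
    assert_eq!(relative_from_absolute_position(absolute), (3, 42));
}
```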

@ -1,15 +1,14 @@
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
};
use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd};
use crate::Result;
/// Extracts the facet number and the documents ids where this facet number appears.
@ -18,39 +17,30 @@ use crate::Result;
/// documents ids from the given chunk of docid facet number positions.
#[logging_timer::time]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
fid_docid_facet_number: grenad::Reader<R>,
docid_fid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut buffer = Vec::new();
let mut cursor = fid_docid_facet_number.into_cursor()?;
while let Some((key_bytes, deladd_obkv_bytes)) = cursor.move_on_next()? {
let mut cursor = docid_fid_facet_number.into_cursor()?;
while let Some((key_bytes, _)) = cursor.move_on_next()? {
let (field_id, document_id, number) =
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
let key = FacetGroupKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
}
obkv.finish()?;
facet_number_docids_sorter.insert(key_bytes, &buffer)?;
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
}
sorter_into_reader(facet_number_docids_sorter, indexer)

View File
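The values inserted above are native-endian document ids that `merge_cbo_roaring_bitmaps` later unions under the same facet key. A simplified stand-in for that merge step using the `roaring` crate; the real codec also accepts already-serialized bitmaps, which this sketch ignores:

```rust
use roaring::RoaringBitmap;

// Simplified merge: every value is assumed to be a sequence of native-endian
// u32 document ids, and merging unions them into one bitmap per key.
fn merge_native_endian_docids(values: &[&[u8]]) -> RoaringBitmap {
    let mut bitmap = RoaringBitmap::new();
    for value in values {
        for chunk in value.chunks_exact(4) {
            let docid = u32::from_ne_bytes(chunk.try_into().unwrap());
            bitmap.insert(docid);
        }
    }
    bitmap
}

fn main() {
    let first = 7u32.to_ne_bytes();
    let second = 12u32.to_ne_bytes();
    let merged = merge_native_endian_docids(&[first.as_slice(), second.as_slice()]);
    assert!(merged.contains(7) && merged.contains(12));
    assert_eq!(merged.len(), 2);
}
```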

@ -1,14 +1,13 @@
use std::fs::File;
use std::{io, str};
use std::io::{self, BufReader};
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::StrRefCodec;
use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::merge_deladd_cbo_roaring_bitmaps;
use crate::{FieldId, Result};
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// Extracts the facet string and the documents ids where this facet string appears.
///
@ -18,23 +17,22 @@ use crate::{FieldId, Result};
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_deladd_cbo_roaring_bitmaps,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
while let Some((key, _original_value_bytes)) = cursor.move_on_next()? {
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
@ -42,17 +40,21 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
try_split_array_at::<_, 4>(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
let normalized_value = str::from_utf8(normalized_value_bytes)?;
let key = FacetGroupKey { field_id, level: 0, left_bound: normalized_value };
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
let mut normalised_value = std::str::from_utf8(normalized_value_bytes)?;
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_original_value_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
let normalised_truncated_value: String;
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
normalised_truncated_value = normalised_value
.char_indices()
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
normalised_value = normalised_truncated_value.as_str();
}
obkv.finish()?;
facet_string_docids_sorter.insert(&key_bytes, &buffer)?;
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
// document id is encoded in native-endian because of the CBO roaring bitmap codec
facet_string_docids_sorter.insert(&key_bytes, document_id.to_ne_bytes())?;
}
sorter_into_reader(facet_string_docids_sorter, indexer)

View File
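The extractor above truncates the normalized facet value so the resulting LMDB key stays within bounds, and the same guard reappears as `truncate_string` further down. A self-contained version with a usage check; the constant's value here is an assumption for the example:

```rust
// Assumed byte budget for a facet value inside an LMDB key.
const MAX_FACET_VALUE_LENGTH: usize = 500;

fn truncate_facet_value(value: &str) -> String {
    value
        .char_indices()
        // `idx + 4` leaves room for the largest possible UTF-8 character,
        // so the collected string never exceeds the budget and never cuts
        // a character in half.
        .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
        .map(|(_, c)| c)
        .collect()
}

fn main() {
    let long = "é".repeat(400); // 800 bytes, 400 characters
    let truncated = truncate_facet_value(&long);
    assert!(truncated.len() <= MAX_FACET_VALUE_LENGTH);
    assert_eq!(truncated.chars().count(), 248);
}
```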

@ -1,39 +1,27 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use grenad::Sorter;
use heed::zerocopy::AsBytes;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use serde_json::{from_slice, Value};
use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH};
/// The extracted facet values stored in grenad files by type.
pub struct ExtractedFacetValues {
pub fid_docid_facet_numbers_chunk: grenad::Reader<File>,
pub fid_docid_facet_strings_chunk: grenad::Reader<File>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
}
/// Extracts the facet values of each faceted field of each document.
@ -70,150 +58,71 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
max_memory.map(|m| m / 2),
);
// The tuples represents the Del and Add side for a bitmap
let mut facet_exists_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
let mut facet_is_null_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
let mut facet_is_empty_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
// We create two buffers because of mutable ref issues with closures.
let mut numbers_key_buffer = Vec::new();
let mut strings_key_buffer = Vec::new();
let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
for (field_id, field_bytes) in obkv.iter() {
if faceted_fields.contains(&field_id) {
numbers_key_buffer.clear();
strings_key_buffer.clear();
key_buffer.clear();
// Set key to the field_id
// Note: this encoding is consistent with FieldIdCodec
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
// Here, we already know that the document must be added to the “field id exists” database
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get();
facet_exists_docids.entry(field_id).or_default().insert(document);
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
key_buffer.extend_from_slice(docid_bytes);
let del_add_obkv = obkv::KvReader::new(field_bytes);
let del_value = match del_add_obkv.get(DelAdd::Deletion) {
Some(bytes) => from_slice(bytes).map_err(InternalError::SerdeJson)?,
None => None,
};
let add_value = match del_add_obkv.get(DelAdd::Addition) {
Some(bytes) => from_slice(bytes).map_err(InternalError::SerdeJson)?,
None => None,
};
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
// We insert the document id on the Del and the Add side if the field exists.
let (ref mut del_exists, ref mut add_exists) =
facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
match extract_facet_values(
&value,
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
) {
FilterableValues::Null => {
facet_is_null_docids.entry(field_id).or_default().insert(document);
}
FilterableValues::Empty => {
facet_is_empty_docids.entry(field_id).or_default().insert(document);
}
FilterableValues::Values { numbers, strings } => {
// insert facet numbers in sorter
for number in numbers {
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
if del_value.is_some() {
del_exists.insert(document);
}
if add_value.is_some() {
add_exists.insert(document);
}
fid_docid_facet_numbers_sorter
.insert(&key_buffer, ().as_bytes())?;
}
}
let geo_support =
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, geo_support));
// insert normalized and original facet string in sorter
for (normalized, original) in
strings.into_iter().filter(|(n, _)| !n.is_empty())
{
let normalized_truncated_value: String = normalized
.char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document);
}
Empty => {
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
Empty => {
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
key_buffer.extend_from_slice(normalized_truncated_value.as_bytes());
fid_docid_facet_strings_sorter
.insert(&key_buffer, original.as_bytes())?;
}
}
}
@ -221,15 +130,14 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
}
}
let mut buffer = Vec::new();
let mut facet_exists_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, (del_bitmap, add_bitmap)) in facet_exists_docids.into_iter() {
deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
facet_exists_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
for (fid, bitmap) in facet_exists_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_exists_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
}
let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
@ -238,9 +146,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, (del_bitmap, add_bitmap)) in facet_is_null_docids.into_iter() {
deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
facet_is_null_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
for (fid, bitmap) in facet_is_null_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_is_null_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
}
let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
@ -249,156 +157,21 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, (del_bitmap, add_bitmap)) in facet_is_empty_docids.into_iter() {
deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
for (fid, bitmap) in facet_is_empty_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
}
let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;
Ok(ExtractedFacetValues {
fid_docid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
fid_docid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
fid_facet_exists_docids_chunk: facet_exists_docids_reader,
})
}
/// Generates a vector of bytes containing a DelAdd obkv with two bitmaps.
fn deladd_obkv_cbo_roaring_bitmaps(
buffer: &mut Vec<u8>,
del_bitmap: &RoaringBitmap,
add_bitmap: &RoaringBitmap,
) -> io::Result<()> {
buffer.clear();
let mut obkv = KvWriterDelAdd::new(buffer);
let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
obkv.finish()
}
/// Truncates a string to the biggest valid LMDB key size.
fn truncate_string(s: String) -> String {
s.char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect()
}
/// Computes the diff between both Del and Add numbers and
/// only inserts the parts that differ in the sorter.
fn insert_numbers_diff<MF>(
fid_docid_facet_numbers_sorter: &mut Sorter<MF>,
key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
// We sort and dedup the float numbers
del_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
add_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
del_numbers.dedup_by_key(|f| OrderedFloat(*f));
add_numbers.dedup_by_key(|f| OrderedFloat(*f));
let merged_numbers_iter = itertools::merge_join_by(
del_numbers.into_iter().map(OrderedFloat),
add_numbers.into_iter().map(OrderedFloat),
|del, add| del.cmp(add),
);
// insert facet numbers in sorter
for eob in merged_numbers_iter {
key_buffer.truncate(TRUNCATE_SIZE);
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left(OrderedFloat(number)) => {
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}
EitherOrBoth::Right(OrderedFloat(number)) => {
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
// We insert only the Add part of the Obkv to inform
// that we only want to add all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, ().as_bytes())?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}
}
}
Ok(())
}
/// Computes the diff between both Del and Add strings and
/// only inserts the parts that differ in the sorter.
fn insert_strings_diff<MF>(
fid_docid_facet_strings_sorter: &mut Sorter<MF>,
key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
// We sort and dedup the normalized and original strings
del_strings.sort_unstable();
add_strings.sort_unstable();
del_strings.dedup();
add_strings.dedup();
let merged_strings_iter = itertools::merge_join_by(
del_strings.into_iter().filter(|(n, _)| !n.is_empty()),
add_strings.into_iter().filter(|(n, _)| !n.is_empty()),
|del, add| del.cmp(add),
);
// insert normalized and original facet string in sorter
for eob in merged_strings_iter {
key_buffer.truncate(TRUNCATE_SIZE);
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left((normalized, original)) => {
let truncated = truncate_string(normalized);
key_buffer.extend_from_slice(truncated.as_bytes());
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, original)?;
let bytes = obkv.into_inner()?;
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
EitherOrBoth::Right((normalized, original)) => {
let truncated = truncate_string(normalized);
key_buffer.extend_from_slice(truncated.as_bytes());
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, original)?;
let bytes = obkv.into_inner()?;
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
}
}
Ok(())
}
/// Represent what a document field contains.
enum FilterableValues {
/// Corresponds to the JSON `null` value.
@ -409,7 +182,6 @@ enum FilterableValues {
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
}
/// Extracts the facet values of a JSON field.
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
fn inner_extract_facet_values(
value: &Value,

View File
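The `insert_numbers_diff` helper in the diff above only writes the numbers that actually changed between the Del and Add sides. A sketch of that diff with `itertools::merge_join_by` and `OrderedFloat`; the `Change` enum is a hypothetical stand-in for writing a DelAdd obkv into the sorter:

```rust
use itertools::{merge_join_by, EitherOrBoth};
use ordered_float::OrderedFloat;

// Hypothetical result of the diff: which numbers must be removed or added.
#[derive(Debug, PartialEq)]
enum Change {
    Deleted(f64),
    Added(f64),
}

fn numbers_diff(mut del: Vec<f64>, mut add: Vec<f64>) -> Vec<Change> {
    // Sort and dedup both sides so they can be walked together.
    del.sort_unstable_by_key(|f| OrderedFloat(*f));
    add.sort_unstable_by_key(|f| OrderedFloat(*f));
    del.dedup_by_key(|f| OrderedFloat(*f));
    add.dedup_by_key(|f| OrderedFloat(*f));

    merge_join_by(
        del.into_iter().map(OrderedFloat),
        add.into_iter().map(OrderedFloat),
        |d, a| d.cmp(a),
    )
    .filter_map(|eob| match eob {
        // present on both sides: nothing to write
        EitherOrBoth::Both(_, _) => None,
        EitherOrBoth::Left(OrderedFloat(n)) => Some(Change::Deleted(n)),
        EitherOrBoth::Right(OrderedFloat(n)) => Some(Change::Added(n)),
    })
    .collect()
}

fn main() {
    let changes = numbers_diff(vec![1.0, 2.0, 3.0], vec![2.0, 3.0, 4.0]);
    assert_eq!(changes, vec![Change::Deleted(1.0), Change::Added(4.0)]);
}
```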

@ -1,17 +1,16 @@
use std::collections::HashMap;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use obkv::KvReaderU16;
use grenad::Sorter;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters, MergeFn,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::Result;
const MAX_COUNTED_WORDS: usize = 30;
use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
/// Extracts the field id word count and the documents ids where
/// this field id with this amount of words appears.
@ -22,7 +21,7 @@ const MAX_COUNTED_WORDS: usize = 30;
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
@ -36,21 +35,63 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
max_memory,
);
let mut key_buffer = Vec::new();
// This map is assumed to not consume a lot of memory.
let mut document_fid_wordcount = HashMap::new();
let mut current_document_id = None;
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key)
let (document_id_bytes, _word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let word_count = KvReaderU16::new(&value).iter().take(MAX_COUNTED_WORDS + 1).count();
if word_count <= MAX_COUNTED_WORDS {
key_buffer.clear();
key_buffer.extend_from_slice(fid_bytes);
key_buffer.push(word_count as u8);
fid_word_count_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
let curr_document_id = *current_document_id.get_or_insert(document_id);
if curr_document_id != document_id {
drain_document_fid_wordcount_into_sorter(
&mut fid_word_count_docids_sorter,
&mut document_fid_wordcount,
curr_document_id,
)?;
current_document_id = Some(document_id);
}
for position in read_u32_ne_bytes(value) {
let (field_id, _) = relative_from_absolute_position(position);
let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
*value += 1;
}
}
if let Some(document_id) = current_document_id {
// We must make sure that we don't lose the current document's field id
// word count map if we break because we reached the end of the chunk.
drain_document_fid_wordcount_into_sorter(
&mut fid_word_count_docids_sorter,
&mut document_fid_wordcount,
document_id,
)?;
}
sorter_into_reader(fid_word_count_docids_sorter, indexer)
}
fn drain_document_fid_wordcount_into_sorter(
fid_word_count_docids_sorter: &mut Sorter<MergeFn>,
document_fid_wordcount: &mut HashMap<FieldId, u32>,
document_id: DocumentId,
) -> Result<()> {
let mut key_buffer = Vec::new();
for (fid, count) in document_fid_wordcount.drain() {
if count <= 30 {
key_buffer.clear();
key_buffer.extend_from_slice(&fid.to_be_bytes());
key_buffer.push(count as u8);
fid_word_count_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
}
Ok(())
}

View File
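One side of the diff above accumulates a per-document map of field id to word count and drains it into `(field_id, count)` keys whenever the document changes, dropping fields with more than 30 words. A minimal sketch of that drain step:

```rust
use std::collections::HashMap;

type FieldId = u16;

// Drain the per-document counts into (field_id BE bytes ++ count) keys,
// skipping fields with more than 30 counted words, as the extractor does.
fn drain_wordcounts(counts: &mut HashMap<FieldId, u32>) -> Vec<Vec<u8>> {
    let mut keys = Vec::new();
    for (fid, count) in counts.drain() {
        if count <= 30 {
            let mut key = Vec::with_capacity(3);
            key.extend_from_slice(&fid.to_be_bytes());
            key.push(count as u8);
            keys.push(key);
        }
    }
    keys.sort_unstable();
    keys
}

fn main() {
    let mut counts = HashMap::new();
    counts.insert(1u16, 3u32); // 3 words in field 1: kept
    counts.insert(2u16, 45u32); // too many words: dropped
    let keys = drain_wordcounts(&mut counts);
    assert_eq!(keys, vec![vec![0, 1, 3]]);
    assert!(counts.is_empty());
}
```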

@ -1,5 +1,5 @@
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use concat_arrays::concat_arrays;
use serde_json::Value;
@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
indexer: GrenadParameters,
primary_key_id: FieldId,
(lat_fid, lng_fid): (FieldId, FieldId),
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(

View File

@ -1,6 +1,6 @@
use std::convert::TryFrom;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use bytemuck::cast_slice;
use serde_json::{from_slice, Value};
@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
indexer: GrenadParameters,
primary_key_id: FieldId,
vectors_fid: FieldId,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(

View File
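The vector extractor above imports `bytemuck::cast_slice` to turn embeddings into raw bytes for the writer. A sketch of that conversion, under the assumption that the value side is simply the reinterpreted `f32` slice; the key side (primary key handling) is omitted:

```rust
// Reinterpret an embedding as plain bytes without copying; f32 is Pod,
// so `bytemuck::cast_slice` can view the slice as &[u8].
fn vector_value_bytes(vector: &[f32]) -> &[u8] {
    bytemuck::cast_slice(vector)
}

fn main() {
    let embedding = vec![0.1f32, 0.2, 0.3];
    let bytes = vector_value_bytes(&embedding);
    assert_eq!(bytes.len(), embedding.len() * std::mem::size_of::<f32>());
}
```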

@ -1,20 +1,18 @@
use std::collections::{BTreeSet, HashSet};
use std::collections::HashSet;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use std::iter::FromIterator;
use heed::BytesDecode;
use obkv::KvReaderU16;
use roaring::RoaringBitmap;
use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader,
try_split_array_at, writer_into_reader, GrenadParameters,
create_sorter, merge_roaring_bitmaps, serialize_roaring_bitmap, sorter_into_reader,
try_split_array_at, GrenadParameters,
};
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::MergeFn;
use crate::{DocumentId, FieldId, Result};
use crate::update::index_documents::helpers::read_u32_ne_bytes;
use crate::{relative_from_absolute_position, FieldId, Result};
/// Extracts the word and the documents ids where this word appears.
///
@ -28,148 +26,65 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
exact_attributes: &HashSet<FieldId>,
) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> {
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|x| x / 3),
);
let mut key_buffer = Vec::new();
let mut del_words = BTreeSet::new();
let mut add_words = BTreeSet::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let (fid_bytes, _) = try_split_array_at(fid_bytes)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::new(&value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::new(&deletion).iter() {
del_words.insert(word.to_vec());
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::new(&addition).iter() {
add_words.insert(word.to_vec());
}
}
words_into_sorter(
document_id,
fid,
&mut key_buffer,
&del_words,
&add_words,
&mut word_fid_docids_sorter,
)?;
del_words.clear();
add_words.clear();
}
let mut word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
merge_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|x| x / 3),
max_memory.map(|x| x / 2),
);
let mut exact_word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
merge_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|x| x / 3),
max_memory.map(|x| x / 2),
);
let mut word_fid_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
// TODO: replace sorters by writers by accumulating values into a buffer before inserting them.
while let Some((key, value)) = iter.next()? {
// only keep the value if there is a change to apply in the DB.
if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) {
word_fid_docids_writer.insert(key, value)?;
}
let (word, fid) = StrBEU16Codec::bytes_decode(key)
let mut value_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, positions)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
// every word contained in an attribute set to exact must be pushed into the exact_words list.
if exact_attributes.contains(&fid) {
exact_word_docids_sorter.insert(word.as_bytes(), &value)?;
let bitmap = RoaringBitmap::from_iter(Some(document_id));
serialize_roaring_bitmap(&bitmap, &mut value_buffer)?;
// If there are no exact attributes, we do not need to iterate over positions.
if exact_attributes.is_empty() {
word_docids_sorter.insert(word_bytes, &value_buffer)?;
} else {
word_docids_sorter.insert(word.as_bytes(), &value)?;
let mut added_to_exact = false;
let mut added_to_word_docids = false;
for position in read_u32_ne_bytes(positions) {
// as soon as we know that this word has been added to both sorters, we don't need to
// iterate over the positions.
if added_to_exact && added_to_word_docids {
break;
}
let (fid, _) = relative_from_absolute_position(position);
if exact_attributes.contains(&fid) && !added_to_exact {
exact_word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_exact = true;
} else if !added_to_word_docids {
word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_word_docids = true;
}
}
}
}
Ok((
sorter_into_reader(word_docids_sorter, indexer)?,
sorter_into_reader(exact_word_docids_sorter, indexer)?,
writer_into_reader(word_fid_docids_writer)?,
))
}
fn words_into_sorter(
document_id: DocumentId,
fid: FieldId,
key_buffer: &mut Vec<u8>,
del_words: &BTreeSet<Vec<u8>>,
add_words: &BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
puffin::profile_function!();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut buffer = Vec::new();
for eob in merge_join_by(del_words.iter(), add_words.iter(), |d, a| d.cmp(a)) {
buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut buffer);
let word_bytes = match eob {
Left(word_bytes) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
word_bytes
}
Right(word_bytes) => {
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
word_bytes
}
Both(word_bytes, _) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
word_bytes
}
};
key_buffer.clear();
key_buffer.extend_from_slice(&word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())
}

View File
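One side of the diff above routes a word's document id into `exact_word_docids_sorter` when the word occurs in a field listed in `exact_attributes`, and into `word_docids_sorter` otherwise. A sketch of that routing with in-memory maps standing in for the two sorters (the real code also short-circuits once both sides have been fed):

```rust
use std::collections::{HashMap, HashSet};
use roaring::RoaringBitmap;

type FieldId = u16;
type DocumentId = u32;

// For every field a word occurs in, send the docid to the exact map if the
// field is an exact attribute, and to the regular map otherwise.
fn route_word(
    word: &str,
    docid: DocumentId,
    fields: &[FieldId],
    exact_attributes: &HashSet<FieldId>,
    word_docids: &mut HashMap<String, RoaringBitmap>,
    exact_word_docids: &mut HashMap<String, RoaringBitmap>,
) {
    for fid in fields {
        let target = if exact_attributes.contains(fid) {
            &mut *exact_word_docids
        } else {
            &mut *word_docids
        };
        target.entry(word.to_string()).or_default().insert(docid);
    }
}

fn main() {
    let exact_attributes: HashSet<FieldId> = [1u16].into_iter().collect();
    let mut word_docids = HashMap::new();
    let mut exact_word_docids = HashMap::new();

    // "kefir" occurs in field 0 (regular) and field 1 (exact) of document 7.
    route_word("kefir", 7, &[0, 1], &exact_attributes, &mut word_docids, &mut exact_word_docids);

    assert!(word_docids["kefir"].contains(7));
    assert!(exact_word_docids["kefir"].contains(7));
}
```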

@ -0,0 +1,51 @@
use std::fs::File;
use std::io::{self, BufReader};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::{relative_from_absolute_position, DocumentId, Result};
/// Extracts the word, field id, and the documents ids where this word appears at this field id.
#[logging_timer::time]
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut key_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);
for position in read_u32_ne_bytes(value) {
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
let (fid, _) = relative_from_absolute_position(position);
key_buffer.extend_from_slice(&fid.to_be_bytes());
word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
}
let word_fid_docids_reader = sorter_into_reader(word_fid_docids_sorter, indexer)?;
Ok(word_fid_docids_reader)
}

View File
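The new `extract_word_fid_docids` keys its sorter with the UTF-8 word, a zero byte separator, and the big-endian field id. A small sketch of that key layout:

```rust
type FieldId = u16;

// Build a word_fid_docids key: word bytes ++ 0 ++ field id (big-endian).
fn word_fid_key(word: &str, fid: FieldId) -> Vec<u8> {
    let mut key = Vec::with_capacity(word.len() + 1 + 2);
    key.extend_from_slice(word.as_bytes());
    key.push(0);
    key.extend_from_slice(&fid.to_be_bytes());
    key
}

fn main() {
    let key = word_fid_key("kefir", 2);
    assert_eq!(&key[..5], b"kefir");
    assert_eq!(key[5], 0);
    assert_eq!(u16::from_be_bytes([key[6], key[7]]), 2);
}
```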

@ -1,17 +1,16 @@
use std::collections::{BTreeMap, VecDeque};
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use std::fs::File;
use std::{cmp, io};
use obkv::KvReaderU16;
use std::io::BufReader;
use std::{cmp, io, mem, str, vec};
use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters, MergeFn,
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters, MergeFn,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::proximity::{positions_proximity, MAX_DISTANCE};
use crate::{DocumentId, Result};
/// Extracts the best proximity between pairs of words and the documents ids where this pair appears.
@ -22,143 +21,63 @@ use crate::{DocumentId, Result};
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.into_iter()
.map(|_| {
create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / MAX_DISTANCE as usize),
)
})
.collect();
let mut word_pair_proximity_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
);
let mut del_word_positions: VecDeque<(String, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
let mut add_word_positions: VecDeque<(String, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
let mut del_word_pair_proximity = BTreeMap::new();
let mut add_word_pair_proximity = BTreeMap::new();
// This map is assumed to not consume a lot of memory.
let mut document_word_positions_heap = BinaryHeap::new();
let mut current_document_id = None;
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, _fid_bytes) = try_split_array_at(key)
let (document_id_bytes, word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let word = str::from_utf8(word_bytes)?;
// if we change document, we fill the sorter
if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter");
let curr_document_id = *current_document_id.get_or_insert(document_id);
if curr_document_id != document_id {
let document_word_positions_heap = mem::take(&mut document_word_positions_heap);
document_word_positions_into_sorter(
current_document_id.unwrap(),
&del_word_pair_proximity,
&add_word_pair_proximity,
&mut word_pair_proximity_docids_sorters,
curr_document_id,
document_word_positions_heap,
&mut word_pair_proximity_docids_sorter,
)?;
del_word_pair_proximity.clear();
add_word_pair_proximity.clear();
current_document_id = Some(document_id);
}
current_document_id = Some(document_id);
let (del, add): (Result<_>, Result<_>) = rayon::join(
|| {
// deletions
if let Some(deletion) = KvReaderDelAdd::new(&value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.get(0).map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
}) {
word_positions_into_word_pair_proximity(
&mut del_word_positions,
&mut del_word_pair_proximity,
)?;
}
// insert the new word.
let word = std::str::from_utf8(word)?;
del_word_positions.push_back((word.to_string(), position));
}
while !del_word_positions.is_empty() {
word_positions_into_word_pair_proximity(
&mut del_word_positions,
&mut del_word_pair_proximity,
)?;
}
}
Ok(())
},
|| {
// additions
if let Some(addition) = KvReaderDelAdd::new(&value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.get(0).map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
}) {
word_positions_into_word_pair_proximity(
&mut add_word_positions,
&mut add_word_pair_proximity,
)?;
}
// insert the new word.
let word = std::str::from_utf8(word)?;
add_word_positions.push_back((word.to_string(), position));
}
while !add_word_positions.is_empty() {
word_positions_into_word_pair_proximity(
&mut add_word_positions,
&mut add_word_pair_proximity,
)?;
}
}
Ok(())
},
);
del?;
add?;
let word = word.to_string();
let mut positions: Vec<_> = read_u32_ne_bytes(value).collect();
positions.sort_unstable();
let mut iter = positions.into_iter();
if let Some(position) = iter.next() {
document_word_positions_heap.push(PeekedWordPosition { word, position, iter });
}
}
if let Some(document_id) = current_document_id {
puffin::profile_scope!("Final document into sorter");
// We must make sure that we don't lose the current document's word positions
// if we break because we reached the end of the chunk.
let document_word_positions_heap = mem::take(&mut document_word_positions_heap);
document_word_positions_into_sorter(
document_id,
&del_word_pair_proximity,
&add_word_pair_proximity,
&mut word_pair_proximity_docids_sorters,
document_word_positions_heap,
&mut word_pair_proximity_docids_sorter,
)?;
}
{
puffin::profile_scope!("sorter_into_reader");
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for sorter in word_pair_proximity_docids_sorters {
sorter.write_into_stream_writer(&mut writer)?;
}
writer_into_reader(writer)
}
sorter_into_reader(word_pair_proximity_docids_sorter, indexer)
}
/// Fills the list of all pairs of words with the shortest proximity between 1 and 7 inclusive.
@ -167,66 +86,96 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
/// close to each other.
fn document_word_positions_into_sorter(
document_id: DocumentId,
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
word_pair_proximity_docids_sorters: &mut Vec<grenad::Sorter<MergeFn>>,
mut word_positions_heap: BinaryHeap<PeekedWordPosition<vec::IntoIter<u32>>>,
word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut word_pair_proximity = HashMap::new();
let mut ordered_peeked_word_positions = Vec::new();
while !word_positions_heap.is_empty() {
while let Some(peeked_word_position) = word_positions_heap.pop() {
ordered_peeked_word_positions.push(peeked_word_position);
if ordered_peeked_word_positions.len() == 7 {
break;
}
}
if let Some((head, tail)) = ordered_peeked_word_positions.split_first() {
for PeekedWordPosition { word, position, .. } in tail {
let prox = positions_proximity(head.position, *position);
if prox > 0 && prox < MAX_DISTANCE {
word_pair_proximity
.entry((head.word.clone(), word.clone()))
.and_modify(|p| {
*p = cmp::min(*p, prox);
})
.or_insert(prox);
}
}
// Push the tail in the heap.
let tail_iter = ordered_peeked_word_positions.drain(1..);
word_positions_heap.extend(tail_iter);
// Advance the head and push it in the heap.
if let Some(mut head) = ordered_peeked_word_positions.pop() {
if let Some(next_position) = head.iter.next() {
let prox = positions_proximity(head.position, next_position);
if prox > 0 && prox < MAX_DISTANCE {
word_pair_proximity
.entry((head.word.clone(), head.word.clone()))
.and_modify(|p| {
*p = cmp::min(*p, prox);
})
.or_insert(prox);
}
word_positions_heap.push(PeekedWordPosition {
word: head.word,
position: next_position,
iter: head.iter,
});
}
}
}
}
let mut buffer = Vec::new();
let mut key_buffer = Vec::new();
for eob in
merge_join_by(del_word_pair_proximity.iter(), add_word_pair_proximity.iter(), |d, a| {
d.cmp(a)
})
{
buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut buffer);
let ((w1, w2), prox) = match eob {
Left(key_value) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
key_value
}
Right(key_value) => {
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key_value
}
Both(key_value, _) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key_value
}
};
for ((w1, w2), prox) in word_pair_proximity {
key_buffer.clear();
key_buffer.push(*prox as u8);
key_buffer.push(prox as u8);
key_buffer.extend_from_slice(w1.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(w2.as_bytes());
word_pair_proximity_docids_sorters[*prox as usize - 1]
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
word_pair_proximity_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
Ok(())
}
fn word_positions_into_word_pair_proximity(
word_positions: &mut VecDeque<(String, u16)>,
word_pair_proximity: &mut BTreeMap<(String, String), u8>,
) -> Result<()> {
let (head_word, head_position) = word_positions.pop_front().unwrap();
for (word, position) in word_positions.iter() {
let prox = index_proximity(head_position as u32, *position as u32) as u8;
if prox > 0 && prox < MAX_DISTANCE as u8 {
word_pair_proximity
.entry((head_word.clone(), word.clone()))
.and_modify(|p| {
*p = cmp::min(*p, prox);
})
.or_insert(prox);
}
}
Ok(())
struct PeekedWordPosition<I> {
word: String,
position: u32,
iter: I,
}
impl<I> Ord for PeekedWordPosition<I> {
fn cmp(&self, other: &Self) -> Ordering {
self.position.cmp(&other.position).reverse()
}
}
impl<I> PartialOrd for PeekedWordPosition<I> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<I> Eq for PeekedWordPosition<I> {}
impl<I> PartialEq for PeekedWordPosition<I> {
fn eq(&self, other: &Self) -> bool {
self.position == other.position
}
}
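
For reference, the doc comment above ("pairs of words with the shortest proximity between 1 and 7 inclusive") boils down to a sliding-window pass over the positions of a document. Below is a minimal, self-contained sketch of that pass; the MAX_DISTANCE bound of 8 and the simplified index_proximity are assumptions made for illustration, not the exact milli definitions.

```rust
use std::cmp;
use std::collections::{BTreeMap, VecDeque};

// Assumed bound: the proximities we keep stay in 1..=7.
const MAX_DISTANCE: u32 = 8;

// Simplified stand-in for milli's index_proximity: distance between two
// positions, penalizing the reversed order by one.
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs { rhs - lhs } else { lhs - rhs + 1 }
}

/// Record, for the head word and every following word in the window,
/// the smallest proximity seen so far.
fn word_positions_into_pairs(
    word_positions: &mut VecDeque<(String, u16)>,
    word_pair_proximity: &mut BTreeMap<(String, String), u8>,
) {
    let (head_word, head_position) = word_positions.pop_front().unwrap();
    for (word, position) in word_positions.iter() {
        let prox = index_proximity(head_position as u32, *position as u32);
        if prox > 0 && prox < MAX_DISTANCE {
            word_pair_proximity
                .entry((head_word.clone(), word.clone()))
                .and_modify(|p| *p = cmp::min(*p, prox as u8))
                .or_insert(prox as u8);
        }
    }
}

fn main() {
    let mut window: VecDeque<(String, u16)> =
        vec![("the".into(), 0), ("quick".into(), 1), ("fox".into(), 3)].into_iter().collect();
    let mut pairs = BTreeMap::new();
    while !window.is_empty() {
        word_positions_into_pairs(&mut window, &mut pairs);
    }
    // ("the", "quick") -> 1, ("the", "fox") -> 3, ("quick", "fox") -> 2
    println!("{pairs:?}");
}
```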

View File

@ -1,18 +1,13 @@
use std::collections::BTreeSet;
use std::fs::File;
use std::io;
use obkv::KvReaderU16;
use std::io::{self, BufReader};
use super::helpers::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::MergeFn;
use crate::{bucketed_position, DocumentId, Result};
use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Result};
/// Extracts the word positions and the documents ids where this word appear.
///
@ -22,117 +17,39 @@ use crate::{bucketed_position, DocumentId, Result};
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
let mut add_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
let mut current_document_id: Option<u32> = None;
let mut key_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, _fid_bytes) = try_split_array_at(key)
let (document_id_bytes, word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);
if current_document_id.map_or(false, |id| document_id != id) {
words_position_into_sorter(
current_document_id.unwrap(),
&mut key_buffer,
&del_word_positions,
&add_word_positions,
&mut word_position_docids_sorter,
)?;
del_word_positions.clear();
add_word_positions.clear();
}
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(&value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
let position = bucketed_position(position);
del_word_positions.insert((position, word_bytes.to_vec()));
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (position, word_bytes) in KvReaderU16::new(addition).iter() {
let position = bucketed_position(position);
add_word_positions.insert((position, word_bytes.to_vec()));
}
for position in read_u32_ne_bytes(value) {
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
let (_, position) = relative_from_absolute_position(position);
let position = bucketed_position(position);
key_buffer.extend_from_slice(&position.to_be_bytes());
word_position_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
}
if let Some(document_id) = current_document_id {
words_position_into_sorter(
document_id,
&mut key_buffer,
&del_word_positions,
&add_word_positions,
&mut word_position_docids_sorter,
)?;
}
// TODO remove noop DelAdd OBKV
let word_position_docids_reader = sorter_into_reader(word_position_docids_sorter, indexer)?;
Ok(word_position_docids_reader)
}
fn words_position_into_sorter(
document_id: DocumentId,
key_buffer: &mut Vec<u8>,
del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
puffin::profile_function!();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut buffer = Vec::new();
for eob in merge_join_by(del_word_positions.iter(), add_word_positions.iter(), |d, a| d.cmp(a))
{
buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut buffer);
let (position, word_bytes) = match eob {
Left(key) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
key
}
Right(key) => {
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key
}
Both(key, _) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key
}
};
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&position.to_be_bytes());
word_position_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())
}
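
In their del/add variant, the two extractors above reduce a per-document "deleted" set and "added" set into a single DelAdd entry per key, using itertools::merge_join_by to decide whether a key is deletion-only, addition-only, or both. A hedged sketch of that classification, assuming the itertools crate and replacing the KvWriterDelAdd obkv with a plain enum (Kind and classify are illustrative names):

```rust
use std::collections::BTreeSet;

use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

#[derive(Debug)]
enum Kind {
    DeletionOnly,
    AdditionOnly,
    InBoth,
}

/// Walk the two sorted sets in lockstep and tag every key,
/// mirroring the Left/Right/Both arms of the extractors above.
fn classify<'a>(del: &'a BTreeSet<String>, add: &'a BTreeSet<String>) -> Vec<(&'a str, Kind)> {
    merge_join_by(del.iter(), add.iter(), |d, a| d.cmp(a))
        .map(|eob| match eob {
            Left(key) => (key.as_str(), Kind::DeletionOnly),
            Right(key) => (key.as_str(), Kind::AdditionOnly),
            Both(key, _) => (key.as_str(), Kind::InBoth),
        })
        .collect()
}

fn main() {
    let del: BTreeSet<String> = ["blue", "sky"].iter().map(|s| s.to_string()).collect();
    let add: BTreeSet<String> = ["sky", "sun"].iter().map(|s| s.to_string()).collect();
    // blue -> DeletionOnly, sky -> InBoth, sun -> AdditionOnly
    println!("{:?}", classify(&del, &add));
}
```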

View File

@ -6,11 +6,13 @@ mod extract_fid_word_count_docids;
mod extract_geo_points;
mod extract_vector_points;
mod extract_word_docids;
mod extract_word_fid_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
use std::collections::HashSet;
use std::fs::File;
use std::io::BufReader;
use crossbeam_channel::Sender;
use log::debug;
@ -24,11 +26,12 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
use self::extract_geo_points::extract_geo_points;
use self::extract_vector_points::extract_vector_points;
use self::extract_word_docids::extract_word_docids;
use self::extract_word_fid_docids::extract_word_fid_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{
as_cloneable_grenad, merge_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
MergeableReader,
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
GrenadParameters, MergeFn, MergeableReader,
};
use super::{helpers, TypedChunk};
use crate::{FieldId, Result};
@ -37,8 +40,8 @@ use crate::{FieldId, Result};
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: Option<HashSet<FieldId>>,
@ -91,9 +94,9 @@ pub(crate) fn data_from_obkv_documents(
let (
docid_word_positions_chunks,
(
fid_docid_facet_numbers_chunks,
docid_fid_facet_numbers_chunks,
(
fid_docid_facet_strings_chunks,
docid_fid_facet_strings_chunks,
(
facet_is_null_docids_chunks,
(facet_is_empty_docids_chunks, facet_exists_docids_chunks),
@ -150,7 +153,7 @@ pub(crate) fn data_from_obkv_documents(
});
}
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
@ -160,7 +163,7 @@ pub(crate) fn data_from_obkv_documents(
"word-pair-proximity-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
@ -173,24 +176,21 @@ pub(crate) fn data_from_obkv_documents(
spawn_extraction_task::<
_,
_,
Vec<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)>,
Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
merge_cbo_roaring_bitmaps,
|(word_docids_reader, exact_word_docids_reader, word_fid_docids_reader)| {
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
}
merge_roaring_bitmaps,
|(word_docids_reader, exact_word_docids_reader)| TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
},
"word-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
@ -199,9 +199,18 @@ pub(crate) fn data_from_obkv_documents(
TypedChunk::WordPositionDocids,
"word-position-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks,
indexer,
lmdb_writer_sx.clone(),
extract_word_fid_docids,
merge_cbo_roaring_bitmaps,
TypedChunk::WordFidDocids,
"word-fid-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
fid_docid_facet_strings_chunks,
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_strings_chunks,
indexer,
lmdb_writer_sx.clone(),
extract_facet_string_docids,
@ -210,8 +219,8 @@ pub(crate) fn data_from_obkv_documents(
"field-id-facet-string-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
fid_docid_facet_numbers_chunks,
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_numbers_chunks,
indexer,
lmdb_writer_sx,
extract_facet_number_docids,
@ -265,7 +274,7 @@ fn spawn_extraction_task<FE, FS, M>(
/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<File>>,
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
@ -307,7 +316,7 @@ fn send_original_documents_data(
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn send_and_extract_flattened_documents_data(
flattened_documents_chunk: Result<grenad::Reader<File>>,
flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: &Option<HashSet<FieldId>>,
@ -324,7 +333,10 @@ fn send_and_extract_flattened_documents_data(
grenad::Reader<CursorClonableMmap>,
(
grenad::Reader<CursorClonableMmap>,
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
(
grenad::Reader<BufReader<File>>,
(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
),
),
),
)> {
@ -344,7 +356,7 @@ fn send_and_extract_flattened_documents_data(
});
}
let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {
let (documents_ids, docid_word_positions_chunk, script_language_pair) =
@ -372,8 +384,8 @@ fn send_and_extract_flattened_documents_data(
},
|| {
let ExtractedFacetValues {
fid_docid_facet_numbers_chunk,
fid_docid_facet_strings_chunk,
docid_fid_facet_numbers_chunk,
docid_fid_facet_strings_chunk,
fid_facet_is_null_docids_chunk,
fid_facet_is_empty_docids_chunk,
fid_facet_exists_docids_chunk,
@ -384,26 +396,26 @@ fn send_and_extract_flattened_documents_data(
geo_fields_ids,
)?;
// send fid_docid_facet_numbers_chunk to DB writer
let fid_docid_facet_numbers_chunk =
unsafe { as_cloneable_grenad(&fid_docid_facet_numbers_chunk)? };
// send docid_fid_facet_numbers_chunk to DB writer
let docid_fid_facet_numbers_chunk =
unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
fid_docid_facet_numbers_chunk.clone(),
docid_fid_facet_numbers_chunk.clone(),
)));
// send fid_docid_facet_strings_chunk to DB writer
let fid_docid_facet_strings_chunk =
unsafe { as_cloneable_grenad(&fid_docid_facet_strings_chunk)? };
// send docid_fid_facet_strings_chunk to DB writer
let docid_fid_facet_strings_chunk =
unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
fid_docid_facet_strings_chunk.clone(),
docid_fid_facet_strings_chunk.clone(),
)));
Ok((
fid_docid_facet_numbers_chunk,
docid_fid_facet_numbers_chunk,
(
fid_docid_facet_strings_chunk,
docid_fid_facet_strings_chunk,
(
fid_facet_is_null_docids_chunk,
(fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
@ -413,5 +425,5 @@ fn send_and_extract_flattened_documents_data(
},
);
Ok((docid_word_positions_chunk?, fid_docid_facet_values_chunks?))
Ok((docid_word_positions_chunk?, docid_fid_facet_values_chunks?))
}

View File

@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::{self, Seek};
use std::io::{self, BufReader, BufWriter, Seek};
use std::time::Instant;
use grenad::{CompressionType, Sorter};
@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
typ: grenad::CompressionType,
level: Option<u32>,
file: R,
) -> grenad::Writer<R> {
) -> grenad::Writer<BufWriter<R>> {
let mut builder = grenad::Writer::builder();
builder.compression_type(typ);
if let Some(level) = level {
builder.compression_level(level);
}
builder.build(file)
builder.build(BufWriter::new(file))
}
pub fn create_sorter(
@ -53,8 +53,7 @@ pub fn create_sorter(
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
) -> Result<grenad::Reader<BufReader<File>>> {
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@ -65,16 +64,18 @@ pub fn sorter_into_reader(
writer_into_reader(writer)
}
pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
let mut file = writer.into_inner()?;
pub fn writer_into_reader(
writer: grenad::Writer<BufWriter<File>>,
) -> Result<grenad::Reader<BufReader<File>>> {
let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
file.rewind()?;
grenad::Reader::new(file).map_err(Into::into)
grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
}
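
Every grenad writer above is now wrapped in a std::io::BufWriter, so recovering the underlying File takes two steps: grenad's into_inner(), then BufWriter::into_inner(), whose IntoInnerError has to be converted back into a plain io::Error. A small sketch of that std pattern (buffered_roundtrip is a made-up name; tempfile is already a dependency of the surrounding code):

```rust
use std::fs::File;
use std::io::{self, BufWriter, Seek, Write};

fn buffered_roundtrip(file: File) -> io::Result<File> {
    let mut writer = BufWriter::new(file);
    writer.write_all(b"hello grenad")?;
    // BufWriter::into_inner flushes the buffer and hands back the wrapped File;
    // on failure it returns an IntoInnerError, converted into an io::Error
    // exactly like writer_into_reader does above.
    let mut file = writer.into_inner().map_err(|err| err.into_error())?;
    file.rewind()?;
    Ok(file)
}

fn main() -> io::Result<()> {
    let file = buffered_roundtrip(tempfile::tempfile()?)?;
    println!("wrote {} bytes", file.metadata()?.len());
    Ok(())
}
```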
pub unsafe fn as_cloneable_grenad(
reader: &grenad::Reader<File>,
reader: &grenad::Reader<BufReader<File>>,
) -> Result<grenad::Reader<CursorClonableMmap>> {
let file = reader.get_ref();
let file = reader.get_ref().get_ref();
let mmap = memmap2::Mmap::map(file)?;
let cursor = io::Cursor::new(ClonableMmap::from(mmap));
let reader = grenad::Reader::new(cursor)?;
@ -90,8 +91,8 @@ where
fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
}
impl MergeableReader for Vec<grenad::Reader<File>> {
type Output = grenad::Reader<File>;
impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
type Output = grenad::Reader<BufReader<File>>;
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut merger = MergerBuilder::new(merge_fn);
@ -100,8 +101,8 @@ impl MergeableReader for Vec<grenad::Reader<File>> {
}
}
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
type Output = (grenad::Reader<File>, grenad::Reader<File>);
impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut m1 = MergerBuilder::new(merge_fn);
@ -114,22 +115,6 @@ impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
}
}
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> {
type Output = (grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>);
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut m1 = MergerBuilder::new(merge_fn);
let mut m2 = MergerBuilder::new(merge_fn);
let mut m3 = MergerBuilder::new(merge_fn);
for (r1, r2, r3) in self.into_iter() {
m1.push(r1)?;
m2.push(r2)?;
m3.push(r3)?;
}
Ok((m1.finish(params)?, m2.finish(params)?, m3.finish(params)?))
}
}
struct MergerBuilder<R>(grenad::MergerBuilder<R, MergeFn>);
impl<R: io::Read + io::Seek> MergerBuilder<R> {
@ -142,7 +127,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
Ok(())
}
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
let merger = self.0.build();
let mut writer = create_writer(
params.chunk_compression_type,
@ -193,7 +178,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
reader: grenad::Reader<R>,
indexer: GrenadParameters,
documents_chunk_size: usize,
) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
let mut continue_reading = true;
let mut cursor = reader.into_cursor()?;

View File

@ -6,13 +6,11 @@ use std::result::Result as StdResult;
use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
#[allow(unused)]
pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
@ -77,123 +75,57 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
Ok(obkvs.last().unwrap().clone())
}
pub fn merge_two_del_add_obkvs(
base: obkv::KvReaderU16,
update: obkv::KvReaderU16,
merge_additions: bool,
buffer: &mut Vec<u8>,
) {
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
buffer.clear();
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for eob in merge_join_by(base.iter(), update.iter(), |(b, _), (u, _)| b.cmp(u)) {
match eob {
Left((k, v)) => {
if merge_additions {
writer.insert(k, v).unwrap()
} else {
// If merge_additions is false, recreate an obkv keeping the deletions only.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(v);
if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
value_writer.finish().unwrap();
writer.insert(k, &value_buffer).unwrap()
}
}
}
Right((k, v)) => writer.insert(k, v).unwrap(),
Both((k, base), (_, update)) => {
// merge deletions and additions.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(base);
let update_reader = KvReaderDelAdd::new(update);
// keep newest deletion.
if let Some(deletion) = update_reader
.get(DelAdd::Deletion)
.or_else(|| base_reader.get(DelAdd::Deletion))
{
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
}
// keep base addition only if merge_additions is true.
let base_addition =
merge_additions.then(|| base_reader.get(DelAdd::Addition)).flatten();
// keep newest addition.
// TODO use or_else
if let Some(addition) = update_reader.get(DelAdd::Addition).or(base_addition) {
value_writer.insert(DelAdd::Addition, addition).unwrap();
}
value_writer.finish().unwrap();
writer.insert(k, &value_buffer).unwrap()
}
Both(_, (k, v)) | Left((k, v)) | Right((k, v)) => writer.insert(k, v).unwrap(),
}
}
writer.finish().unwrap();
}
/// Merge all the obkvs from the newest to the oldest.
fn inner_merge_del_add_obkvs<'a>(
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs_and_operations<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
merge_additions: bool,
) -> Result<Cow<'a, [u8]>> {
// pop the newest operation from the list.
let (newest, obkvs) = obkvs.split_last().unwrap();
// keep the operation type for the returned value.
let newest_operation_type = newest[0];
// [add, add, delete, add, add]
// we can ignore everything that happened before the last delete.
let starting_position =
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
// treat the newest obkv as the starting point of the merge.
let mut acc_operation_type = newest_operation_type;
let mut acc = newest[1..].to_vec();
let mut buffer = Vec::new();
// reverse iter from the most recent to the oldest.
for current in obkvs.into_iter().rev() {
// if in the previous iteration there was a complete deletion,
// stop the merge process.
if acc_operation_type == Operation::Deletion as u8 {
break;
}
let newest = obkv::KvReader::new(&acc);
let oldest = obkv::KvReader::new(&current[1..]);
merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
// we want the result of the merge into our accumulator.
std::mem::swap(&mut acc, &mut buffer);
acc_operation_type = current[0];
// [add, add, delete]
// if the last operation was a deletion then we simply return the deletion
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
{
return Ok(obkvs[obkvs.len() - 1].clone());
}
let mut buffer = Vec::new();
acc.insert(0, newest_operation_type);
Ok(Cow::from(acc))
// (add, add, delete) [add, add]
// in the other case, no deletion will be encountered during the merge
let mut ret =
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current[1..]);
merge_two_obkvs(first, second, &mut buffer);
// we want the result of the merge into our accumulator
std::mem::swap(&mut acc, &mut buffer);
acc
});
ret.insert(0, Operation::Addition as u8);
Ok(Cow::from(ret))
}
/// Merge all the obkvs from the newest to the oldest.
pub fn obkvs_merge_additions_and_deletions<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, true)
}
/// Merge all the obkvs deletions from the newest to the oldest and keep only the newest additions.
pub fn obkvs_keep_last_addition_merge_deletions<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
inner_merge_del_add_obkvs(obkvs, false)
}
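
All of these merge functions rely on each sorter value carrying a leading Operation byte. A reduced sketch of the "ignore everything before the last deletion, then fold the remaining additions" strategy shown in merge_obkvs_and_operations, with the field-by-field obkv merge replaced by plain concatenation purely for illustration (the Operation discriminants below are assumptions):

```rust
use std::borrow::Cow;

#[repr(u8)]
enum Operation {
    Addition,
    Deletion,
}

/// Merge the successive versions of one document, each prefixed by an Operation byte.
fn merge_versions<'a>(versions: &[Cow<'a, [u8]>]) -> Cow<'a, [u8]> {
    // Anything older than the last full deletion is irrelevant.
    let start = versions
        .iter()
        .rposition(|v| v[0] == Operation::Deletion as u8)
        .unwrap_or(0);

    // If the most recent surviving entry is itself a deletion, return it as-is.
    if start == versions.len() - 1 && versions[start][0] == Operation::Deletion as u8 {
        return versions[start].clone();
    }

    // Fold the survivors; the real code merges obkvs key by key instead of appending.
    let mut merged = vec![Operation::Addition as u8];
    for version in &versions[start..] {
        if version[0] == Operation::Addition as u8 {
            merged.extend_from_slice(&version[1..]);
        }
    }
    Cow::from(merged)
}

fn main() {
    let versions: Vec<Cow<[u8]>> = vec![
        Cow::from(vec![Operation::Addition as u8, b'a']),
        Cow::from(vec![Operation::Deletion as u8]),
        Cow::from(vec![Operation::Addition as u8, b'b']),
        Cow::from(vec![Operation::Addition as u8, b'c']),
    ];
    // The deletion discards 'a'; 'b' and 'c' are folded together.
    assert_eq!(merge_versions(&versions).to_vec(), vec![Operation::Addition as u8, b'b', b'c']);
}
```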
/// Do a union of all the CboRoaringBitmaps in the values.
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
@ -206,36 +138,3 @@ pub fn merge_cbo_roaring_bitmaps<'a>(
Ok(Cow::from(vec))
}
}
/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv
/// separately and outputs a new DelAdd with both unions.
pub fn merge_deladd_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
if values.len() == 1 {
Ok(values[0].clone())
} else {
// Retrieve the bitmaps from both sides
let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes);
}
if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) {
add_bitmaps_bytes.push(bitmap_bytes);
}
}
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
}
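
merge_deladd_cbo_roaring_bitmaps unions the Deletion sides and the Addition sides of all values independently before re-serializing them. A reduced sketch with the roaring crate, taking plain (del, add) bitmap pairs instead of serialized DelAdd obkvs:

```rust
use roaring::RoaringBitmap;

/// Union every deletion bitmap together and every addition bitmap together,
/// keeping the two sides separate like merge_deladd_cbo_roaring_bitmaps does.
fn merge_del_add(values: &[(RoaringBitmap, RoaringBitmap)]) -> (RoaringBitmap, RoaringBitmap) {
    let mut del = RoaringBitmap::new();
    let mut add = RoaringBitmap::new();
    for (d, a) in values {
        del |= d;
        add |= a;
    }
    (del, add)
}

fn main() {
    let values: Vec<(RoaringBitmap, RoaringBitmap)> = vec![
        ([1u32, 2].into_iter().collect(), [10u32].into_iter().collect()),
        ([2u32, 3].into_iter().collect(), [11u32].into_iter().collect()),
    ];
    let (del, add) = merge_del_add(&values);
    assert_eq!(del.iter().collect::<Vec<u32>>(), vec![1, 2, 3]);
    assert_eq!(add.iter().collect::<Vec<u32>>(), vec![10, 11]);
}
```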

View File

@ -14,8 +14,7 @@ pub use grenad_helpers::{
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
merge_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps, merge_roaring_bitmaps,
obkvs_keep_last_addition_merge_deletions, obkvs_merge_additions_and_deletions,
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
serialize_roaring_bitmap, MergeFn,
};
@ -45,7 +44,6 @@ where
Some((head, tail))
}
#[allow(unused)]
pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ {
bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes)
}

View File

@ -20,10 +20,7 @@ use slice_group_by::GroupBy;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
use self::enrich::enrich_documents_batch;
pub use self::enrich::{
extract_finite_float_from_value, validate_document_id, validate_document_id_value,
validate_geo_from_json, DocumentId,
};
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::helpers::{
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
@ -38,7 +35,7 @@ use crate::update::{
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::{CboRoaringBitmapCodec, Index, Result};
use crate::{Index, Result, RoaringBitmapCodec};
static MERGED_DATABASE_COUNT: usize = 7;
static PREFIX_DATABASE_COUNT: usize = 5;
@ -406,23 +403,13 @@ where
}
let typed_chunk = match result? {
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => {
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
word_docids = Some(cloneable_chunk);
let cloneable_chunk =
unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
exact_word_docids = Some(cloneable_chunk);
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
word_fid_docids = Some(cloneable_chunk);
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
}
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
}
TypedChunk::WordPairProximityDocids(chunk) => {
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
@ -434,6 +421,11 @@ where
word_position_docids = Some(cloneable_chunk);
TypedChunk::WordPositionDocids(chunk)
}
TypedChunk::WordFidDocids(chunk) => {
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
word_fid_docids = Some(cloneable_chunk);
TypedChunk::WordFidDocids(chunk)
}
otherwise => otherwise,
};
@ -475,14 +467,13 @@ where
let all_documents_ids = index_documents_ids | new_documents_ids;
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
// TODO: reactivate prefix DB with diff-indexing
// self.execute_prefix_databases(
// word_docids,
// exact_word_docids,
// word_pair_proximity_docids,
// word_position_docids,
// word_fid_docids,
// )?;
self.execute_prefix_databases(
word_docids,
exact_word_docids,
word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
)?;
Ok(all_documents_ids.len())
}
@ -696,8 +687,8 @@ where
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn,
reader: grenad::Reader<Cursor<ClonableMmap>>,
word_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_docids_db: Database<Str, RoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, RoaringBitmapCodec>,
indexer_config: &IndexerConfig,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],

View File

@ -7,20 +7,18 @@ use std::io::{Read, Seek};
use fxhash::FxHashMap;
use heed::RoTxn;
use itertools::Itertools;
use obkv::{KvReader, KvReaderU16, KvWriter};
use obkv::{KvReader, KvWriter};
use roaring::RoaringBitmap;
use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{
create_sorter, create_writer, obkvs_keep_last_addition_merge_deletions,
obkvs_merge_additions_and_deletions, MergeFn,
create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
use crate::update::del_add::into_del_add_obkv;
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
use crate::{
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
@ -108,8 +106,8 @@ impl<'a, 'i> Transform<'a, 'i> {
// We must choose the appropriate merge function for when two or more documents
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => obkvs_keep_last_addition_merge_deletions,
IndexDocumentsMethod::UpdateDocuments => obkvs_merge_additions_and_deletions,
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
};
// We initialize the sorter with the user indexing settings.
@ -225,21 +223,19 @@ impl<'a, 'i> Transform<'a, 'i> {
let docid = match self.new_external_documents_ids_builder.entry((*external_id).into()) {
Entry::Occupied(entry) => *entry.get() as u32,
Entry::Vacant(entry) => {
let docid = match external_documents_ids.get(entry.key()) {
Some(docid) => {
// If it was already in the list of replaced documents it means it was deleted
// by the remove_document method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
docid
// If the document was already in the db we mark it as a replaced document.
// It'll be deleted later.
if let Some(docid) = external_documents_ids.get(entry.key()) {
// If it was already in the list of replaced documents it means it was deleted
// by the remove_document method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
None => self
.available_documents_ids
.next()
.ok_or(UserError::DocumentLimitReached)?,
};
}
let docid = self
.available_documents_ids
.next()
.ok_or(UserError::DocumentLimitReached)?;
entry.insert(docid as u64);
docid
}
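
The Entry-based lookup above either reuses an internal docid or allocates a new one. A small sketch of the variant that reuses the docid already stored in the index when the external id is known; DocidAllocator and its plain maps are illustrative stand-ins for the external documents ids FST and AvailableDocumentsIds:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

struct DocidAllocator {
    /// external id -> internal docid, for documents already seen in this batch.
    new_ids: HashMap<String, u32>,
    /// external id -> internal docid, for documents already present in the index.
    db_ids: HashMap<String, u32>,
    next_free: u32,
}

impl DocidAllocator {
    fn resolve(&mut self, external_id: &str) -> u32 {
        match self.new_ids.entry(external_id.to_string()) {
            Entry::Occupied(entry) => *entry.get(),
            Entry::Vacant(entry) => {
                let docid = match self.db_ids.get(external_id) {
                    // The document already exists: reuse its docid (it becomes a replacement).
                    Some(&docid) => docid,
                    // Unknown document: take the next free internal docid.
                    None => {
                        let fresh = self.next_free;
                        self.next_free += 1;
                        fresh
                    }
                };
                entry.insert(docid);
                docid
            }
        }
    }
}

fn main() {
    let mut alloc = DocidAllocator {
        new_ids: HashMap::new(),
        db_ids: HashMap::from([("doc-1".to_string(), 42)]),
        next_free: 100,
    };
    assert_eq!(alloc.resolve("doc-1"), 42); // already indexed: reuse
    assert_eq!(alloc.resolve("doc-2"), 100); // unknown: freshly allocated
    assert_eq!(alloc.resolve("doc-2"), 100); // repeated in the batch: cached
}
```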
@ -267,28 +263,16 @@ impl<'a, 'i> Transform<'a, 'i> {
skip_insertion = true;
} else {
// we associate the base document with the new key, everything will get merged later.
let keep_original_version =
self.index_documents_method == IndexDocumentsMethod::UpdateDocuments;
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
true,
keep_original_version,
&mut document_sorter_buffer,
)?;
document_sorter_buffer.extend_from_slice(base_obkv);
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&flattened_obkv),
true,
keep_original_version,
&mut document_sorter_buffer,
)?;
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
@ -304,12 +288,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv_buffer),
false,
true,
&mut document_sorter_buffer,
)?;
document_sorter_buffer.extend_from_slice(&obkv_buffer);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
@ -317,12 +296,7 @@ impl<'a, 'i> Transform<'a, 'i> {
Some(flattened_obkv) => {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&flattened_obkv),
false,
true,
&mut document_sorter_buffer,
)?;
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
@ -380,25 +354,19 @@ impl<'a, 'i> Transform<'a, 'i> {
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mut documents_deleted = 0;
let mut document_sorter_buffer = Vec::new();
for to_remove in to_remove {
if should_abort() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
// Check if the document has been added in the current indexing process.
let deleted_from_current = match self
.new_external_documents_ids_builder
.entry((*to_remove).into())
{
match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
// if the document was added in a previous iteration of the transform, we mark it as deleted in the sorters.
Entry::Occupied(entry) => {
let doc_id = *entry.get() as u32;
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Deletion as u8);
obkv::KvWriterU16::new(&mut document_sorter_buffer).finish().unwrap();
self.original_sorter.insert(doc_id.to_be_bytes(), &document_sorter_buffer)?;
self.flattened_sorter.insert(doc_id.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
self.flattened_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
// we must NOT update the list of replaced_documents_ids
// Either:
@ -407,69 +375,21 @@ impl<'a, 'i> Transform<'a, 'i> {
// we're removing it there is nothing to do.
self.new_documents_ids.remove(doc_id);
entry.remove_entry();
true
}
Entry::Vacant(_) => false,
};
// If the document was already in the db we mark it as a `to_delete` document.
// Then we push the document in sorters in deletion mode.
let deleted_from_db = match external_documents_ids.get(&to_remove) {
Some(docid) => {
self.replaced_documents_ids.insert(docid);
// fetch the obkv document
let original_key = BEU32::new(docid);
let base_obkv = self
.index
.documents
.remap_data_type::<heed::types::ByteSlice>()
.get(wtxn, &original_key)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::DOCUMENTS,
key: None,
})?;
// push it as to delete in the original_sorter
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
true,
false,
&mut document_sorter_buffer,
)?;
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
// flatten it and push it as to delete in the flattened_sorter
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(&flattened_obkv),
true,
false,
&mut document_sorter_buffer,
)?;
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a `to_delete` document.
// It'll be deleted later. We don't need to push anything to the sorters.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
} else {
// if the document is nowhere to be found, there is nothing to do and we must NOT
// increment the count of documents_deleted
continue;
}
true
}
None => false,
};
// increase counter only if the document existed somewhere before.
if deleted_from_current || deleted_from_db {
documents_deleted += 1;
}
documents_deleted += 1;
}
Ok(documents_deleted)
@ -669,7 +589,9 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut documents_count = 0;
while let Some((key, val)) = iter.next()? {
// skip first byte corresponding to the operation type (Deletion or Addition).
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
// send a callback to show at which step we are
@ -709,7 +631,9 @@ impl<'a, 'i> Transform<'a, 'i> {
// We get rid of the `Operation` byte and skip the deleted documents as well.
let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
while let Some((key, val)) = iter.next()? {
// skip first byte corresponding to the operation type (Deletion or Addition).
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
writer.insert(key, val)?;
}
@ -735,8 +659,10 @@ impl<'a, 'i> Transform<'a, 'i> {
new_documents_ids: self.new_documents_ids,
replaced_documents_ids: self.replaced_documents_ids,
documents_count: self.documents_count,
original_documents,
flattened_documents,
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
flattened_documents: flattened_documents
.into_inner()
.map_err(|err| err.into_error())?,
})
}
@ -787,7 +713,6 @@ impl<'a, 'i> Transform<'a, 'i> {
);
let mut obkv_buffer = Vec::new();
let mut document_sorter_buffer = Vec::new();
for result in self.index.all_documents(wtxn)? {
let (docid, obkv) = result?;
@ -802,9 +727,7 @@ impl<'a, 'i> Transform<'a, 'i> {
}
let buffer = obkv_writer.into_inner()?;
document_sorter_buffer.clear();
into_del_add_obkv(KvReaderU16::new(buffer), false, true, &mut document_sorter_buffer)?;
original_writer.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
original_writer.insert(docid.to_be_bytes(), &buffer)?;
// Once we have the document. We're going to flatten it
// and insert it in the flattened sorter.
@ -839,9 +762,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let value = serde_json::to_vec(&value).map_err(InternalError::SerdeJson)?;
writer.insert(fid, &value)?;
}
document_sorter_buffer.clear();
into_del_add_obkv(KvReaderU16::new(&buffer), false, true, &mut document_sorter_buffer)?;
flattened_writer.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
flattened_writer.insert(docid.to_be_bytes(), &buffer)?;
}
// Once we have written all the documents, we extract
@ -860,8 +781,10 @@ impl<'a, 'i> Transform<'a, 'i> {
new_documents_ids: documents_ids,
replaced_documents_ids: RoaringBitmap::default(),
documents_count,
original_documents,
flattened_documents,
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
flattened_documents: flattened_documents
.into_inner()
.map_err(|err| err.into_error())?,
};
let new_facets = output.compute_real_facets(wtxn, self.index)?;
@ -905,86 +828,38 @@ mod test {
#[test]
fn merge_obkvs() {
let mut additive_doc_0 = Vec::new();
let mut deletive_doc_0 = Vec::new();
let mut del_add_doc_0 = Vec::new();
let mut kv_writer = KvWriter::memory();
let mut doc_0 = Vec::new();
let mut kv_writer = KvWriter::new(&mut doc_0);
kv_writer.insert(0_u8, [0]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(KvReaderU16::new(&buffer), false, true, &mut additive_doc_0).unwrap();
additive_doc_0.insert(0, Operation::Addition as u8);
into_del_add_obkv(KvReaderU16::new(&buffer), true, false, &mut deletive_doc_0).unwrap();
deletive_doc_0.insert(0, Operation::Deletion as u8);
into_del_add_obkv(KvReaderU16::new(&buffer), true, true, &mut del_add_doc_0).unwrap();
del_add_doc_0.insert(0, Operation::Addition as u8);
kv_writer.finish().unwrap();
doc_0.insert(0, Operation::Addition as u8);
let mut additive_doc_1 = Vec::new();
let mut kv_writer = KvWriter::memory();
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(KvReaderU16::new(&buffer), false, true, &mut additive_doc_1).unwrap();
additive_doc_1.insert(0, Operation::Addition as u8);
let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
assert_eq!(*ret, doc_0);
let mut additive_doc_0_1 = Vec::new();
let mut kv_writer = KvWriter::memory();
kv_writer.insert(0_u8, [0]).unwrap();
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(KvReaderU16::new(&buffer), false, true, &mut additive_doc_0_1).unwrap();
additive_doc_0_1.insert(0, Operation::Addition as u8);
let ret = obkvs_merge_additions_and_deletions(&[], &[Cow::from(additive_doc_0.as_slice())])
.unwrap();
assert_eq!(*ret, additive_doc_0);
let ret = obkvs_merge_additions_and_deletions(
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(deletive_doc_0.as_slice()), Cow::from(additive_doc_0.as_slice())],
&[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, del_add_doc_0);
assert_eq!(*ret, doc_0);
let ret = obkvs_merge_additions_and_deletions(
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(additive_doc_0.as_slice()), Cow::from(deletive_doc_0.as_slice())],
&[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
)
.unwrap();
assert_eq!(*ret, deletive_doc_0);
assert_eq!(*ret, [Operation::Deletion as u8]);
let ret = obkvs_merge_additions_and_deletions(
let ret = merge_obkvs_and_operations(
&[],
&[
Cow::from(additive_doc_1.as_slice()),
Cow::from(deletive_doc_0.as_slice()),
Cow::from(additive_doc_0.as_slice()),
Cow::from([Operation::Addition as u8, 1].as_slice()),
Cow::from([Operation::Deletion as u8].as_slice()),
Cow::from(doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, del_add_doc_0);
let ret = obkvs_merge_additions_and_deletions(
&[],
&[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0_1);
let ret = obkvs_keep_last_addition_merge_deletions(
&[],
&[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, additive_doc_0);
let ret = obkvs_keep_last_addition_merge_deletions(
&[],
&[
Cow::from(deletive_doc_0.as_slice()),
Cow::from(additive_doc_1.as_slice()),
Cow::from(additive_doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, del_add_doc_0);
assert_eq!(*ret, doc_0);
}
}

View File

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::{self, BufReader};
use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
@ -27,23 +27,23 @@ pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
Documents(grenad::Reader<CursorClonableMmap>),
FieldIdWordcountDocids(grenad::Reader<File>),
FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
NewDocumentsIds(RoaringBitmap),
WordDocids {
word_docids_reader: grenad::Reader<File>,
exact_word_docids_reader: grenad::Reader<File>,
word_fid_docids_reader: grenad::Reader<File>,
word_docids_reader: grenad::Reader<BufReader<File>>,
exact_word_docids_reader: grenad::Reader<BufReader<File>>,
},
WordPositionDocids(grenad::Reader<File>),
WordPairProximityDocids(grenad::Reader<File>),
FieldIdFacetStringDocids(grenad::Reader<File>),
FieldIdFacetNumberDocids(grenad::Reader<File>),
FieldIdFacetExistsDocids(grenad::Reader<File>),
FieldIdFacetIsNullDocids(grenad::Reader<File>),
FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
GeoPoints(grenad::Reader<File>),
VectorPoints(grenad::Reader<File>),
ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>),
WordPositionDocids(grenad::Reader<BufReader<File>>),
WordFidDocids(grenad::Reader<BufReader<File>>),
WordPairProximityDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
GeoPoints(grenad::Reader<BufReader<File>>),
VectorPoints(grenad::Reader<BufReader<File>>),
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
}
impl TypedChunk {
@ -64,19 +64,17 @@ impl TypedChunk {
TypedChunk::NewDocumentsIds(grenad) => {
format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => format!(
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {}, word_fid_docids_reader: {} }}",
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
word_docids_reader.len(),
exact_word_docids_reader.len(),
word_fid_docids_reader.len()
exact_word_docids_reader.len()
),
TypedChunk::WordPositionDocids(grenad) => {
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordFidDocids(grenad) => {
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordPairProximityDocids(grenad) => {
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
}
@ -101,8 +99,8 @@ impl TypedChunk {
TypedChunk::VectorPoints(grenad) => {
format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::ScriptLanguageDocids(sl_map) => {
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", sl_map.len())
TypedChunk::ScriptLanguageDocids(grenad) => {
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
}
}
}
@ -140,11 +138,7 @@ pub(crate) fn write_typed_chunk_into_index(
TypedChunk::NewDocumentsIds(documents_ids) => {
return Ok((documents_ids, is_merged_database))
}
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => {
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
append_entries_into_database(
word_docids_iter.clone(),
@ -152,7 +146,7 @@ pub(crate) fn write_typed_chunk_into_index(
wtxn,
index_is_empty,
|value, _buffer| Ok(value),
merge_cbo_roaring_bitmaps,
merge_roaring_bitmaps,
)?;
let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
@ -162,17 +156,7 @@ pub(crate) fn write_typed_chunk_into_index(
wtxn,
index_is_empty,
|value, _buffer| Ok(value),
merge_cbo_roaring_bitmaps,
)?;
let word_fid_docids_iter = unsafe { as_cloneable_grenad(&word_fid_docids_reader) }?;
append_entries_into_database(
word_fid_docids_iter,
&index.word_fid_docids,
wtxn,
index_is_empty,
|value, _buffer| Ok(value),
merge_cbo_roaring_bitmaps,
merge_roaring_bitmaps,
)?;
// create fst from word docids
@ -198,6 +182,17 @@ pub(crate) fn write_typed_chunk_into_index(
)?;
is_merged_database = true;
}
TypedChunk::WordFidDocids(word_fid_docids_iter) => {
append_entries_into_database(
word_fid_docids_iter,
&index.word_fid_docids,
wtxn,
index_is_empty,
|value, _buffer| Ok(value),
merge_cbo_roaring_bitmaps,
)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
indexer.execute(wtxn)?;
@ -344,25 +339,22 @@ pub(crate) fn write_typed_chunk_into_index(
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
index.put_vector_hnsw(wtxn, &new_hnsw)?;
}
TypedChunk::ScriptLanguageDocids(sl_map) => {
for (key, (deletion, addition)) in sl_map {
let mut db_key_exists = false;
TypedChunk::ScriptLanguageDocids(hash_pair) => {
let mut buffer = Vec::new();
for (key, value) in hash_pair {
buffer.clear();
let final_value = match index.script_language_docids.get(wtxn, &key)? {
Some(db_values) => {
db_key_exists = true;
(db_values - deletion) | addition
let mut db_value_buffer = Vec::new();
serialize_roaring_bitmap(&db_values, &mut db_value_buffer)?;
let mut new_value_buffer = Vec::new();
serialize_roaring_bitmap(&value, &mut new_value_buffer)?;
merge_roaring_bitmaps(&new_value_buffer, &db_value_buffer, &mut buffer)?;
RoaringBitmap::deserialize_from(&buffer[..])?
}
None => addition,
None => value,
};
if final_value.is_empty() {
// If the database entry exists, delete it.
if db_key_exists {
index.script_language_docids.delete(wtxn, &key)?;
}
} else {
index.script_language_docids.put(wtxn, &key, &final_value)?;
}
index.script_language_docids.put(wtxn, &key, &final_value)?;
}
}
}
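
One variant of the ScriptLanguageDocids arm above applies (db_values - deletion) | addition and drops the database entry when the result ends up empty. A tiny sketch of that set algebra, with a HashMap standing in for the LMDB database and a string key standing in for (Script, Language):

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

/// Apply a DelAdd pair to whatever is currently stored under `key`,
/// removing the entry when the merged bitmap becomes empty.
fn apply_del_add(
    store: &mut HashMap<String, RoaringBitmap>,
    key: &str,
    deletion: &RoaringBitmap,
    addition: &RoaringBitmap,
) {
    let final_value = match store.get(key) {
        // Mirror the (db_values - deletion) | addition expression above;
        // the clones keep this sketch on the owned-value operators.
        Some(db_values) => (db_values.clone() - deletion.clone()) | addition.clone(),
        None => addition.clone(),
    };
    if final_value.is_empty() {
        store.remove(key);
    } else {
        store.insert(key.to_string(), final_value);
    }
}

fn main() {
    let mut store: HashMap<String, RoaringBitmap> = HashMap::new();
    store.insert("Latin/eng".to_string(), [1u32, 2, 3].into_iter().collect());
    let deletion: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let addition = RoaringBitmap::new();
    apply_del_add(&mut store, "Latin/eng", &deletion, &addition);
    // Everything was deleted, so the entry itself is gone.
    assert!(!store.contains_key("Latin/eng"));
}
```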
@ -387,6 +379,13 @@ fn merge_word_docids_reader_into_fst(
Ok(builder.into_set())
}
fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>) -> Result<()> {
let new_value = RoaringBitmap::deserialize_from(new_value)?;
let db_value = RoaringBitmap::deserialize_from(db_value)?;
let value = new_value | db_value;
Ok(serialize_roaring_bitmap(&value, buffer)?)
}
fn merge_cbo_roaring_bitmaps(
new_value: &[u8],
db_value: &[u8],
@ -456,7 +455,6 @@ where
R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
K: for<'a> heed::BytesDecode<'a>,
{
puffin::profile_function!(format!("number of entries: {}", data.len()));
@ -477,12 +475,6 @@ where
let mut cursor = data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if valid_lmdb_key(key) {
debug_assert!(
K::bytes_decode(&key).is_some(),
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
key.len(),
&key
);
buffer.clear();
let value = serialize_value(value, &mut buffer)?;
unsafe { database.append(key, value)? };

View File

@ -21,7 +21,6 @@ pub use self::words_prefixes_fst::WordsPrefixesFst;
mod available_documents_ids;
mod clear_documents;
pub(crate) mod del_add;
mod delete_documents;
pub(crate) mod facet;
mod index_documents;

View File

@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::collections::HashSet;
use std::io::BufReader;
use std::io::{BufReader, BufWriter};
use grenad::CompressionType;
use heed::types::ByteSlice;
@ -119,9 +119,9 @@ pub fn insert_into_database(
pub fn write_into_lmdb_database_without_merging(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
writer: grenad::Writer<std::fs::File>,
writer: grenad::Writer<BufWriter<std::fs::File>>,
) -> Result<()> {
let file = writer.into_inner()?;
let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
let reader = grenad::Reader::new(BufReader::new(file))?;
if database.is_empty(wtxn)? {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;

View File

@ -20,7 +20,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [100, ]
3 rings a [101, ]
3 the a [101, ]
4 at b [100, ]
4 at be [100, ]
4 bell a [101, ]

View File

@ -30,10 +30,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 bell 5 [101, ]
3 rings am [101, ]
3 the at [101, ]
4 an house [100, ]
4 at beautiful [100, ]
4 bell am [101, ]
4 the 5 [101, ]
5 at house [100, ]
5 the am [101, ]

View File

@ -28,8 +28,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 rings a [101, ]
3 rings am [101, ]
3 the a [101, ]
4 at b [100, ]
4 at be [100, ]
4 bell a [101, ]
4 bell am [101, ]

View File

@ -7,5 +7,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
2 bell a [51, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@ -7,5 +7,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
2 bell a [51, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@ -16,6 +16,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 at b [50, ]
4 bell a [51, ]

View File

@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@ -5,15 +5,15 @@ use heed::types::{ByteSlice, Str};
use heed::Database;
use crate::update::index_documents::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
create_sorter, merge_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
CursorClonableMmap, MergeFn,
};
use crate::{CboRoaringBitmapCodec, Result};
use crate::{Result, RoaringBitmapCodec};
pub struct WordPrefixDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
word_docids: Database<Str, RoaringBitmapCodec>,
word_prefix_docids: Database<Str, RoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType,
pub(crate) chunk_compression_level: Option<u32>,
pub(crate) max_nb_chunks: Option<usize>,
@ -23,8 +23,8 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
word_docids: Database<Str, RoaringBitmapCodec>,
word_prefix_docids: Database<Str, RoaringBitmapCodec>,
) -> WordPrefixDocids<'t, 'u, 'i> {
WordPrefixDocids {
wtxn,
@ -40,7 +40,6 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
#[logging_timer::time("WordPrefixDocids::{}")]
pub fn execute(
self,
// TODO grenad::Reader<obkv::Reader<Word, obkv::Reader<DelAdd, CboRoaringBitmap>>>
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],
@ -52,8 +51,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
// and write into it at the same time, therefore we write into another file.
let mut prefix_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
// TODO change to merge_deladd_cbo_roaring_bitmaps
merge_cbo_roaring_bitmaps,
merge_roaring_bitmaps,
self.chunk_compression_type,
self.chunk_compression_level,
self.max_nb_chunks,
@ -98,7 +96,6 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
let prefix = std::str::from_utf8(prefix.as_bytes())?;
for result in db.prefix_iter(self.wtxn, prefix)? {
let (_word, data) = result?;
// TODO fake a DelAdd -> Add(`data`)
prefix_docids_sorter.insert(prefix, data)?;
}
}
@ -114,14 +111,11 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
drop(iter);
// We finally write the word prefix docids into the LMDB database.
// TODO introduce a new function that is similar to `append_entries_into_database`
// and accepts the `merge_deladd_cbo_roaring_bitmaps` function
sorter_into_lmdb_database(
self.wtxn,
*self.word_prefix_docids.as_polymorph(),
prefix_docids_sorter,
// TODO change to `merge_deladd_cbo_roaring_bitmaps`
merge_cbo_roaring_bitmaps,
merge_roaring_bitmaps,
)?;
Ok(())
@ -133,7 +127,6 @@ fn write_prefixes_in_sorter(
sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
for (key, data_slices) in prefixes.drain() {
// TODO merge keys before inserting them in the sorter
for data in data_slices {
if valid_lmdb_key(&key) {
sorter.insert(&key, data)?;
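
WordPrefixDocids ends up storing, for every prefix of the FST, the union of the docids of all words starting with that prefix (that is what the prefix_iter loop and the sorter feed into the database). A condensed sketch of that aggregation over an in-memory BTreeMap instead of LMDB, assuming the roaring crate:

```rust
use std::collections::BTreeMap;

use roaring::RoaringBitmap;

/// Union the docids of every word starting with `prefix`.
fn prefix_docids(word_docids: &BTreeMap<String, RoaringBitmap>, prefix: &str) -> RoaringBitmap {
    let mut docids = RoaringBitmap::new();
    // Words are sorted, so walk from the prefix onward and stop at the first
    // word that no longer starts with it.
    for (word, ids) in word_docids.range(prefix.to_string()..) {
        if !word.starts_with(prefix) {
            break;
        }
        docids |= ids;
    }
    docids
}

fn main() {
    let mut word_docids: BTreeMap<String, RoaringBitmap> = BTreeMap::new();
    word_docids.insert("bell".into(), [101u32].into_iter().collect());
    word_docids.insert("bells".into(), [100u32].into_iter().collect());
    word_docids.insert("rings".into(), [101u32].into_iter().collect());
    assert_eq!(prefix_docids(&word_docids, "bell").len(), 2);
}
```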

View File

@ -8,7 +8,7 @@ use Criterion::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
macro_rules! test_distinct {
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
#[test]
fn $func() {
let criteria = $criteria;
@ -27,6 +27,7 @@ macro_rules! test_distinct {
let mut search = Search::new(&rtxn, &index);
search.query(search::TEST_QUERY);
search.limit($limit);
search.offset($offset);
search.exhaustive_number_hits($exhaustive);
search.terms_matching_strategy(TermsMatchingStrategy::default());
@ -47,6 +48,7 @@ macro_rules! test_distinct {
Some(d.id)
}
})
.skip($offset)
.take($limit)
.collect();
@ -61,6 +63,7 @@ test_distinct!(
tag,
true,
1,
0,
vec![Words, Typo, Proximity, Attribute, Exactness],
3
);
@ -69,6 +72,7 @@ test_distinct!(
asc_desc_rank,
true,
1,
0,
vec![Words, Typo, Proximity, Attribute, Exactness],
7
);
@ -77,6 +81,7 @@ test_distinct!(
asc_desc_rank,
true,
0,
0,
vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
7
);
@ -86,6 +91,7 @@ test_distinct!(
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Typo, Proximity, Attribute, Exactness],
3
);
@ -94,6 +100,7 @@ test_distinct!(
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Typo, Proximity, Attribute, Exactness],
7
);
@ -102,6 +109,7 @@ test_distinct!(
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words],
3
);
@ -110,6 +118,7 @@ test_distinct!(
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words],
7
);
@ -118,6 +127,7 @@ test_distinct!(
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Typo],
3
);
@ -126,6 +136,7 @@ test_distinct!(
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Typo],
7
);
@ -134,6 +145,7 @@ test_distinct!(
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Proximity],
3
);
@ -142,6 +154,7 @@ test_distinct!(
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Proximity],
7
);
@ -150,6 +163,7 @@ test_distinct!(
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Attribute],
3
);
@ -158,6 +172,7 @@ test_distinct!(
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Attribute],
7
);
@ -166,6 +181,7 @@ test_distinct!(
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Exactness],
3
);
@ -174,6 +190,47 @@ test_distinct!(
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
vec![Words, Exactness],
7
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
distinct_string_limit_and_offset,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
1,
vec![],
3
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
exhaustive_distinct_string_limit_and_offset,
tag,
true,
1,
2,
vec![],
3
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
distinct_number_limit_and_offset,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
2,
vec![],
7
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
exhaustive_distinct_number_limit_and_offset,
asc_desc_rank,
true,
2,
4,
vec![],
7
);
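
The new cases above exercise distinct together with offset and limit (issue 4078). The expected list in the macro is built the same way a caller would reason about it: deduplicate on the distinct field first, then apply skip(offset) and take(limit). A quick sketch with illustrative names (Doc, expected_ids):

```rust
use std::collections::HashSet;

struct Doc {
    id: u32,
    tag: &'static str,
}

/// Keep the first document per distinct value, then paginate,
/// mirroring the .filter_map(..).skip($offset).take($limit) chain of the macro.
fn expected_ids(docs: &[Doc], offset: usize, limit: usize) -> Vec<u32> {
    let mut seen = HashSet::new();
    docs.iter()
        .filter(|d| seen.insert(d.tag))
        .map(|d| d.id)
        .skip(offset)
        .take(limit)
        .collect()
}

fn main() {
    let docs = vec![
        Doc { id: 0, tag: "red" },
        Doc { id: 1, tag: "red" },
        Doc { id: 2, tag: "blue" },
        Doc { id: 3, tag: "green" },
        Doc { id: 4, tag: "blue" },
    ];
    // Distinct keeps ids [0, 2, 3]; offset 1 and limit 2 then yield [2, 3].
    assert_eq!(expected_ids(&docs, 1, 2), vec![2, 3]);
}
```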