Merge #4205

4205: Prevent search hang on the processing index r=Kerollmops a=dureuill Fixes #4206, an issue originally [reported on Discord](https://discord.com/channels/1006923006964154428/1148983671026618579/1148983671026618579) where having parallel search requests on more indexes than the index cache capacity would cause search requests on the currently updating index to hang until the index is done updating. ## Test setup - Create 20 empty indexes by sending settings to them - Repeatedly send placeholder search requests to each of the indexes in a loop - Create another index and send a significant batch of documents to index. - Attempt to perform a search request on that last index. - Before this PR, the search request hangs while the index update task is processing - After this PR, the search request responds immediately even while the index update task is processing ## Changes - When getting the handle to an index for some potentially long running batches of tasks, save it in the index scheduler. - Drop the handle from the index-scheduler when the task is done so that we don't leak indexes. - When getting an index from outside the task queue processor, check if there is such a handle matching the requested index. If so, skip the cache entirely and clone the handle. Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net> Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Fix typo, remove caching for the change of index
2025-07-20 13:30:38 +00:00 · 2023-11-13 10:36:01 +00:00 · 2023-11-13 10:44:36 +01:00 · 2023-11-13 09:43:20 +00:00 · 2023-11-13 09:57:42 +01:00 · 2023-11-12 21:53:11 +01:00
97 changed files with 2553 additions and 789 deletions
--- a/.github/ISSUE_TEMPLATE/sprint_issue.md
+++ b/.github/ISSUE_TEMPLATE/sprint_issue.md
@ -7,19 +7,17 @@ assignees: ''

 ---

-Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
+Related product team resources: [PRD]() (_internal only_)
 Related product discussion:
 Related spec: WIP

 ## Motivation

-<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
+<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->

 ## Usage

-<!---Write a quick description of the usage if the usage has already been defined-->
-
-Refer to the final spec to know the details and the final decisions about the usage.
+<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->

 ## TODO

--- a/.github/workflows/benchmarks-manual.yml
+++ b/.github/workflows/benchmarks-manual.yml
@ -74,4 +74,4 @@ jobs:
          echo "${{ steps.file.outputs.basename }}.json has just been pushed."
          echo 'How to compare this benchmark with another one?'
          echo '  - Check the available files with: ./benchmarks/scripts/list.sh'
-          echo "  - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
+          echo "  - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
--- a/.github/workflows/benchmarks-pr.yml
+++ b/.github/workflows/benchmarks-pr.yml
@ -0,0 +1,115 @@
+name: Benchmarks (PR)
+on: issue_comment
+permissions:
+  issues: write
+
+env:
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+  run-benchmarks-on-comment:
+    # `issue_comment` also fires for comments on plain issues; only react to pull requests.
+    if: github.event.issue.pull_request && startsWith(github.event.comment.body, '/benchmark')
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 4320 # 72h
+    steps:
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Check for Command
+        id: command
+        uses: xt0rted/slash-command-action@v2
+        with:
+          command: benchmark
+          reaction-type: "eyes"
+          repo-token: ${{ env.GH_TOKEN }}
+
+      - uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+        with:
+          repo_token: ${{ env.GH_TOKEN }}
+
+      - uses: actions/checkout@v3
+        if: success()
+        with:
+          fetch-depth: 0 # fetch full history to be able to get main commit sha
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      # Set variables
+      - name: Set current branch name
+        shell: bash
+        run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> "$GITHUB_OUTPUT"
+        id: current_branch
+      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
+        shell: bash
+        run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> "$GITHUB_OUTPUT"
+        id: normalized_current_branch
+      - name: Set shorter commit SHA
+        shell: bash
+        # On `issue_comment` events GITHUB_SHA is the last commit of the default branch,
+        # so read the SHA of the PR branch that was just checked out instead.
+        run: echo "short=$(git rev-parse HEAD | cut -c1-8)" >> "$GITHUB_OUTPUT"
+        id: commit_sha
+      - name: Set file basename with format "dataset_branch_commitSHA"
+        shell: bash
+        # Values derived from the comment body or the branch name are passed through `env`
+        # rather than interpolated into the script, so they cannot inject shell commands.
+        env:
+          DATASET: ${{ steps.command.outputs.command-arguments }}
+          BRANCH: ${{ steps.normalized_current_branch.outputs.name }}
+          SHORT_SHA: ${{ steps.commit_sha.outputs.short }}
+        run: echo "basename=${DATASET}_${BRANCH}_${SHORT_SHA}" >> "$GITHUB_OUTPUT"
+        id: file
+
+      # Run benchmarks
+      - name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
+        env:
+          DATASET: ${{ steps.command.outputs.command-arguments }}
+          BASENAME: ${{ steps.file.outputs.basename }}
+        run: |
+          cd benchmarks
+          cargo bench --bench "$DATASET" -- --save-baseline "$BASENAME"
+
+      # Generate critcmp files
+      - name: Install critcmp
+        uses: taiki-e/install-action@v2
+        with:
+          tool: critcmp
+      - name: Export critcmp file
+        env:
+          BASENAME: ${{ steps.file.outputs.basename }}
+        run: |
+          critcmp --export "$BASENAME" > "$BASENAME.json"
+
+      # Upload benchmarks
+      - name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
+        uses: BetaHuhn/do-spaces-action@v2
+        with:
+          access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
+          secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
+          space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
+          space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
+          source: ${{ steps.file.outputs.basename }}.json
+          out_dir: critcmp_results
+
+      # Compute the diff of the benchmarks and send a message on the GitHub PR
+      - name: Compute and send a message in the PR
+        env:
+          GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+          DATASET: ${{ steps.command.outputs.command-arguments }}
+          BASENAME: ${{ steps.file.outputs.basename }}
+          HEAD_REF: ${{ steps.comment-branch.outputs.head_ref }}
+          BRANCH_NAME: ${{ steps.current_branch.outputs.name }}
+        run: |
+          set -x
+          export base_ref=$(git merge-base origin/main "$HEAD_REF" | head -c8)
+          export base_filename="${DATASET}_main_${base_ref}.json"
+          echo 'Here are your benchmarks diff 👊' >> body.txt
+          echo '```' >> body.txt
+          ./benchmarks/scripts/compare.sh "$base_filename" "$BASENAME.json" >> body.txt
+          echo '```' >> body.txt
+          gh pr comment "$BRANCH_NAME" --body-file body.txt
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@ -57,10 +57,10 @@ jobs:
          echo "date=$commit_date" >> $GITHUB_OUTPUT

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v2
@ -70,7 +70,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
        with:
          images: getmeili/meilisearch
          # Prevent `latest` to be updated for each new tag pushed.
@ -83,7 +83,7 @@ jobs:
            type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}

      - name: Build and push
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v5
        with:
          push: true
          platforms: linux/amd64,linux/arm64
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -2,6 +2,7 @@
 resolver = "2"
 members = [
    "meilisearch",
+    "meilitool",
    "meilisearch-types",
    "meilisearch-auth",
    "meili-snap",
@ -18,7 +19,7 @@ members = [
 ]

 [workspace.package]
-version = "1.4.0"
+version = "1.5.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
--- a/Dockerfile
+++ b/Dockerfile
@ -3,7 +3,7 @@ FROM    rust:alpine3.16 AS compiler

 RUN     apk add -q --update-cache --no-cache build-base openssl-dev

-WORKDIR /meilisearch
+WORKDIR /

 ARG     COMMIT_SHA
 ARG     COMMIT_DATE
@ -17,7 +17,7 @@ RUN     set -eux; \
        if [ "$apkArch" = "aarch64" ]; then \
            export JEMALLOC_SYS_WITH_LG_PAGE=16; \
        fi && \
-        cargo build --release
+        cargo build --release -p meilisearch -p meilitool

 # Run
 FROM    alpine:3.16
@ -28,9 +28,10 @@ ENV     MEILI_SERVER_PROVIDER docker
 RUN     apk update --quiet \
        && apk add -q --no-cache libgcc tini curl

-# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
-# to find.
-COPY    --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
+# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
+# and it's easy to find.
+COPY    --from=compiler /target/release/meilisearch /bin/meilisearch
+COPY    --from=compiler /target/release/meilitool /bin/meilitool
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN     ln -s /bin/meilisearch /meilisearch
--- a/PROFILING.md
+++ b/PROFILING.md
@ -1,14 +1,14 @@
 # Profiling Meilisearch

-Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
+Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).

 ![An example profiling with Puffin viewer](assets/profiling-example.png)

 ## Profiling the Indexing Process

-When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
+When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.

-Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
+[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.

 Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.

--- a/README.md
+++ b/README.md
@ -25,6 +25,12 @@

 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

+---
+
+### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
+
+---
+
 Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.

 <p align="center" name="demo">
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@ -19,6 +19,7 @@ one indexing operation.

 use std::collections::{BTreeSet, HashSet};
 use std::ffi::OsStr;
+use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;

@ -199,6 +200,29 @@ impl Batch {
    }
 }

+impl fmt::Display for Batch {
+    /// Text used when debugging the profiling reports: the batch kind, then ` on <index uid>` when `index_uid()` returns `Some`, then the ids of the batch's tasks.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let index_uid = self.index_uid();
+        let tasks = self.ids();
+        match self { // first write the kind of batch; any write error is propagated with `?`
+            Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
+            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
+            Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
+            Batch::Dump(_) => f.write_str("Dump")?,
+            Batch::IndexOperation { op, .. } => write!(f, "{op}")?, // delegates to `op`'s own `Display` impl
+            Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
+            Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
+            Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
+            Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
+        };
+        match index_uid { // then append the optional index uid and the task ids, both `Debug`-formatted
+            Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
+            None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
+        }
+    }
+}
+
 impl IndexOperation {
    pub fn index_uid(&self) -> &str {
        match self {
@ -213,6 +237,30 @@ impl IndexOperation {
    }
 }

+impl fmt::Display for IndexOperation { // writes only the name of the operation variant, prefixed with `IndexOperation::`
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self { // the `{ .. }` patterns ignore every field: the fields of a variant are never printed
+            IndexOperation::DocumentOperation { .. } => {
+                f.write_str("IndexOperation::DocumentOperation")
+            }
+            IndexOperation::DocumentDeletion { .. } => {
+                f.write_str("IndexOperation::DocumentDeletion")
+            }
+            IndexOperation::IndexDocumentDeletionByFilter { .. } => {
+                f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
+            }
+            IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
+            IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
+            IndexOperation::DocumentClearAndSetting { .. } => {
+                f.write_str("IndexOperation::DocumentClearAndSetting")
+            }
+            IndexOperation::SettingsAndDocumentOperation { .. } => {
+                f.write_str("IndexOperation::SettingsAndDocumentOperation")
+            }
+        }
+    }
+}
+
 impl IndexScheduler {
    /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
    ///
@ -581,7 +629,7 @@ impl IndexScheduler {
            self.breakpoint(crate::Breakpoint::InsideProcessBatch);
        }

-        puffin::profile_function!(format!("{:?}", batch));
+        puffin::profile_function!(batch.to_string());

        match batch {
            Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@ -848,7 +896,7 @@ impl IndexScheduler {
                })?;

                // 4. Dump experimental feature settings
-                let features = self.features()?.runtime_features();
+                let features = self.features().runtime_features();
                dump.create_experimental_features(features)?;

                let dump_uid = started_at.format(format_description!(
@ -875,6 +923,10 @@ impl IndexScheduler {
                    self.index_mapper.index(&rtxn, &index_uid)?
                };

+                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
+                *self.currently_updating_index.write().unwrap() =
+                    Some((index_uid.clone(), index.clone()));
+
                let mut index_wtxn = index.write_txn()?;
                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
                index_wtxn.commit()?;
--- a/index-scheduler/src/features.rs
+++ b/index-scheduler/src/features.rs
@ -1,6 +1,8 @@
+use std::sync::{Arc, RwLock};
+
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RwTxn};

 use crate::error::FeatureNotEnabledError;
 use crate::Result;
@ -9,20 +11,19 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";

 #[derive(Clone)]
 pub(crate) struct FeatureData {
-    runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
-    instance: InstanceTogglableFeatures,
+    persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
+    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
 }

 #[derive(Debug, Clone, Copy)]
 pub struct RoFeatures {
    runtime: RuntimeTogglableFeatures,
-    instance: InstanceTogglableFeatures,
 }

 impl RoFeatures {
-    fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
-        let runtime = data.runtime_features(txn)?;
-        Ok(Self { runtime, instance: data.instance })
+    fn new(data: &FeatureData) -> Self {
+        let runtime = data.runtime_features();
+        Self { runtime }
    }

    pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@ -43,13 +44,13 @@ impl RoFeatures {
    }

    pub fn check_metrics(&self) -> Result<()> {
-        if self.instance.metrics {
+        if self.runtime.metrics {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action: "Getting metrics",
                feature: "metrics",
-                issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
+                issue_link: "https://github.com/meilisearch/product/discussions/625",
            }
            .into())
        }
@ -67,15 +68,36 @@ impl RoFeatures {
            .into())
        }
    }
+
+    pub fn check_puffin(&self) -> Result<()> { // `Ok(())` when the `export_puffin_reports` runtime flag is set, a `FeatureNotEnabledError` otherwise
+        if self.runtime.export_puffin_reports {
+            Ok(())
+        } else {
+            Err(FeatureNotEnabledError { // describes the refused action, the feature to enable and where it is discussed
+                disabled_action: "Outputting Puffin reports to disk",
+                feature: "export puffin reports",
+                issue_link: "https://github.com/meilisearch/product/discussions/693",
+            }
+            .into()) // converted into the error type of this crate's `Result`
+        }
+    }
 }

 impl FeatureData {
    pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
        let mut wtxn = env.write_txn()?;
-        let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
+        let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
        wtxn.commit()?;

-        Ok(Self { runtime: runtime_features, instance: instance_features })
+        let txn = env.read_txn()?;
+        let persisted_features: RuntimeTogglableFeatures =
+            runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
+        let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
+            metrics: instance_features.metrics || persisted_features.metrics,
+            ..persisted_features
+        }));
+
+        Ok(Self { persisted: runtime_features_db, runtime })
    }

    pub fn put_runtime_features(
@ -83,16 +105,25 @@ impl FeatureData {
        mut wtxn: RwTxn,
        features: RuntimeTogglableFeatures,
    ) -> Result<()> {
-        self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
+        self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
        wtxn.commit()?;
+
+        // safe to unwrap, the lock will only fail if:
+        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
+        // 2. there's a panic while the lock is held -> it is only used for an assignment here.
+        let mut toggled_features = self.runtime.write().unwrap();
+        *toggled_features = features;
        Ok(())
    }

-    fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
-        Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
+    fn runtime_features(&self) -> RuntimeTogglableFeatures {
+        // sound to unwrap, the lock will only fail if:
+        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
+        // 2. there's a panic while the lock is held -> it is only used for copying the data here
+        *self.runtime.read().unwrap()
    }

-    pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
-        RoFeatures::new(txn, self)
+    pub fn features(&self) -> RoFeatures {
+        RoFeatures::new(self)
    }
 }
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        index_mapper,
        features: _,
        max_number_of_tasks: _,
+        puffin_frame: _,
        wake_up: _,
        dumps_path: _,
        snapshots_path: _,
@ -38,6 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        test_breakpoint_sdr: _,
        planned_failures: _,
        run_loop_iteration: _,
+        currently_updating_index: _,
    } = scheduler;

    let rtxn = env.read_txn().unwrap();
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -27,12 +27,13 @@ mod index_mapper;
 mod insta_snapshot;
 mod lru;
 mod utils;
-mod uuid_codec;
+pub mod uuid_codec;

 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;

 use std::collections::{BTreeMap, HashMap};
+use std::fs::File;
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@ -52,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
+use puffin::FrameView;
 use roaring::RoaringBitmap;
 use synchronoise::SignalEvent;
 use time::format_description::well_known::Rfc3339;
@ -314,6 +316,9 @@ pub struct IndexScheduler {
    /// the finished tasks automatically.
    pub(crate) max_number_of_tasks: usize,

+    /// A frame to output the indexation profiling files to disk.
+    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
+
    /// The path used to create the dumps.
    pub(crate) dumps_path: PathBuf,

@ -326,6 +331,10 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

+    /// A few types of long running batches of tasks that act on a single index set this field
+    /// so that a handle to the index is available from other threads (search) in an optimized manner.
+    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
+
    // ================= test
    // The next entry is dedicated to the tests.
    /// Provide a way to set a breakpoint in multiple part of the scheduler.
@ -364,10 +373,12 @@ impl IndexScheduler {
            wake_up: self.wake_up.clone(),
            autobatching_enabled: self.autobatching_enabled,
            max_number_of_tasks: self.max_number_of_tasks,
+            puffin_frame: self.puffin_frame.clone(),
            snapshots_path: self.snapshots_path.clone(),
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
            version_file_path: self.version_file_path.clone(),
+            currently_updating_index: self.currently_updating_index.clone(),
            #[cfg(test)]
            test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
            #[cfg(test)]
@ -457,12 +468,14 @@ impl IndexScheduler {
            env,
            // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
            wake_up: Arc::new(SignalEvent::auto(true)),
+            puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
            autobatching_enabled: options.autobatching_enabled,
            max_number_of_tasks: options.max_number_of_tasks,
            dumps_path: options.dumps_path,
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
+            currently_updating_index: Arc::new(RwLock::new(None)),

            #[cfg(test)]
            test_breakpoint_sdr,
@ -572,17 +585,46 @@ impl IndexScheduler {
                run.wake_up.wait();

                loop {
+                    let puffin_enabled = run.features().check_puffin().is_ok();
+                    puffin::set_scopes_on(puffin_enabled);
+                    puffin::GlobalProfiler::lock().new_frame();
+
                    match run.tick() {
                        Ok(TickOutcome::TickAgain(_)) => (),
                        Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
                        Err(e) => {
-                            log::error!("{}", e);
+                            log::error!("{e}");
                            // Wait one second when an irrecoverable error occurs.
                            if !e.is_recoverable() {
                                std::thread::sleep(Duration::from_secs(1));
                            }
                        }
                    }
+
+                    // Let's write the previous frame to disk but only if
+                    // the user wanted to profile with puffin.
+                    if puffin_enabled {
+                        let mut frame_view = run.puffin_frame.lock();
+                        if !frame_view.is_empty() {
+                            let now = OffsetDateTime::now_utc();
+                            let mut file = match File::create(format!("{}.puffin", now)) {
+                                Ok(file) => file,
+                                Err(e) => {
+                                    log::error!("{e}");
+                                    continue;
+                                }
+                            };
+                            if let Err(e) = frame_view.save_to_writer(&mut file) {
+                                log::error!("{e}");
+                            }
+                            if let Err(e) = file.sync_all() {
+                                log::error!("{e}");
+                            }
+                            // We erase this frame view as it is no longer useful. We want to
+                            // measure the new frames now that we exported the previous ones.
+                            *frame_view = FrameView::default();
+                        }
+                    }
                }
            })
            .unwrap();
@ -616,6 +658,13 @@ impl IndexScheduler {
    /// If you need to fetch information from or perform an action on all indexes,
    /// see the `try_for_each_index` function.
    pub fn index(&self, name: &str) -> Result<Index> {
+        if let Some((current_name, current_index)) = // fast path: look at the handle stored in `currently_updating_index`, if any
+            self.currently_updating_index.read().unwrap().as_ref()
+        {
+            if current_name == name {
+                return Ok(current_index.clone()); // same index: hand out a clone of the stored handle without going through the index mapper
+            }
+        }
        let rtxn = self.env.read_txn()?; // otherwise resolve the index through the index mapper inside a read transaction
        self.index_mapper.index(&rtxn, name)
    }
@ -1062,8 +1111,6 @@ impl IndexScheduler {
            self.breakpoint(Breakpoint::Start);
        }

-        puffin::GlobalProfiler::lock().new_frame();
-
        self.cleanup_task_queue()?;

        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1099,6 +1146,9 @@ impl IndexScheduler {
            handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
        };

+        // Reset the currently updating index to relinquish the index handle
+        *self.currently_updating_index.write().unwrap() = None;
+
        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

@ -1259,9 +1309,8 @@ impl IndexScheduler {
        Ok(IndexStats { is_indexing, inner_stats: index_stats })
    }

-    pub fn features(&self) -> Result<RoFeatures> {
-        let rtxn = self.read_txn()?;
-        self.features.features(rtxn)
+    pub fn features(&self) -> RoFeatures {
+        self.features.features()
    }

    pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
 japanese = ["milli/japanese"]
 # thai specialized tokenization
 thai = ["milli/thai"]
-
 # allow greek specialized tokenization
 greek = ["milli/greek"]
+# allow khmer specialized tokenization
+khmer = ["milli/khmer"]
--- a/meilisearch-types/src/features.rs
+++ b/meilisearch-types/src/features.rs
@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
 pub struct RuntimeTogglableFeatures {
    pub score_details: bool,
    pub vector_store: bool,
+    pub metrics: bool,
+    pub export_puffin_reports: bool,
 }

 #[derive(Default, Debug, Clone, Copy)]
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.9"
 platform-dirs = "0.3.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-puffin = "0.16.0"
-puffin_http = { version = "0.13.0", optional = true }
+puffin = { version = "0.16.0", features = ["serialization"] }
 rand = "0.8.5"
 rayon = "1.7.0"
 regex = "1.7.3"
@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true }
 [features]
 default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
 analytics = ["segment"]
-profile-with-puffin = ["dep:puffin_http"]
 mini-dashboard = [
    "actix-web-static-files",
    "static-files",
@ -152,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
+khmer = ["meilisearch-types/khmer"]

 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@ -114,10 +114,7 @@ pub fn create_app(
        .configure(routes::configure)
        .configure(|s| dashboard(s, enable_dashboard));

-    let app = app.wrap(actix_web::middleware::Condition::new(
-        opt.experimental_enable_metrics,
-        middleware::RouteMetrics,
-    ));
+    let app = app.wrap(middleware::RouteMetrics);
    app.wrap(
        Cors::default()
            .send_wildcard()
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

-    #[cfg(feature = "profile-with-puffin")]
-    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
-    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
-
    anyhow::ensure!(
        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
--- a/meilisearch/src/middleware.rs
+++ b/meilisearch/src/middleware.rs
@ -3,8 +3,10 @@
 use std::future::{ready, Ready};

 use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
+use actix_web::web::Data;
 use actix_web::Error;
 use futures_util::future::LocalBoxFuture;
+use index_scheduler::IndexScheduler;
 use prometheus::HistogramTimer;

 pub struct RouteMetrics;
@ -47,19 +49,27 @@ where

    fn call(&self, req: ServiceRequest) -> Self::Future {
        let mut histogram_timer: Option<HistogramTimer> = None;
-        let request_path = req.path();
-        let is_registered_resource = req.resource_map().has_resource(request_path);
-        if is_registered_resource {
-            let request_method = req.method().to_string();
-            histogram_timer = Some(
-                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+
+        // calling unwrap here is safe because index scheduler is added to app data while creating actix app.
+        // also, the tests will fail if this is not present.
+        let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
+        let features = index_scheduler.features();
+
+        if features.check_metrics().is_ok() {
+            let request_path = req.path();
+            let is_registered_resource = req.resource_map().has_resource(request_path);
+            if is_registered_resource {
+                let request_method = req.method().to_string();
+                histogram_timer = Some(
+                    crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+                        .with_label_values(&[&request_method, request_path])
+                        .start_timer(),
+                );
+                crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                    .with_label_values(&[&request_method, request_path])
-                    .start_timer(),
-            );
-            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
-                .with_label_values(&[&request_method, request_path])
-                .inc();
-        }
+                    .inc();
+            }
+        };

        let fut = self.service.call(req);

--- a/meilisearch/src/routes/features.rs
+++ b/meilisearch/src/routes/features.rs
@ -29,12 +29,12 @@ async fn get_features(
    >,
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
-) -> Result<HttpResponse, ResponseError> {
-    let features = index_scheduler.features()?;
+) -> HttpResponse {
+    let features = index_scheduler.features();

    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
    debug!("returns: {:?}", features.runtime_features());
-    Ok(HttpResponse::Ok().json(features.runtime_features()))
+    HttpResponse::Ok().json(features.runtime_features())
 }

 #[derive(Debug, Deserr)]
@ -44,6 +44,10 @@ pub struct RuntimeTogglableFeatures {
    pub score_details: Option<bool>,
    #[deserr(default)]
    pub vector_store: Option<bool>,
+    #[deserr(default)]
+    pub metrics: Option<bool>,
+    #[deserr(default)]
+    pub export_puffin_reports: Option<bool>,
 }

 async fn patch_features(
@ -55,26 +59,36 @@ async fn patch_features(
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();

    let old_features = features.runtime_features();
-
    let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
        score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
        vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
+        metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
+        export_puffin_reports: new_features
+            .0
+            .export_puffin_reports
+            .unwrap_or(old_features.export_puffin_reports),
    };

    // explicitly destructure for analytics rather than using the `Serialize` implementation, because
    // it renames to camelCase, which we don't want for analytics.
    // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-    let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
-        new_features;
+    let meilisearch_types::features::RuntimeTogglableFeatures {
+        score_details,
+        vector_store,
+        metrics,
+        export_puffin_reports,
+    } = new_features;

    analytics.publish(
        "Experimental features Updated".to_string(),
        json!({
            "score_details": score_details,
            "vector_store": vector_store,
+            "metrics": metrics,
+            "export_puffin_reports": export_puffin_reports,
        }),
        Some(&req),
    );
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@ -68,7 +68,7 @@ pub async fn search(
    }

    let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();
    let search_result = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name, features)
    })
--- a/meilisearch/src/routes/indexes/search.rs
+++ b/meilisearch/src/routes/indexes/search.rs
@ -157,7 +157,7 @@ pub async fn search_with_url_query(
    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
@ -192,7 +192,7 @@ pub async fn search_with_post(

    let index = index_scheduler.index(&index_uid)?;

-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
--- a/meilisearch/src/routes/metrics.rs
+++ b/meilisearch/src/routes/metrics.rs
@ -19,7 +19,7 @@ pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
-    index_scheduler.features()?.check_metrics()?;
+    index_scheduler.features().check_metrics()?;
    let auth_filters = index_scheduler.filters();
    if !auth_filters.all_indexes_authorized() {
        let mut error = ResponseError::from(AuthenticationError::InvalidToken);
--- a/meilisearch/src/routes/multi_search.rs
+++ b/meilisearch/src/routes/multi_search.rs
@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
    let queries = params.into_inner().queries;

    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();

    // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
    // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
--- a/meilisearch/tests/auth/authorization.rs
+++ b/meilisearch/tests/auth/authorization.rs
@ -2,10 +2,12 @@ use std::collections::{HashMap, HashSet};

 use ::time::format_description::well_known::Rfc3339;
 use maplit::{hashmap, hashset};
+use meilisearch::Opt;
 use once_cell::sync::Lazy;
+use tempfile::TempDir;
 use time::{Duration, OffsetDateTime};

-use crate::common::{Server, Value};
+use crate::common::{default_settings, Server, Value};
 use crate::json;

 pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@ -195,7 +197,9 @@ async fn access_authorized_master_key() {

 #[actix_rt::test]
 async fn access_authorized_restricted_index() {
-    let mut server = Server::new_auth().await;
+    let dir = TempDir::new().unwrap();
+    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
+    let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
    for ((method, route), actions) in AUTHORIZATIONS.iter() {
        for action in actions {
            // create a new API key letting only the needed action.
--- a/meilisearch/tests/common/mod.rs
+++ b/meilisearch/tests/common/mod.rs
@ -5,9 +5,11 @@ pub mod service;

 use std::fmt::{self, Display};

+#[allow(unused)]
 pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
 use meili_snap::json_string;
 use serde::{Deserialize, Serialize};
+#[allow(unused)]
 pub use server::{default_settings, Server};

 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
--- a/meilisearch/tests/common/server.rs
+++ b/meilisearch/tests/common/server.rs
@ -202,6 +202,10 @@ impl Server {
    pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
        self.service.patch("/experimental-features", value).await
    }
+
+    pub async fn get_metrics(&self) -> (Value, StatusCode) {
+        self.service.get("/metrics").await
+    }
 }

 pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@ -221,7 +225,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
            skip_index_budget: true,
            ..Parser::parse_from(None as Option<&str>)
        },
-        experimental_enable_metrics: true,
+        experimental_enable_metrics: false,
        ..Parser::parse_from(None as Option<&str>)
    }
 }
--- a/meilisearch/tests/features/mod.rs
+++ b/meilisearch/tests/features/mod.rs
@ -1,4 +1,7 @@
-use crate::common::Server;
+use meilisearch::Opt;
+use tempfile::TempDir;
+
+use crate::common::{default_settings, Server};
 use crate::json;

 /// Feature name to test against.
@ -16,7 +19,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": false
+      "vectorStore": false,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -26,7 +31,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -36,7 +43,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -47,7 +56,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -58,11 +69,73 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);
 }

+/// Checks the life cycle of the `metrics` experimental feature:
+/// - starting with `experimental_enable_metrics: true` reports `"metrics": true` in the features,
+/// - the feature can be switched off and on again through `set_features`,
+/// - `get_metrics` answers `200 OK` while enabled and `400 Bad Request`
+///   (`feature_not_enabled`) while disabled,
+/// - a second server opened on the same directory without the flag still serves metrics.
+#[actix_rt::test]
+async fn experimental_feature_metrics() {
+    // instance flag for metrics enables metrics at startup
+    let dir = TempDir::new().unwrap();
+    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
+    let server = Server::new_with_options(enable_metrics).await.unwrap();
+
+    let (response, code) = server.get_features().await;
+
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "scoreDetails": false,
+      "vectorStore": false,
+      "metrics": true,
+      "exportPuffinReports": false
+    }
+    "###);
+
+    let (response, code) = server.get_metrics().await;
+    meili_snap::snapshot!(code, @"200 OK");
+
+    // metrics are not returned in json format
+    // so the test server will return null
+    meili_snap::snapshot!(response, @"null");
+
+    // disabling metrics results in invalid request
+    let (response, code) = server.set_features(json!({"metrics": false})).await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response["metrics"], @"false");
+
+    let (response, code) = server.get_metrics().await;
+    meili_snap::snapshot!(code, @"400 Bad Request");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
+      "code": "feature_not_enabled",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+    }
+    "###);
+
+    // enabling metrics via HTTP results in valid request
+    let (response, code) = server.set_features(json!({"metrics": true})).await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response["metrics"], @"true");
+
+    let (response, code) = server.get_metrics().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response, @"null");
+
+    // startup without flag respects persisted metrics value
+    // (the second server reuses `dir`, where metrics was last set to `true` over HTTP)
+    let disable_metrics =
+        Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
+    let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
+    let (response, code) = server_no_flag.get_metrics().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response, @"null");
+}
+
 #[actix_rt::test]
 async fn errors() {
    let server = Server::new().await;
@ -73,7 +146,7 @@ async fn errors() {
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
-      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
+      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
      "code": "bad_request",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#bad_request"
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@ -0,0 +1,241 @@
+use meili_snap::snapshot;
+use once_cell::sync::Lazy;
+
+use crate::common::{Server, Value};
+use crate::json;
+
+/// Fixture shared by the distinct-attribute tests of this module: 14 products
+/// (`id` 1 to 14) that only span 6 different `product_id` values, so a search
+/// that is distinct on `product_id` can return at most 6 hits.
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+    json!([
+      {
+        "id": 1,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Brown"
+      },
+      {
+        "id": 2,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Black"
+      },
+      {
+        "id": 3,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Blue"
+      },
+      {
+        "id": 4,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Red"
+      },
+      {
+        "id": 5,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Blue"
+      },
+      {
+        "id": 6,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "Black"
+      },
+      {
+        "id": 7,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "White"
+      },
+      {
+        "id": 8,
+        "description": "Hoodie",
+        "brand": "Puma",
+        "product_id": "987654",
+        "color": "Gray"
+      },
+      {
+        "id": 9,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Green"
+      },
+      {
+        "id": 10,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Red"
+      },
+      {
+        "id": 11,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Blue"
+      },
+      {
+        "id": 12,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Indigo"
+      },
+      {
+        "id": 13,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Black"
+      },
+      {
+        "id": 14,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Stone Wash"
+      }
+    ])
+});
+
+/// Field used as the primary key when the fixture documents are added.
+static DOCUMENT_PRIMARY_KEY: &str = "id";
+/// Field set as the distinct attribute of the test index.
+static DOCUMENT_DISTINCT_KEY: &str = "product_id";
+
+/// testing: https://github.com/meilisearch/meilisearch/issues/4078
+///
+/// Runs placeholder searches (no query) on an index whose distinct attribute is
+/// `product_id`, walking through the results with `offset`/`limit`. Each window must
+/// contain the next distinct `product_id` values, and windows past the 6 distinct
+/// values must be empty.
+#[actix_rt::test]
+async fn distinct_search_with_offset_no_ranking() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
+    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
+    // NOTE(review): presumably task uids start at 0, making uid 1 the distinct-attribute
+    // update enqueued just above — confirm against the test server helpers.
+    index.wait_task(1).await;
+
+    // Collects the distinct key (`product_id`) of every hit, in response order.
+    fn get_hits(response: &Value) -> Vec<&str> {
+        let hits_array = response["hits"].as_array().unwrap();
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+    }
+
+    // first window: the first two distinct products
+    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["estimatedTotalHits"] , @"11");
+
+    // second window: the next two distinct products
+    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["estimatedTotalHits"], @"10");
+
+    // third window: the last two distinct products
+    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    // window overlapping the end: only the sixth distinct product is left
+    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"1");
+    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    // offsets at or past the number of distinct products return no hits
+    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+}
+
+/// testing: https://github.com/meilisearch/meilisearch/issues/4130
+///
+/// Same scenario as the offset/limit test, but paging with `page`/`hitsPerPage`.
+/// Every response must report `totalHits` equal to the 6 distinct `product_id`
+/// values and a `totalPages` consistent with the requested `hitsPerPage`.
+#[actix_rt::test]
+async fn distinct_search_with_pagination_no_ranking() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
+    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
+    // NOTE(review): presumably task uids start at 0, making uid 1 the distinct-attribute
+    // update enqueued just above — confirm against the test server helpers.
+    index.wait_task(1).await;
+
+    // Collects the distinct key (`product_id`) of every hit, in response order.
+    fn get_hits(response: &Value) -> Vec<&str> {
+        let hits_array = response["hits"].as_array().unwrap();
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+    }
+
+    // page 0 yields no hits but still reports the totals
+    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"0");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    // pages 1 to 3 each hold two distinct products
+    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["page"], @"1");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["page"], @"3");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    // a page past `totalPages` is empty
+    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"4");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    // with three hits per page, the second page holds the last three distinct products
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"3");
+    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"2");
+    snapshot!(response["totalHits"], @"6");
+}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@ -1,6 +1,7 @@
 // This module contains all the tests concerning search. Each particular feature of the search
 // should be tested in its own module to isolate tests and keep the tests readable.

+mod distinct;
 mod errors;
 mod facet_search;
 mod formatted;
@ -816,7 +817,7 @@ async fn experimental_feature_score_details() {
                      },
                      "proximity": {
                        "order": 2,
-                        "score": 0.875
+                        "score": 0.75
                      },
                      "attribute": {
                        "order": 3,
--- a/meilitool/Cargo.toml
+++ b/meilitool/Cargo.toml
@ -0,0 +1,19 @@
+[package]
+name = "meilitool"
+description = "A CLI to edit a Meilisearch database from the command line"
+version.workspace = true
+authors.workspace = true
+homepage.workspace = true
+readme.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow = "1.0.75"
+clap = { version = "4.2.1", features = ["derive"] }
+dump = { path = "../dump" }
+file-store = { path = "../file-store" }
+meilisearch-auth = { path = "../meilisearch-auth" }
+meilisearch-types = { path = "../meilisearch-types" }
+time = { version = "0.3.30", features = ["formatting"] }
+uuid = { version = "1.5.0", features = ["v4"], default-features = false }
--- a/meilitool/src/main.rs
+++ b/meilitool/src/main.rs
@ -0,0 +1,312 @@
+use std::fs::{read_dir, read_to_string, remove_file, File};
+use std::io::BufWriter;
+use std::path::PathBuf;
+
+use anyhow::Context;
+use clap::{Parser, Subcommand};
+use dump::{DumpWriter, IndexMetadata};
+use file_store::FileStore;
+use meilisearch_auth::AuthController;
+use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
+use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
+use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
+use meilisearch_types::milli::{obkv_to_json, BEU32};
+use meilisearch_types::tasks::{Status, Task};
+use meilisearch_types::versioning::check_version_file;
+use meilisearch_types::Index;
+use time::macros::format_description;
+use time::OffsetDateTime;
+use uuid_codec::UuidCodec;
+
+mod uuid_codec;
+
+// Top-level command-line arguments of meilitool, parsed with clap's derive API
+// (`Cli::parse()` in `main`).
+// NOTE(review): clap's derive macros presumably surface the `///` doc comments below as
+// `--help` text, so treat them as user-facing strings when editing.
+#[derive(Parser)]
+#[command(author, version, about, long_about = None)]
+struct Cli {
+    /// The database path where the Meilisearch is running.
+    #[arg(long, default_value = "data.ms/")]
+    db_path: PathBuf,
+
+    // The action to run; see `Command` for the available subcommands.
+    #[command(subcommand)]
+    command: Command,
+}
+
+// Subcommands understood by meilitool; `main` dispatches each variant to its handler.
+// NOTE(review): clap's derive macros presumably surface the `///` doc comments below as
+// `--help` text, so treat them as user-facing strings when editing.
+#[derive(Subcommand)]
+enum Command {
+    /// Clears the task queue and make it empty.
+    ///
+    /// This command can be safely executed even if Meilisearch is running and processing tasks.
+    /// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
+    /// even the ones that were processing. However, it's highly possible that you see the processing
+    /// tasks in the queue again with an associated internal error message.
+    // Handled by `clear_task_queue`.
+    ClearTaskQueue,
+
+    /// Exports a dump from the Meilisearch database.
+    ///
+    /// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
+    /// If tasks are being processed while a dump is being exported there are chances for the dump to be
+    /// malformed with missing tasks.
+    ///
+    /// TODO Verify this claim or make sure it cannot happen and we can export dumps
+    ///      without caring about killing Meilisearch first!
+    // Handled by `export_a_dump`.
+    ExportADump {
+        /// The directory in which the dump will be created.
+        #[arg(long, default_value = "dumps/")]
+        dump_dir: PathBuf,
+
+        /// Skip dumping the enqueued or processing tasks.
+        ///
+        /// Can be useful when there are a lot of them and it is not particularly useful
+        /// to keep them. Note that only the enqueued tasks takes up space so skipping
+        /// the processed ones is not particularly interesting.
+        #[arg(long)]
+        skip_enqueued_tasks: bool,
+    },
+}
+
+/// Entry point of meilitool: parses the command line, checks the version file of the
+/// database and runs the requested subcommand, forwarding its result to the caller.
+fn main() -> anyhow::Result<()> {
+    let cli = Cli::parse();
+    let database_path = cli.db_path;
+
+    // Stop right away when the version file check fails.
+    check_version_file(&database_path).context("While checking the version file")?;
+
+    match cli.command {
+        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
+            export_a_dump(database_path, dump_dir, skip_enqueued_tasks)
+        }
+        Command::ClearTaskQueue => clear_task_queue(database_path),
+    }
+}
+
+/// Clears the task queue located at `db_path`.
+///
+/// Works in two steps:
+/// 1. opens the heed environment stored in `<db_path>/tasks` and empties every
+///    task-related database inside a single write transaction;
+/// 2. removes the entries found in `<db_path>/update_files`.
+///
+/// Progress is reported on stderr. A failure to delete an individual content file is
+/// only logged; the function carries on with the remaining files.
+///
+/// # Errors
+///
+/// Returns an error when the environment or one of its databases cannot be opened,
+/// when a database cannot be cleared, when the transaction cannot be committed, or
+/// when the `update_files` directory cannot be read.
+fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
+    let path = db_path.join("tasks");
+    let env = EnvOpenOptions::new()
+        .max_dbs(100)
+        .open(&path)
+        .with_context(|| format!("While trying to open {:?}", path.display()))?;
+
+    eprintln!("Deleting tasks from the database...");
+
+    // Open every database first so that a missing one aborts before anything is cleared.
+    let mut wtxn = env.write_txn()?;
+    let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
+    // Read the number of tasks before clearing, it is only used for the final report.
+    let total = all_tasks.len(&wtxn)?;
+    let status = try_opening_poly_database(&env, &wtxn, "status")?;
+    let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
+    let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
+    let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
+    let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
+    let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
+    let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
+
+    try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
+    try_clearing_poly_database(&mut wtxn, status, "status")?;
+    try_clearing_poly_database(&mut wtxn, kind, "kind")?;
+    try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
+    try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
+    try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
+    try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
+    try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
+
+    wtxn.commit().context("While committing the transaction")?;
+
+    eprintln!("Successfully deleted {total} tasks from the tasks database!");
+    eprintln!("Deleting the content files from disk...");
+
+    let mut count = 0usize;
+    let update_files = db_path.join("update_files");
+    let entries = read_dir(&update_files).with_context(|| {
+        format!("While trying to read the content of {:?}", update_files.display())
+    })?;
+    for result in entries {
+        match result {
+            // Best effort: report the file that could not be removed and keep going.
+            Ok(ent) => match remove_file(ent.path()) {
+                Ok(_) => count += 1,
+                Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
+            },
+            Err(e) => {
+                eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
+            }
+        }
+    }
+
+    eprintln!("Successfully deleted {count} content files from disk!");
+
+    Ok(())
+}
+
+/// Opens the typed database named `db_name` in `env`.
+///
+/// Fails with a contextualized error either when the open call itself errors or when
+/// it succeeds but reports that no database with this name exists.
+fn try_opening_database<KC, DC>(
+    env: &Env,
+    rtxn: &RoTxn,
+    db_name: &str,
+) -> anyhow::Result<Database<KC, DC>>
+where
+    KC: 'static,
+    DC: 'static,
+{
+    // First layer: the open call may fail.
+    let lookup: Option<Database<KC, DC>> = env
+        .open_database(rtxn, Some(db_name))
+        .with_context(|| format!("While opening the {db_name:?} database"))?;
+
+    // Second layer: the database may simply not exist.
+    lookup.with_context(|| format!("Missing the {db_name:?} database"))
+}
+
+/// Opens the untyped (poly) database named `db_name` in `env`.
+///
+/// Fails with a contextualized error either when the open call itself errors or when
+/// it succeeds but reports that no database with this name exists.
+fn try_opening_poly_database(
+    env: &Env,
+    rtxn: &RoTxn,
+    db_name: &str,
+) -> anyhow::Result<PolyDatabase> {
+    // First layer: the open call may fail.
+    let lookup: Option<PolyDatabase> = env
+        .open_poly_database(rtxn, Some(db_name))
+        .with_context(|| format!("While opening the {db_name:?} poly database"))?;
+
+    // Second layer: the database may simply not exist.
+    lookup.with_context(|| format!("Missing the {db_name:?} poly database"))
+}
+
+/// Clears `database` within the `wtxn` write transaction, attaching `db_name` to the
+/// error message when the clear operation fails.
+fn try_clearing_poly_database(
+    wtxn: &mut RwTxn,
+    database: PolyDatabase,
+    db_name: &str,
+) -> anyhow::Result<()> {
+    let cleared = database.clear(wtxn);
+    cleared.with_context(|| format!("While clearing the {db_name:?} database"))
+}
+
+/// Exports a dump of the database located at `db_path` into the dump directory.
+///
+/// The steps, each reported on stderr, are:
+/// 1. read the instance UID from `<db_path>/instance-uid`; a missing or unparsable
+///    file is only reported and the dump is created without an instance UID,
+/// 2. dump every key listed by the auth store opened at `db_path`,
+/// 3. unless `skip_enqueued_tasks` is set, dump every task of the `all-tasks`
+///    database, along with the content file of the tasks that are still enqueued,
+/// 4. dump the documents and the settings of every index listed in the
+///    `index-mapping` database,
+/// 5. persist the archive as `<dump_dir>/<timestamp>.dump`.
+///
+/// Note that `skip_enqueued_tasks` skips step 3 entirely: no task, whatever its
+/// status, is written to the dump. Experimental feature settings are never dumped.
+///
+/// Errors from every step but the instance UID extraction are propagated to the caller.
+fn export_a_dump(
+    db_path: PathBuf,
+    dump_dir: PathBuf,
+    skip_enqueued_tasks: bool,
+) -> Result<(), anyhow::Error> {
+    let started_at = OffsetDateTime::now_utc();
+
+    // 1. Extracts the instance UID from disk
+    // This step is best effort: failures are printed and `None` is used instead.
+    let instance_uid_path = db_path.join("instance-uid");
+    let instance_uid = match read_to_string(&instance_uid_path) {
+        Ok(content) => match content.trim().parse() {
+            Ok(uuid) => Some(uuid),
+            Err(e) => {
+                eprintln!("Impossible to parse instance-uid: {e}");
+                None
+            }
+        },
+        Err(e) => {
+            eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
+            None
+        }
+    };
+
+    let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
+    let file_store =
+        FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
+
+    // The `all-tasks` and `index-mapping` databases read below live in this environment.
+    let index_scheduler_path = db_path.join("tasks");
+    let env = EnvOpenOptions::new()
+        .max_dbs(100)
+        .open(&index_scheduler_path)
+        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
+
+    eprintln!("Dumping the keys...");
+
+    // 2. dump the keys
+    let auth_store = AuthController::new(&db_path, &None)
+        .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
+    let mut dump_keys = dump.create_keys()?;
+    let mut count = 0;
+    for key in auth_store.list_keys()? {
+        dump_keys.push_key(&key)?;
+        count += 1;
+    }
+    dump_keys.flush()?;
+
+    eprintln!("Successfully dumped {count} keys!");
+
+    let rtxn = env.read_txn()?;
+    let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
+        try_opening_database(&env, &rtxn, "all-tasks")?;
+    let index_mapping: Database<Str, UuidCodec> =
+        try_opening_database(&env, &rtxn, "index-mapping")?;
+
+    if skip_enqueued_tasks {
+        eprintln!("Skip dumping the enqueued tasks...");
+    } else {
+        eprintln!("Dumping the enqueued tasks...");
+
+        // 3. dump the tasks
+        // Every task is pushed to the dump, whatever its status.
+        let mut dump_tasks = dump.create_tasks_queue()?;
+        let mut count = 0;
+        for ret in all_tasks.iter(&rtxn)? {
+            let (_, t) = ret?;
+            // Read what is needed from the task before `t.into()` consumes it.
+            let status = t.status;
+            let content_file = t.content_uuid();
+            let mut dump_content_file = dump_tasks.push_task(&t.into())?;
+
+            // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
+            if let Some(content_file_uuid) = content_file {
+                if status == Status::Enqueued {
+                    let content_file = file_store.get_update(content_file_uuid)?;
+
+                    let reader =
+                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
+                            format!("While reading content file {:?}", content_file_uuid)
+                        })?;
+
+                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
+                    while let Some(doc) = cursor.next_document().with_context(|| {
+                        format!("While iterating on content file {:?}", content_file_uuid)
+                    })? {
+                        dump_content_file
+                            .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
+                    }
+                    dump_content_file.flush()?;
+                    // Only the tasks whose content file was dumped are counted.
+                    count += 1;
+                }
+            }
+        }
+        dump_tasks.flush()?;
+
+        eprintln!("Successfully dumped {count} enqueued tasks!");
+    }
+
+    eprintln!("Dumping the indexes...");
+
+    // 4. Dump the indexes
+    let mut count = 0;
+    for result in index_mapping.iter(&rtxn)? {
+        let (uid, uuid) = result?;
+        // Each index is opened from `<db_path>/indexes/<uuid>`.
+        let index_path = db_path.join("indexes").join(uuid.to_string());
+        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
+            format!("While trying to open the index at path {:?}", index_path.display())
+        })?;
+
+        // Shadows the outer transaction for the rest of this iteration:
+        // this one is opened on the index itself.
+        let rtxn = index.read_txn()?;
+        let metadata = IndexMetadata {
+            uid: uid.to_owned(),
+            primary_key: index.primary_key(&rtxn)?.map(String::from),
+            created_at: index.created_at(&rtxn)?,
+            updated_at: index.updated_at(&rtxn)?,
+        };
+        let mut index_dumper = dump.create_index(uid, &metadata)?;
+
+        let fields_ids_map = index.fields_ids_map(&rtxn)?;
+        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+
+        // 4.1. Dump the documents
+        for ret in index.all_documents(&rtxn)? {
+            let (_id, doc) = ret?;
+            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+            index_dumper.push_document(&document)?;
+        }
+
+        // 4.2. Dump the settings
+        let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
+        index_dumper.settings(&settings)?;
+        count += 1;
+    }
+
+    eprintln!("Successfully dumped {count} indexes!");
+    // We will not dump experimental feature settings
+    eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
+
+    // The dump is named after the time at which the export started,
+    // e.g. `20231113-104436123.dump`.
+    let dump_uid = started_at.format(format_description!(
+        "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
+    )).unwrap();
+
+    let path = dump_dir.join(format!("{}.dump", dump_uid));
+    let file = File::create(&path)?;
+    dump.persist_to(BufWriter::new(file))?;
+
+    eprintln!("Dump exported at path {:?}", path.display());
+
+    Ok(())
+}
--- a/meilitool/src/uuid_codec.rs
+++ b/meilitool/src/uuid_codec.rs
@ -0,0 +1,24 @@
+use std::borrow::Cow;
+use std::convert::TryInto;
+
+use meilisearch_types::heed::{BytesDecode, BytesEncode};
+use uuid::Uuid;
+
+/// A heed codec that encodes a `Uuid` as its raw bytes and decodes it back from them.
+pub struct UuidCodec;
+
+impl<'a> BytesDecode<'a> for UuidCodec {
+    type DItem = Uuid;
+
+    /// Rebuilds a `Uuid` from `bytes`, or returns `None` when the slice
+    /// cannot be converted into the byte array expected by `Uuid::from_bytes`.
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let raw: [u8; 16] = bytes.try_into().ok()?;
+        Some(Uuid::from_bytes(raw))
+    }
+}
+
+impl BytesEncode<'_> for UuidCodec {
+    type EItem = Uuid;
+
+    /// Borrows the bytes of `item` without copying them; never returns `None`.
+    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
+        let raw: &[u8] = item.as_bytes();
+        Some(Cow::Borrowed(raw))
+    }
+}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.3", default-features = false }
+charabia = { version = "0.8.5", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}
@ -82,7 +82,7 @@ md5 = "0.7.0"
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
@ -106,3 +106,6 @@ thai = ["charabia/thai"]

 # allow greek specialized tokenization
 greek = ["charabia/greek"]
+
+# allow khmer specialized tokenization
+khmer = ["charabia/khmer"]
--- a/milli/src/documents/enriched.rs
+++ b/milli/src/documents/enriched.rs
@ -1,4 +1,5 @@
 use std::fs::File;
+use std::io::BufReader;
 use std::{io, str};

 use obkv::KvReader;
@ -19,14 +20,14 @@ use crate::FieldId;
 pub struct EnrichedDocumentsBatchReader<R> {
    documents: DocumentsBatchReader<R>,
    primary_key: String,
-    external_ids: grenad::ReaderCursor<File>,
+    external_ids: grenad::ReaderCursor<BufReader<File>>,
 }

 impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
    pub fn new(
        documents: DocumentsBatchReader<R>,
        primary_key: String,
-        external_ids: grenad::Reader<File>,
+        external_ids: grenad::Reader<BufReader<File>>,
    ) -> Result<Self, Error> {
        if documents.documents_count() as u64 == external_ids.len() {
            Ok(EnrichedDocumentsBatchReader {
@ -75,7 +76,7 @@ pub struct EnrichedDocument<'a> {
 pub struct EnrichedDocumentsBatchCursor<R> {
    documents: DocumentsBatchCursor<R>,
    primary_key: String,
-    external_ids: grenad::ReaderCursor<File>,
+    external_ids: grenad::ReaderCursor<BufReader<File>>,
 }

 impl<R> EnrichedDocumentsBatchCursor<R> {
--- a/milli/src/proximity.rs
+++ b/milli/src/proximity.rs
@ -2,7 +2,7 @@ use std::cmp;

 use crate::{relative_from_absolute_position, Position};

-pub const MAX_DISTANCE: u32 = 8;
+pub const MAX_DISTANCE: u32 = 4;

 pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs {
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
 use std::ops::Bound::{self, Excluded, Included};

 use either::Either;
-pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
+pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
 use roaring::RoaringBitmap;
 use serde_json::Value;

--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;

 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
-pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
+pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
 use self::new::PartialSearchResult;
 use crate::error::UserError;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@ -53,11 +53,22 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                if excluded.contains(docid) {
                    continue;
                }
+
                distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
                results.push(docid);
            }
+
            let mut all_candidates = universe - excluded;
            all_candidates.extend(results.iter().copied());
+            // drain the results of the skipped elements
+            // this **must** be done **after** writing the entire results in `all_candidates` to ensure
+            // e.g. estimatedTotalHits is correct.
+            if results.len() >= from {
+                results.drain(..from);
+            } else {
+                results.clear();
+            }
+
            return Ok(BucketSortOutput {
                scores: vec![Default::default(); results.len()],
                docids: results,
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -434,7 +434,18 @@ pub fn execute_search(
        let mut search = Search::default();
        let docids = match ctx.index.vector_hnsw(ctx.txn)? {
            Some(hnsw) => {
+                if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
+                    if vector.len() != expected_size {
+                        return Err(UserError::InvalidVectorDimensions {
+                            expected: expected_size,
+                            found: vector.len(),
+                        }
+                        .into());
+                    }
+                }
+
                let vector = NDotProductPoint::new(vector.clone());
+
                let neighbors = hnsw.search(&vector, &mut search);

                let mut docids = Vec::new();
--- a/milli/src/search/new/ranking_rule_graph/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/mod.rs
@ -29,7 +29,7 @@ use std::hash::Hash;
 pub use cheapest_paths::PathVisitor;
 pub use condition_docids_cache::ConditionDocIdsCache;
 pub use dead_ends_cache::DeadEndsCache;
-pub use exactness::{ExactnessCondition, ExactnessGraph};
+pub use exactness::ExactnessGraph;
 pub use fid::{FidCondition, FidGraph};
 pub use position::{PositionCondition, PositionGraph};
 pub use proximity::{ProximityCondition, ProximityGraph};
--- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs
@ -1,6 +1,7 @@
 #![allow(clippy::too_many_arguments)]

 use super::ProximityCondition;
+use crate::proximity::MAX_DISTANCE;
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::SearchContext;
@ -35,7 +36,7 @@ pub fn build_edges(
    }

    let mut conditions = vec![];
-    for cost in right_ngram_max..(7 + right_ngram_max) {
+    for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
        conditions.push((
            cost as u32,
            conditions_interner.insert(ProximityCondition::Uninit {
@ -47,7 +48,7 @@ pub fn build_edges(
    }

    conditions.push((
-        (7 + right_ngram_max) as u32,
+        ((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
        conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
    ));

--- a/milli/src/search/new/tests/proximity.rs
+++ b/milli/src/search/new/tests/proximity.rs
@ -273,7 +273,7 @@ fn test_proximity_simple() {
    s.terms_matching_strategy(TermsMatchingStrategy::All);
    s.query("the quick brown fox jumps over the lazy dog");
    let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(texts, @r###"
    [
@ -282,11 +282,11 @@ fn test_proximity_simple() {
        "\"the quickbrown fox jumps over the lazy dog\"",
        "\"the really quick brown fox jumps over the lazy dog\"",
        "\"the really quick brown fox jumps over the very lazy dog\"",
-        "\"brown quick fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy. dog\"",
        "\"dog the quick brown fox jumps over the lazy\"",
-        "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
+        "\"brown quick fox jumps over the lazy dog\"",
        "\"the. quick brown fox jumps over the lazy. dog\"",
+        "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
    ]
    "###);
 }
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best s");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
        "\"summer best\"",
        "\"this is the best meal of summer\"",
        "\"summer x best\"",
-        "\"this is the best meal of the summer\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best cooked meal of the summer\"",
+        "\"this is the best meal of the summer\"",
        "\"summer x y best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best su");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
        "\"summer best\"",
        "\"this is the best meal of summer\"",
        "\"summer x best\"",
+        "\"this is the best meal I have ever had in such a beautiful summer day\"",
+        "\"this is the best cooked meal of the summer\"",
        "\"this is the best meal of the summer\"",
        "\"summer x y best\"",
-        "\"this is the best cooked meal of the summer\"",
-        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
    "###);
@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best wint");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
        "\"winter best\"",
        "\"this is the best meal of winter\"",
        "\"winter x best\"",
+        "\"this is the best meal I have ever had in such a beautiful winter day\"",
+        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
-        "\"this is the best cooked meal of the winter\"",
-        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
    "###);

@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best wi");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
        "\"winter best\"",
        "\"this is the best meal of winter\"",
        "\"winter x best\"",
-        "\"this is the best meal of the winter\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
+        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
    ]
    "###);
--- a/milli/src/search/new/tests/proximity_typo.rs
+++ b/milli/src/search/new/tests/proximity_typo.rs
@ -68,8 +68,8 @@ fn test_trap_basic() {
        [
            Proximity(
                Rank {
-                    rank: 8,
-                    max_rank: 8,
+                    rank: 4,
+                    max_rank: 4,
                },
            ),
            Typo(
@ -82,8 +82,8 @@ fn test_trap_basic() {
        [
            Proximity(
                Rank {
-                    rank: 8,
-                    max_rank: 8,
+                    rank: 4,
+                    max_rank: 4,
                },
            ),
            Typo(
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsexactness__proximity_after_exactness-4.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsexactness__proximity_after_exactness-4.snap
@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 35,
-                    max_rank: 57,
+                    rank: 9,
+                    max_rank: 25,
                },
            ),
        ],
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 35,
-                    max_rank: 57,
+                    rank: 9,
+                    max_rank: 25,
                },
            ),
        ],
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 35,
-                    max_rank: 57,
+                    rank: 9,
+                    max_rank: 25,
                },
            ),
        ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsexactness__proximity_after_exactness.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsexactness__proximity_after_exactness.snap
@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 57,
-                    max_rank: 57,
+                    rank: 25,
+                    max_rank: 25,
                },
            ),
        ],
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 56,
-                    max_rank: 57,
+                    rank: 24,
+                    max_rank: 25,
                },
            ),
        ],
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 35,
-                    max_rank: 57,
+                    rank: 9,
+                    max_rank: 25,
                },
            ),
        ],
@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 22,
-                    max_rank: 22,
+                    rank: 10,
+                    max_rank: 10,
                },
            ),
        ],
@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 22,
-                    max_rank: 22,
+                    rank: 10,
+                    max_rank: 10,
                },
            ),
        ],
@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 22,
-                    max_rank: 22,
+                    rank: 10,
+                    max_rank: 10,
                },
            ),
        ],
@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 21,
-                    max_rank: 22,
+                    rank: 9,
+                    max_rank: 10,
                },
            ),
        ],
@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 17,
-                    max_rank: 22,
+                    rank: 5,
+                    max_rank: 10,
                },
            ),
        ],
@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
            ),
            Proximity(
                Rank {
-                    rank: 17,
-                    max_rank: 22,
+                    rank: 5,
+                    max_rank: 10,
                },
            ),
        ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-11.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-11.snap
@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
 expression: "format!(\"{document_scores:#?}\")"
 ---
 [
-    [
-        Proximity(
-            Rank {
-                rank: 8,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 7,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
    [
        Proximity(
            Rank {
                rank: 4,
-                max_rank: 8,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
            },
        ),
    ],
@ -63,7 +39,31 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-14.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-14.snap
@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 7,
-                max_rank: 8,
+                rank: 3,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 6,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
            },
        ),
    ],
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -63,7 +55,15 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-2.snap
@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 7,
-                max_rank: 8,
+                rank: 3,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 6,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
            },
        ),
    ],
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -63,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -71,7 +63,15 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-5.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-5.snap
@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
 expression: "format!(\"{document_scores:#?}\")"
 ---
 [
-    [
-        Proximity(
-            Rank {
-                rank: 8,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 7,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
    [
        Proximity(
            Rank {
                rank: 4,
-                max_rank: 8,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
            },
        ),
    ],
@ -63,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -71,7 +47,31 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-8.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-8.snap
@ -7,7 +7,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -15,7 +15,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -47,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -55,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -63,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_split_word-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_split_word-2.snap
@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_split_word-5.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_split_word-5.snap
@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_split_word-8.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_split_word-8.snap
@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewteststypo_proximity__trap_basic_and_complex1-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewteststypo_proximity__trap_basic_and_complex1-2.snap
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 5,
-                max_rank: 8,
+                rank: 1,
+                max_rank: 4,
            },
        ),
    ],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 7,
-                max_rank: 8,
+                rank: 3,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewteststypo_proximity__trap_complex2-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewteststypo_proximity__trap_complex2-2.snap
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
            },
        ),
    ],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 15,
+                rank: 4,
+                max_rank: 7,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_phrase-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_phrase-2.snap
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 50,
-                max_rank: 50,
+                rank: 22,
+                max_rank: 22,
            },
        ),
    ],
@ -24,132 +24,6 @@ expression: "format!(\"{document_scores:#?}\")"
                max_matching_words: 9,
            },
        ),
-        Proximity(
-            Rank {
-                rank: 50,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 49,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 49,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 48,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 41,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 40,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 8,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 43,
-                max_rank: 43,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 7,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 36,
-                max_rank: 36,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 7,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 31,
-                max_rank: 36,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 5,
-                max_matching_words: 9,
-            },
-        ),
        Proximity(
            Rank {
                rank: 22,
@ -160,14 +34,126 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Words(
            Words {
-                matching_words: 4,
+                matching_words: 9,
                max_matching_words: 9,
            },
        ),
        Proximity(
            Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 21,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 21,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 20,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 17,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 16,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 8,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 19,
+                max_rank: 19,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 7,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 16,
+                max_rank: 16,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 7,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 13,
+                max_rank: 16,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 5,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 10,
+                max_rank: 10,
            },
        ),
    ],
@ -180,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
            },
        ),
    ],
@ -194,8 +180,22 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 4,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 7,
+                max_rank: 7,
            },
        ),
    ],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_phrase-5.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_phrase-5.snap
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 43,
-                max_rank: 43,
+                rank: 19,
+                max_rank: 19,
            },
        ),
    ],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 43,
-                max_rank: 43,
+                rank: 19,
+                max_rank: 19,
            },
        ),
    ],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 42,
-                max_rank: 43,
+                rank: 18,
+                max_rank: 19,
            },
        ),
    ],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 42,
-                max_rank: 43,
+                rank: 18,
+                max_rank: 19,
            },
        ),
    ],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 41,
-                max_rank: 43,
+                rank: 17,
+                max_rank: 19,
            },
        ),
    ],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 34,
-                max_rank: 43,
+                rank: 14,
+                max_rank: 19,
            },
        ),
    ],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 33,
-                max_rank: 43,
+                rank: 13,
+                max_rank: 19,
            },
        ),
    ],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 36,
-                max_rank: 36,
+                rank: 16,
+                max_rank: 16,
            },
        ),
    ],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 29,
-                max_rank: 29,
+                rank: 13,
+                max_rank: 13,
            },
        ),
    ],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 24,
-                max_rank: 29,
+                rank: 10,
+                max_rank: 13,
            },
        ),
    ],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_simple-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_simple-2.snap
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
            },
        ),
    ],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
            },
        ),
    ],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
            },
        ),
    ],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
            },
        ),
    ],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 55,
-                max_rank: 57,
+                rank: 23,
+                max_rank: 25,
            },
        ),
    ],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
            },
        ),
    ],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
            },
        ),
    ],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 52,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
            },
        ),
    ],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 51,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
            },
        ),
    ],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 48,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
            },
        ),
    ],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 47,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
            },
        ),
    ],
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 57,
+                max_rank: 25,
            },
        ),
    ],
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 50,
-                max_rank: 50,
+                rank: 22,
+                max_rank: 22,
            },
        ),
    ],
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 43,
-                max_rank: 43,
+                rank: 19,
+                max_rank: 19,
            },
        ),
    ],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 38,
-                max_rank: 43,
+                rank: 16,
+                max_rank: 19,
            },
        ),
    ],
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 29,
-                max_rank: 29,
+                rank: 13,
+                max_rank: 13,
            },
        ),
    ],
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
            },
        ),
    ],
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
            },
        ),
    ],
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
            },
        ),
    ],
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_simple-5.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_proximity_tms_last_simple-5.snap
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
            },
        ),
    ],
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
            },
        ),
    ],
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 55,
-                max_rank: 57,
+                rank: 23,
+                max_rank: 25,
            },
        ),
    ],
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
            },
        ),
    ],
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
            },
        ),
    ],
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
            },
        ),
    ],
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
            },
        ),
    ],
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
            },
        ),
    ],
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 52,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
            },
        ),
    ],
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 47,
-                max_rank: 57,
+                rank: 18,
+                max_rank: 25,
            },
        ),
    ],
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 45,
-                max_rank: 57,
+                rank: 18,
+                max_rank: 25,
            },
        ),
    ],
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 57,
+                max_rank: 25,
            },
        ),
    ],
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 47,
-                max_rank: 50,
+                rank: 19,
+                max_rank: 22,
            },
        ),
    ],
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 40,
-                max_rank: 43,
+                rank: 16,
+                max_rank: 19,
            },
        ),
    ],
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 35,
-                max_rank: 43,
+                rank: 13,
+                max_rank: 19,
            },
        ),
    ],
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 26,
-                max_rank: 29,
+                rank: 10,
+                max_rank: 13,
            },
        ),
    ],
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 19,
-                max_rank: 22,
+                rank: 7,
+                max_rank: 10,
            },
        ),
    ],
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 19,
-                max_rank: 22,
+                rank: 7,
+                max_rank: 10,
            },
        ),
    ],
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 19,
-                max_rank: 22,
+                rank: 7,
+                max_rank: 10,
            },
        ),
    ],
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
        ),
        Proximity(
            Rank {
-                rank: 13,
-                max_rank: 15,
+                rank: 5,
+                max_rank: 7,
            },
        ),
    ],
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_tms_all-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestswords_tms__words_tms_all-2.snap
@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 55,
-                max_rank: 57,
+                rank: 23,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 52,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 51,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 48,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
            },
        ),
    ],
    [
        Proximity(
            Rank {
-                rank: 47,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
            },
        ),
    ],
@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
-                max_rank: 57,
+                max_rank: 25,
            },
        ),
    ],
--- a/milli/src/search/new/tests/stop_words.rs
+++ b/milli/src/search/new/tests/stop_words.rs
@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
            ),
            Proximity(
                Rank {
-                    rank: 7,
-                    max_rank: 8,
+                    rank: 3,
+                    max_rank: 4,
                },
            ),
            Fid(
@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
            ),
            Proximity(
                Rank {
-                    rank: 6,
-                    max_rank: 8,
+                    rank: 2,
+                    max_rank: 4,
                },
            ),
            Fid(
--- a/milli/src/search/new/tests/words_tms.rs
+++ b/milli/src/search/new/tests/words_tms.rs
@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

    // 7 is better than 6 because of the proximity between "the" and its surrounding terms
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(texts, @r###"
@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
        "\"the mighty and quick brown fox jumps over the lazy dog\"",
        "\"the brown quick fox jumps over the lazy dog\"",
        "\"the brown quick fox jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the\"",
@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

    // 10 is better than 9 because of the proximity between "quick" and "brown"
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(texts, @r###"
@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
        "\"the great quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the really lazy dog\"",
        "\"the mighty and quick brown fox jumps over the lazy dog\"",
-        "\"this quick brown and scary fox jumps over the lazy dog\"",
        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the\"",
@ -427,7 +427,7 @@ fn test_words_tms_all() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(texts, @r###"
@ -439,10 +439,10 @@ fn test_words_tms_all() {
        "\"the mighty and quick brown fox jumps over the lazy dog\"",
        "\"the brown quick fox jumps over the lazy dog\"",
        "\"the brown quick fox jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
    ]
    "###);
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@ -108,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        self.delete_document(docid);
        Some(docid)
    }
-    pub fn execute(self) -> Result<DocumentDeletionResult> {
-        puffin::profile_function!();

+    pub fn execute(self) -> Result<DocumentDeletionResult> {
        let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
            self.execute_inner()?;

        Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
    }
+
    pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
+        puffin::profile_function!();
+
        self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;

        // We retrieve the current documents ids that are in the database.
@ -476,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
            + for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
    {
+        puffin::profile_function!();
+
        while let Some(result) = iter.next() {
            let (bytes, mut docids) = result?;
            let previous_len = docids.len();
@ -498,6 +502,8 @@ fn remove_from_word_prefix_docids(
    db: &Database<Str, RoaringBitmapCodec>,
    to_remove: &RoaringBitmap,
 ) -> Result<fst::Set<Vec<u8>>> {
+    puffin::profile_function!();
+
    let mut prefixes_to_delete = fst::SetBuilder::memory();

    // We iterate over the word prefix docids database and remove the deleted documents ids
@ -528,6 +534,8 @@ fn remove_from_word_docids(
    words_to_keep: &mut BTreeSet<String>,
    words_to_remove: &mut BTreeSet<String>,
 ) -> Result<()> {
+    puffin::profile_function!();
+
    // We create an iterator to be able to get the content and delete the word docids.
    // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
    // the LMDB B-Tree two times but only once.
@ -559,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value(
    field_id: FieldId,
    to_remove: &RoaringBitmap,
 ) -> heed::Result<HashSet<Vec<u8>>> {
+    puffin::profile_function!();
+
    let db = match facet_type {
        FacetType::String => {
            index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
@ -594,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>(
 where
    C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
 {
+    puffin::profile_function!();
+
    let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
    while let Some(result) = iter.next() {
        let (bytes, mut docids) = result?;
--- a/milli/src/update/facet/bulk.rs
+++ b/milli/src/update/facet/bulk.rs
@ -1,5 +1,6 @@
 use std::borrow::Cow;
 use std::fs::File;
+use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;
@ -30,7 +31,7 @@ pub struct FacetsUpdateBulk<'i> {
    facet_type: FacetType,
    field_ids: Vec<FieldId>,
    // None if level 0 does not need to be updated
-    new_data: Option<grenad::Reader<File>>,
+    new_data: Option<grenad::Reader<BufReader<File>>>,
 }

 impl<'i> FacetsUpdateBulk<'i> {
@ -38,7 +39,7 @@ impl<'i> FacetsUpdateBulk<'i> {
        index: &'i Index,
        field_ids: Vec<FieldId>,
        facet_type: FacetType,
-        new_data: grenad::Reader<File>,
+        new_data: grenad::Reader<BufReader<File>>,
        group_size: u8,
        min_level_size: u8,
    ) -> FacetsUpdateBulk<'i> {
@ -187,7 +188,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        &self,
        field_id: FieldId,
        txn: &RoTxn,
-    ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
+    ) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
        let mut all_docids = RoaringBitmap::new();
        let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
            for bitmap in bitmaps {
@ -259,7 +260,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        field_id: u16,
        level: u8,
        handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
-    ) -> Result<Vec<grenad::Reader<File>>> {
+    ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
        if level == 0 {
            self.read_level_0(rtxn, field_id, handle_group)?;
            // Level 0 is already in the database
--- a/milli/src/update/facet/incremental.rs
+++ b/milli/src/update/facet/incremental.rs
@ -1,5 +1,6 @@
 use std::collections::HashMap;
 use std::fs::File;
+use std::io::BufReader;

 use heed::types::{ByteSlice, DecodeIgnore};
 use heed::{BytesDecode, Error, RoTxn, RwTxn};
@ -34,14 +35,14 @@ pub struct FacetsUpdateIncremental<'i> {
    index: &'i Index,
    inner: FacetsUpdateIncrementalInner,
    facet_type: FacetType,
-    new_data: grenad::Reader<File>,
+    new_data: grenad::Reader<BufReader<File>>,
 }

 impl<'i> FacetsUpdateIncremental<'i> {
    pub fn new(
        index: &'i Index,
        facet_type: FacetType,
-        new_data: grenad::Reader<File>,
+        new_data: grenad::Reader<BufReader<File>>,
        group_size: u8,
        min_level_size: u8,
        max_group_size: u8,
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@ -78,6 +78,7 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;

 use std::collections::BTreeSet;
 use std::fs::File;
+use std::io::BufReader;
 use std::iter::FromIterator;

 use charabia::normalizer::{Normalize, NormalizerOption};
@ -108,13 +109,17 @@ pub struct FacetsUpdate<'i> {
    index: &'i Index,
    database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
    facet_type: FacetType,
-    new_data: grenad::Reader<File>,
+    new_data: grenad::Reader<BufReader<File>>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
 }
 impl<'i> FacetsUpdate<'i> {
-    pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
+    pub fn new(
+        index: &'i Index,
+        facet_type: FacetType,
+        new_data: grenad::Reader<BufReader<File>>,
+    ) -> Self {
        let database = match facet_type {
            FacetType::String => index
                .facet_id_string_docids
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@ -1,4 +1,4 @@
-use std::io::{Read, Seek};
+use std::io::{BufWriter, Read, Seek};
 use std::result::Result as StdResult;
 use std::{fmt, iter};

@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(

    let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();

-    let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
+    let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
    let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];

    // The primary key *field id* that has already been set for this index or the one
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@ -1,6 +1,7 @@
 use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
+use std::io::BufReader;
 use std::{io, mem, str};

 use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
@ -31,7 +32,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
-) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
+) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
    puffin::profile_function!();

    let max_positions_per_attributes = max_positions_per_attributes
--- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use heed::{BytesDecode, BytesEncode};

@ -19,7 +19,7 @@ use crate::Result;
 pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
    docid_fid_facet_number: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use heed::BytesEncode;

@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
    docid_fid_facet_string: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@ -1,7 +1,7 @@
 use std::collections::{BTreeMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};
 use std::mem::size_of;

 use heed::zerocopy::AsBytes;
@ -17,11 +17,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET

 /// The extracted facet values stored in grenad files by type.
 pub struct ExtractedFacetValues {
-    pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
-    pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
-    pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
-    pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
-    pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
+    pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
+    pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
+    pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
+    pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
+    pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
 }

 /// Extracts the facet values of each faceted field of each document.
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@ -1,6 +1,6 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use grenad::Sorter;

@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
 pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use concat_arrays::concat_arrays;
 use serde_json::Value;
@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    primary_key_id: FieldId,
    (lat_fid, lng_fid): (FieldId, FieldId),
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let mut writer = create_writer(
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@ -1,6 +1,6 @@
 use std::convert::TryFrom;
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use bytemuck::cast_slice;
 use serde_json::{from_slice, Value};
@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    primary_key_id: FieldId,
    vectors_fid: FieldId,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let mut writer = create_writer(
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@ -1,6 +1,6 @@
 use std::collections::HashSet;
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};
 use std::iter::FromIterator;

 use roaring::RoaringBitmap;
@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
    exact_attributes: &HashSet<FieldId>,
-) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
+) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_word_fid_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_fid_docids.rs
@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use super::helpers::{
    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
 pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@ -1,6 +1,7 @@
 use std::cmp::Ordering;
 use std::collections::{BinaryHeap, HashMap};
 use std::fs::File;
+use std::io::BufReader;
 use std::{cmp, io, mem, str, vec};

 use super::helpers::{
@ -20,7 +21,7 @@ use crate::{DocumentId, Result};
 pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use super::helpers::{
    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
 pub fn extract_word_position_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@ -12,6 +12,7 @@ mod extract_word_position_docids;

 use std::collections::HashSet;
 use std::fs::File;
+use std::io::BufReader;

 use crossbeam_channel::Sender;
 use log::debug;
@ -39,8 +40,8 @@ use crate::{FieldId, Result};
 /// Send data in grenad file over provided Sender.
 #[allow(clippy::too_many_arguments)]
 pub(crate) fn data_from_obkv_documents(
-    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
-    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
+    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
+    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    searchable_fields: Option<HashSet<FieldId>>,
@ -152,7 +153,7 @@ pub(crate) fn data_from_obkv_documents(
        });
    }

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@ -162,7 +163,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-pair-proximity-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@ -172,7 +173,11 @@ pub(crate) fn data_from_obkv_documents(
        "field-id-wordcount-docids",
    );

-    spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
+    spawn_extraction_task::<
+        _,
+        _,
+        Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
+    >(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@ -185,7 +190,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@ -194,7 +199,7 @@ pub(crate) fn data_from_obkv_documents(
        TypedChunk::WordPositionDocids,
        "word-position-docids",
    );
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
        docid_word_positions_chunks,
        indexer,
        lmdb_writer_sx.clone(),
@ -204,7 +209,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-fid-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
        docid_fid_facet_strings_chunks,
        indexer,
        lmdb_writer_sx.clone(),
@ -214,7 +219,7 @@ pub(crate) fn data_from_obkv_documents(
        "field-id-facet-string-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
        docid_fid_facet_numbers_chunks,
        indexer,
        lmdb_writer_sx,
@ -269,7 +274,7 @@ fn spawn_extraction_task<FE, FS, M>(
 /// Extract chunked data and send it into lmdb_writer_sx sender:
 /// - documents
 fn send_original_documents_data(
-    original_documents_chunk: Result<grenad::Reader<File>>,
+    original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    vectors_field_id: Option<FieldId>,
@ -311,7 +316,7 @@ fn send_original_documents_data(
 #[allow(clippy::too_many_arguments)]
 #[allow(clippy::type_complexity)]
 fn send_and_extract_flattened_documents_data(
-    flattened_documents_chunk: Result<grenad::Reader<File>>,
+    flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    searchable_fields: &Option<HashSet<FieldId>>,
@ -328,7 +333,10 @@ fn send_and_extract_flattened_documents_data(
        grenad::Reader<CursorClonableMmap>,
        (
            grenad::Reader<CursorClonableMmap>,
-            (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
+            (
+                grenad::Reader<BufReader<File>>,
+                (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
+            ),
        ),
    ),
 )> {
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::{self, Seek};
+use std::io::{self, BufReader, BufWriter, Seek};
 use std::time::Instant;

 use grenad::{CompressionType, Sorter};
@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
    typ: grenad::CompressionType,
    level: Option<u32>,
    file: R,
-) -> grenad::Writer<R> {
+) -> grenad::Writer<BufWriter<R>> {
    let mut builder = grenad::Writer::builder();
    builder.compression_type(typ);
    if let Some(level) = level {
        builder.compression_level(level);
    }
-    builder.build(file)
+    builder.build(BufWriter::new(file))
 }

 pub fn create_sorter(
@ -53,7 +53,7 @@ pub fn create_sorter(
 pub fn sorter_into_reader(
    sorter: grenad::Sorter<MergeFn>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    let mut writer = create_writer(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
@ -64,16 +64,18 @@ pub fn sorter_into_reader(
    writer_into_reader(writer)
 }

-pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
-    let mut file = writer.into_inner()?;
+pub fn writer_into_reader(
+    writer: grenad::Writer<BufWriter<File>>,
+) -> Result<grenad::Reader<BufReader<File>>> {
+    let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
    file.rewind()?;
-    grenad::Reader::new(file).map_err(Into::into)
+    grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
 }

 pub unsafe fn as_cloneable_grenad(
-    reader: &grenad::Reader<File>,
+    reader: &grenad::Reader<BufReader<File>>,
 ) -> Result<grenad::Reader<CursorClonableMmap>> {
-    let file = reader.get_ref();
+    let file = reader.get_ref().get_ref();
    let mmap = memmap2::Mmap::map(file)?;
    let cursor = io::Cursor::new(ClonableMmap::from(mmap));
    let reader = grenad::Reader::new(cursor)?;
@ -89,8 +91,8 @@ where
    fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
 }

-impl MergeableReader for Vec<grenad::Reader<File>> {
-    type Output = grenad::Reader<File>;
+impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
+    type Output = grenad::Reader<BufReader<File>>;

    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
        let mut merger = MergerBuilder::new(merge_fn);
@ -99,8 +101,8 @@ impl MergeableReader for Vec<grenad::Reader<File>> {
    }
 }

-impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
-    type Output = (grenad::Reader<File>, grenad::Reader<File>);
+impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
+    type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);

    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
        let mut m1 = MergerBuilder::new(merge_fn);
@ -125,7 +127,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
        Ok(())
    }

-    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
+    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
        let merger = self.0.build();
        let mut writer = create_writer(
            params.chunk_compression_type,
@ -176,7 +178,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
    reader: grenad::Reader<R>,
    indexer: GrenadParameters,
    documents_chunk_size: usize,
-) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
+) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
    let mut continue_reading = true;
    let mut cursor = reader.into_cursor()?;

--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@ -14,7 +14,7 @@ pub use grenad_helpers::{
 };
 pub use merge_functions::{
    concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
-    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
+    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
    serialize_roaring_bitmap, MergeFn,
 };

--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@ -20,10 +20,7 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{
-    extract_finite_float_from_value, validate_document_id, validate_document_id_value,
-    validate_geo_from_json, DocumentId,
-};
+pub use self::enrich::{extract_finite_float_from_value, DocumentId};
 pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@ -659,8 +659,10 @@ impl<'a, 'i> Transform<'a, 'i> {
            new_documents_ids: self.new_documents_ids,
            replaced_documents_ids: self.replaced_documents_ids,
            documents_count: self.documents_count,
-            original_documents,
-            flattened_documents,
+            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
+            flattened_documents: flattened_documents
+                .into_inner()
+                .map_err(|err| err.into_error())?,
        })
    }

@ -779,8 +781,10 @@ impl<'a, 'i> Transform<'a, 'i> {
            new_documents_ids: documents_ids,
            replaced_documents_ids: RoaringBitmap::default(),
            documents_count,
-            original_documents,
-            flattened_documents,
+            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
+            flattened_documents: flattened_documents
+                .into_inner()
+                .map_err(|err| err.into_error())?,
        };

        let new_facets = output.compute_real_facets(wtxn, self.index)?;
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@ -2,7 +2,7 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
-use std::io;
+use std::io::{self, BufReader};

 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
    FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
    FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
    Documents(grenad::Reader<CursorClonableMmap>),
-    FieldIdWordcountDocids(grenad::Reader<File>),
+    FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
    NewDocumentsIds(RoaringBitmap),
    WordDocids {
-        word_docids_reader: grenad::Reader<File>,
-        exact_word_docids_reader: grenad::Reader<File>,
+        word_docids_reader: grenad::Reader<BufReader<File>>,
+        exact_word_docids_reader: grenad::Reader<BufReader<File>>,
    },
-    WordPositionDocids(grenad::Reader<File>),
-    WordFidDocids(grenad::Reader<File>),
-    WordPairProximityDocids(grenad::Reader<File>),
-    FieldIdFacetStringDocids(grenad::Reader<File>),
-    FieldIdFacetNumberDocids(grenad::Reader<File>),
-    FieldIdFacetExistsDocids(grenad::Reader<File>),
-    FieldIdFacetIsNullDocids(grenad::Reader<File>),
-    FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
-    GeoPoints(grenad::Reader<File>),
-    VectorPoints(grenad::Reader<File>),
+    WordPositionDocids(grenad::Reader<BufReader<File>>),
+    WordFidDocids(grenad::Reader<BufReader<File>>),
+    WordPairProximityDocids(grenad::Reader<BufReader<File>>),
+    FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
+    FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
+    FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
+    FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
+    FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
+    GeoPoints(grenad::Reader<BufReader<File>>),
+    VectorPoints(grenad::Reader<BufReader<File>>),
    ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }

--- a/milli/src/update/prefix_word_pairs/mod.rs
+++ b/milli/src/update/prefix_word_pairs/mod.rs
@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::collections::HashSet;
-use std::io::BufReader;
+use std::io::{BufReader, BufWriter};

 use grenad::CompressionType;
 use heed::types::ByteSlice;
@ -119,9 +119,9 @@ pub fn insert_into_database(
 pub fn write_into_lmdb_database_without_merging(
    wtxn: &mut heed::RwTxn,
    database: heed::PolyDatabase,
-    writer: grenad::Writer<std::fs::File>,
+    writer: grenad::Writer<BufWriter<std::fs::File>>,
 ) -> Result<()> {
-    let file = writer.into_inner()?;
+    let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
    let reader = grenad::Reader::new(BufReader::new(file))?;
    if database.is_empty(wtxn)? {
        let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/initial/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/initial/word_prefix_pair_proximity_docids.snap
@ -20,7 +20,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  at               a    [100, ]
 3  rings            a    [101, ]
 3  the              a    [101, ]
-4  at               b    [100, ]
-4  at               be   [100, ]
-4  bell             a    [101, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_pair_proximity_docids.snap
@ -30,10 +30,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  bell             5                [101, ]
 3  rings            am               [101, ]
 3  the              at               [101, ]
-4  an               house            [100, ]
-4  at               beautiful        [100, ]
-4  bell             am               [101, ]
-4  the              5                [101, ]
-5  at               house            [100, ]
-5  the              am               [101, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_prefix_pair_proximity_docids.snap
@ -28,8 +28,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  rings            a    [101, ]
 3  rings            am   [101, ]
 3  the              a    [101, ]
-4  at               b    [100, ]
-4  at               be   [100, ]
-4  bell             a    [101, ]
-4  bell             am   [101, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/word_prefix_pair_proximity_docids.snap
@ -7,5 +7,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 2  bell             a    [51, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  bell             a    [51, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/initial/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/initial/word_prefix_pair_proximity_docids.snap
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  at               a    [50, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  bell             a    [51, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/second_delete/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/second_delete/word_prefix_pair_proximity_docids.snap
@ -7,5 +7,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 2  bell             a    [51, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  bell             a    [51, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/first_delete/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/first_delete/word_prefix_pair_proximity_docids.snap
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  at               a    [50, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  bell             a    [51, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/initial/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/initial/word_prefix_pair_proximity_docids.snap
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  at               a    [50, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  bell             a    [51, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/reupdate/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/reupdate/word_prefix_pair_proximity_docids.snap
@ -16,6 +16,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  at               a    [50, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  at               b    [50, ]
-4  bell             a    [51, ]

--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/second_delete/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/soft_delete_and_reupdate/second_delete/word_prefix_pair_proximity_docids.snap
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3  at               a    [50, ]
 3  rings            a    [51, ]
 3  the              a    [51, ]
-4  bell             a    [51, ]

--- a/milli/tests/search/distinct.rs
+++ b/milli/tests/search/distinct.rs
@ -8,7 +8,7 @@ use Criterion::*;
 use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

 macro_rules! test_distinct {
-    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
+    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
@ -27,6 +27,7 @@ macro_rules! test_distinct {
            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit($limit);
+            search.offset($offset);
            search.exhaustive_number_hits($exhaustive);

            search.terms_matching_strategy(TermsMatchingStrategy::default());
@ -47,6 +48,7 @@ macro_rules! test_distinct {
                            Some(d.id)
                        }
                    })
+                    .skip($offset)
                    .take($limit)
                    .collect();

@ -61,6 +63,7 @@ test_distinct!(
    tag,
    true,
    1,
+    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    3
 );
@ -69,6 +72,7 @@ test_distinct!(
    asc_desc_rank,
    true,
    1,
+    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    7
 );
@ -77,6 +81,7 @@ test_distinct!(
    asc_desc_rank,
    true,
    0,
+    0,
    vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
    7
 );
@ -86,6 +91,7 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    3
 );
@ -94,6 +100,7 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    7
 );
@ -102,6 +109,7 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words],
    3
 );
@ -110,6 +118,7 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words],
    7
 );
@ -118,6 +127,7 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Typo],
    3
 );
@ -126,6 +136,7 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Typo],
    7
 );
@ -134,6 +145,7 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Proximity],
    3
 );
@ -142,6 +154,7 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Proximity],
    7
 );
@ -150,6 +163,7 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Attribute],
    3
 );
@ -158,6 +172,7 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Attribute],
    7
 );
@ -166,6 +181,7 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Exactness],
    3
 );
@ -174,6 +190,47 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
    vec![Words, Exactness],
    7
 );
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    distinct_string_limit_and_offset,
+    tag,
+    false,
+    EXTERNAL_DOCUMENTS_IDS.len(),
+    1,
+    vec![],
+    3
+);
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    exhaustive_distinct_string_limit_and_offset,
+    tag,
+    true,
+    1,
+    2,
+    vec![],
+    3
+);
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    distinct_number_limit_and_offset,
+    asc_desc_rank,
+    false,
+    EXTERNAL_DOCUMENTS_IDS.len(),
+    2,
+    vec![],
+    7
+);
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    exhaustive_distinct_number_limit_and_offset,
+    asc_desc_rank,
+    true,
+    2,
+    4,
+    vec![],
+    7
+);