Compare commits


16 Commits

Author | SHA1 | Message | Date
Clément Renault | b57b818b67 | Don't use the last version of clap | 2023-10-30 16:57:31 +01:00
Clément Renault | f7ea94e5f4 | Modify the Dockerfile to compile meilisearch and meilitool | 2023-10-30 16:32:17 +01:00
Clément Renault | 53382bb1b8 | Introduce a new flag to skip dumping enqueued/processing tasks | 2023-10-30 14:32:10 +01:00
Clément Renault | 5b004a2583 | Add more logs to the dump exporter | 2023-10-30 14:31:55 +01:00
Clément Renault | 13416ccbf7 | Introduce a new meilitool to help the cloud team | 2023-10-30 14:30:20 +01:00
meili-bors[bot] | 2614e7d9ca | Merge #4174 | 2023-10-30 10:12:54 +00:00
    4174: Fix warnings r=dureuill a=irevoire
    Fix all the warnings found in the CI: https://github.com/meilisearch/meilisearch/actions/runs/6622576021/job/17988323623
    Co-authored-by: Tamo <tamo@meilisearch.com>
Tamo | e7244aa485 | fix warnings | 2023-10-30 11:00:46 +01:00
meili-bors[bot] | 9cacc82307 | Merge #4169 | 2023-10-26 17:21:30 +00:00
    4169: update charabia r=curquiza a=ManyTheFish
    Update Charabia to v0.8.5 and add the new khmer tokenizer
    Co-authored-by: ManyTheFish <many@meilisearch.com>
ManyTheFish | 4c6fddb1cb | update charabia | 2023-10-26 17:01:10 +02:00
meili-bors[bot] | ca52021079 | Merge #4154 | 2023-10-23 12:00:50 +00:00
    4154: Update version for the next release (v1.5.0) in Cargo.toml r=curquiza a=meili-bot
    ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.
    Co-authored-by: curquiza <curquiza@users.noreply.github.com>
curquiza | ee6f79d60b | Update version for the next release (v1.5.0) in Cargo.toml | 2023-10-23 11:49:07 +00:00
meili-bors[bot] | e4c24ca6a3 | Merge #4151 | 2023-10-23 10:11:11 +00:00
    4151: Bring back changes from v1.4.2 into `release-v1.5.0` r=dureuill a=curquiza
    This will bring the fixes in v1.4.2 for v1.5.0 release
    Co-authored-by: curquiza <curquiza@users.noreply.github.com>
    Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
    Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
Louis Dureuil | 2bae9550c8 | Add explanatory comment | 2023-10-23 12:06:28 +02:00
Vivek Kumar | 32c78ac8b1 | add/update tests when search with distinct attribute & pagination with no ranking | 2023-10-23 12:06:27 +02:00
Vivek Kumar | 5fe7c4545a | compute all candidates correctly when skipping | 2023-10-23 12:02:45 +02:00
curquiza | 2042229927 | Update version for the next release (v1.4.2) in Cargo.toml | 2023-10-23 12:02:45 +02:00
40 changed files with 1483 additions and 674 deletions

View File

@@ -90,8 +90,7 @@ jobs:
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo 'Here are your benchmarks diff 👊' >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt

View File

@@ -50,7 +50,7 @@ jobs:
needs: check-version
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v3
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb

View File

@@ -63,7 +63,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

View File

@@ -160,7 +160,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: 'yarn'
- name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: yarn
- name: Install dependencies

View File

@@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -149,7 +149,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -168,7 +168,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -187,7 +187,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

901
Cargo.lock generated

File diff suppressed because it is too large.

View File

@@ -2,6 +2,7 @@
resolver = "2"
members = [
"meilisearch",
"meilitool",
"meilisearch-types",
"meilisearch-auth",
"meili-snap",
@@ -18,7 +19,7 @@ members = [
]
[workspace.package]
version = "1.4.1"
version = "1.5.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
@@ -28,7 +29,6 @@ license = "MIT"
[profile.release]
codegen-units = 1
debug = true
[profile.dev.package.flate2]
opt-level = 3

View File

@@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
@@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
cargo build --release -p meilisearch -p meilitool
# Run
FROM alpine:3.16
@@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
COPY --from=compiler /meilisearch/target/release/meilitool /bin/meilitool
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@@ -25,6 +25,12 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
---
### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
---
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">

View File

@@ -526,12 +526,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -541,12 +541,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -571,12 +571,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -617,12 +617,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -632,12 +632,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -647,12 +647,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);

View File

@@ -46,7 +46,6 @@ pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry;
pub type Kind = updates::UpdateMeta;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
@@ -108,11 +107,8 @@ impl V2Reader {
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
V2IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
index,
BufReader::new(
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
)
}))
}
@@ -147,41 +143,16 @@ pub struct V2IndexReader {
}
impl V2IndexReader {
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let mut created_at = None;
let mut updated_at = None;
for line in tasks.lines() {
let task: Task = serde_json::from_str(&line?)?;
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
continue;
}
let new_created_at = match task.update.meta() {
Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
_ => None,
};
let new_updated_at = task.update.finished_at();
if created_at.is_none() || created_at > new_created_at {
created_at = new_created_at;
}
if updated_at.is_none() || updated_at < new_updated_at {
updated_at = new_updated_at;
}
}
let current_time = OffsetDateTime::now_utc();
let metadata = IndexMetadata {
uid: index_uuid.uid.clone(),
uid: name,
primary_key: meta.primary_key,
created_at: created_at.unwrap_or(current_time),
updated_at: updated_at.unwrap_or(current_time),
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V2IndexReader {
@@ -277,12 +248,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -292,12 +263,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -322,12 +293,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -369,12 +340,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -384,12 +355,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -399,12 +370,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);

View File

@@ -227,14 +227,4 @@ impl UpdateStatus {
_ => None,
}
}
pub fn finished_at(&self) -> Option<OffsetDateTime> {
match self {
UpdateStatus::Processing(_) => None,
UpdateStatus::Enqueued(_) => None,
UpdateStatus::Processed(u) => Some(u.processed_at),
UpdateStatus::Aborted(_) => None,
UpdateStatus::Failed(u) => Some(u.failed_at),
}
}
}

View File

@@ -12,7 +12,6 @@ license.workspace = true
[dependencies]
anyhow = "1.0.70"
backtrace = "0.3.69"
bincode = "1.3.3"
csv = "1.2.1"
derive_builder = "0.12.0"

View File

@@ -825,10 +825,6 @@ impl IndexScheduler {
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
@@ -849,9 +845,6 @@ impl IndexScheduler {
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
@@ -891,9 +884,6 @@ impl IndexScheduler {
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
@@ -913,9 +903,6 @@ impl IndexScheduler {
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;

View File

@@ -108,8 +108,6 @@ pub enum Error {
TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
TaskCancelationWithEmptyQuery,
#[error("Aborted task")]
AbortedTask,
#[error(transparent)]
Dump(#[from] dump::Error),
@@ -117,13 +115,8 @@ pub enum Error {
Heed(#[from] heed::Error),
#[error(transparent)]
Milli(#[from] milli::Error),
#[error("An unexpected crash occurred when processing the task. {}", {
match .0 {
Some(report) => format!("Get /reports/{}", report),
None => "No report was saved.".into(),
}
})]
ProcessBatchPanicked(Option<uuid::Uuid>),
#[error("An unexpected crash occurred when processing the task.")]
ProcessBatchPanicked,
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
@@ -182,11 +175,10 @@ impl Error {
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
| Error::ProcessBatchPanicked(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
@@ -229,7 +221,7 @@ impl ErrorCode for Error {
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::ProcessBatchPanicked => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
@@ -244,9 +236,6 @@ impl ErrorCode for Error {
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}

View File

@@ -39,7 +39,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
panic_reader: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();

View File

@@ -26,9 +26,8 @@ mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
mod panic_hook;
mod utils;
mod uuid_codec;
pub mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
@@ -54,8 +53,6 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use panic_hook::ReportReader;
pub use panic_hook::{Panic, Report, ReportRegistry};
use puffin::FrameView;
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
@@ -334,8 +331,6 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
pub(crate) panic_reader: ReportReader,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -386,7 +381,6 @@ impl IndexScheduler {
#[cfg(test)]
run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(),
panic_reader: self.panic_reader.clone(),
}
}
}
@@ -444,12 +438,6 @@ impl IndexScheduler {
let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
wtxn.commit()?;
const MAX_REPORT_COUNT: usize = 20;
let panic_reader = panic_hook::ReportReader::install_panic_hook(
std::num::NonZeroUsize::new(MAX_REPORT_COUNT).unwrap(),
);
// allow unreachable_code to get rids of the warning in the case of a test build.
let this = Self {
must_stop_processing: MustStopProcessing::default(),
@@ -490,7 +478,6 @@ impl IndexScheduler {
#[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)),
features,
panic_reader,
};
this.run();
@@ -1143,10 +1130,7 @@ impl IndexScheduler {
.name(String::from("batch-operation"))
.spawn(move || cloned_index_scheduler.process_batch(batch))
.unwrap();
self.panic_reader
.join_thread(handle)
.unwrap_or_else(|maybe_report| Err(Error::ProcessBatchPanicked(maybe_report)))
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};
#[cfg(test)]
@@ -1183,8 +1167,7 @@ impl IndexScheduler {
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
)))
| Err(Error::AbortedTask) => {
))) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort().map_err(Error::HeedTransaction)?;
@@ -1327,10 +1310,6 @@ impl IndexScheduler {
}
}
pub fn reports(&self) -> Arc<RwLock<ReportRegistry>> {
self.panic_reader.registry()
}
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
///
/// Two messages are sent through the channel for each breakpoint.
@@ -4344,26 +4323,4 @@ mod tests {
}
"###);
}
#[test]
fn cancel_processing_dump() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
let dump_cancellation = KindWithContent::TaskCancelation {
query: "cancel dump".to_owned(),
tasks: RoaringBitmap::from_iter([0]),
};
let _ = index_scheduler.register(dump_creation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
let _ = index_scheduler.register(dump_cancellation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
}

View File

@@ -1,211 +0,0 @@
//! Panic hook designed to fetch a panic from a subthread and recover it on join.
use std::collections::VecDeque;
use std::num::NonZeroUsize;
use std::panic::PanicInfo;
use std::sync::{Arc, RwLock};
use std::thread::{JoinHandle, ThreadId};
use backtrace::Backtrace;
// Represents a panic in a shallowy structured fashion
pub struct Panic {
pub payload: Option<String>,
pub location: Option<String>,
pub thread_name: Option<String>,
pub thread_id: ThreadId,
pub backtrace: Backtrace,
}
/// A panic enriched with a unique id
#[derive(serde::Serialize)]
pub struct Report {
pub id: uuid::Uuid,
#[serde(serialize_with = "serialize_panic")]
pub panic: Panic,
}
fn serialize_panic<S>(panic: &Panic, s: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::Serialize;
panic.to_json().serialize(s)
}
impl Report {
pub fn new(panic: Panic) -> Self {
Self { id: uuid::Uuid::new_v4(), panic }
}
}
impl Panic {
pub fn to_json(&self) -> serde_json::Value {
json::panic_to_json(self)
}
}
mod json {
use backtrace::{Backtrace, BacktraceFrame, BacktraceSymbol};
use serde_json::{json, Value};
use super::Panic;
fn symbol_to_json(symbol: &BacktraceSymbol) -> Value {
let address = symbol.addr().map(|addr| format!("{:p}", addr));
let column = symbol.colno();
let line = symbol.lineno();
let function = symbol.name().map(|name| name.to_string());
let filename = symbol.filename();
json!({
"function": function,
"filename": filename,
"line": line,
"column": column,
"address": address,
})
}
fn frame_to_json(frame: &BacktraceFrame) -> Value {
let symbols: Vec<_> = frame.symbols().iter().map(symbol_to_json).collect();
match symbols.as_slice() {
[] => {
let address = format!("{:p}", frame.ip());
json!({"address": address})
}
[symbol] => json!(symbol),
symbols => json!(symbols),
}
}
fn backtrace_to_json(backtrace: &Backtrace) -> Value {
let frames: Vec<_> = backtrace.frames().iter().map(frame_to_json).collect();
json!(frames)
}
pub fn panic_to_json(panic: &Panic) -> Value {
let thread_id = format!("{:?}", panic.thread_id);
serde_json::json!({
"payload": panic.payload,
"location": panic.location,
"thread": {
"id": thread_id,
"name": panic.thread_name,
},
"backtrace": backtrace_to_json(&panic.backtrace),
})
}
}
struct ReportWriter(Arc<RwLock<ReportRegistry>>);
/// A FIFO queue of reports.
pub struct ReportRegistry {
reports: std::collections::VecDeque<Report>,
}
impl ReportRegistry {
pub fn new(capacity: NonZeroUsize) -> Self {
Self { reports: VecDeque::with_capacity(capacity.get()) }
}
pub fn push(&mut self, report: Report) -> Option<Report> {
let popped = if self.reports.len() == self.reports.capacity() {
self.reports.pop_back()
} else {
None
};
self.reports.push_front(report);
popped
}
pub fn iter(&self) -> impl Iterator<Item = &Report> {
self.reports.iter()
}
pub fn find(&self, report_id: uuid::Uuid) -> Option<&Report> {
self.iter().find(|report| report.id == report_id)
}
}
impl ReportWriter {
#[track_caller]
fn write_panic(&self, panic_info: &PanicInfo<'_>) {
let payload = panic_info
.payload()
.downcast_ref::<&str>()
.map(ToString::to_string)
.or_else(|| panic_info.payload().downcast_ref::<String>().cloned());
let location = panic_info.location().map(|loc| {
format!(
"{file}:{line}:{column}",
file = loc.file(),
line = loc.line(),
column = loc.column()
)
});
let thread_name = std::thread::current().name().map(ToString::to_string);
let thread_id = std::thread::current().id();
let backtrace = backtrace::Backtrace::new();
let panic = Panic { payload, location, thread_name, thread_id, backtrace };
let report = Report::new(panic);
log::error!(
"An unexpected panic occurred on thread {name} at {location}: {payload}. See report '{report}' for details.",
payload = report.panic.payload.as_deref().unwrap_or("Box<dyn Any>"),
name = report.panic.thread_name.as_deref().unwrap_or("<unnamed>"),
location = report.panic.location.as_deref().unwrap_or("<unknown>"),
report = report.id,
);
if let Ok(mut registry) = self.0.write() {
if let Some(old_report) = registry.push(report) {
log::trace!("Forgetting report {} to make space for new report.", old_report.id)
}
}
}
}
/// Reads the reports written in case of a panic.
#[derive(Clone)]
pub struct ReportReader(Arc<RwLock<ReportRegistry>>);
impl ReportReader {
/// Installs a new global panic hook, overriding any existing hook.
///
/// The hook writes any incoming panic in reports.
/// The reports can then be read by the returned [`ReportReader`].
pub fn install_panic_hook(capacity: NonZeroUsize) -> Self {
let registry = Arc::new(RwLock::new(ReportRegistry::new(capacity)));
let reader = ReportReader(registry.clone());
let writer = ReportWriter(registry.clone());
std::panic::set_hook(Box::new(move |panic_info| writer.write_panic(panic_info)));
reader
}
/// Join the thread corresponding to the passed handle, recovering either its value
/// or, in case the thread panicked, the id of the report corresponding to the panic.
///
/// The id can be used to read the report from the [`self.registry()`].
pub fn join_thread<T>(&self, thread: JoinHandle<T>) -> Result<T, Option<uuid::Uuid>> {
let thread_id = thread.thread().id();
thread.join().map_err(|_e| {
self.0
.read()
.unwrap()
.iter()
.find(|report| report.panic.thread_id == thread_id)
.map(|report| report.id)
})
}
/// Returns a registry that can be used to read the reports written during a panic.
pub fn registry(&self) -> Arc<RwLock<ReportRegistry>> {
self.0.clone()
}
}

View File

@@ -1,35 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
1 [0,]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,38 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

View File

@@ -88,6 +88,7 @@ pub trait ErrorCode {
}
}
#[allow(clippy::enum_variant_names)]
enum ErrorType {
Internal,
InvalidRequest,
@@ -297,7 +298,6 @@ MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
ReportNotFound , InvalidRequest , NOT_FOUND ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
UnretrievableDocument , Internal , BAD_REQUEST ;

View File

@@ -150,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"

View File

@@ -51,8 +51,6 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Report `{0}` not found. Either its id is incorrect, or it was deleted. To save on memory, only a limited amount of reports are kept.")]
ReportNotFound(uuid::Uuid),
}
impl ErrorCode for MeilisearchHttpError {
@@ -76,7 +74,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::ReportNotFound(_) => Code::ReportNotFound,
}
}
}

View File

@@ -24,7 +24,6 @@ pub mod features;
pub mod indexes;
mod metrics;
mod multi_search;
mod reports;
mod snapshot;
mod swap_indexes;
pub mod tasks;
@@ -41,8 +40,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
.service(web::scope("/metrics").configure(metrics::configure))
.service(web::scope("/experimental-features").configure(features::configure))
.service(web::scope("/reports").configure(reports::configure));
.service(web::scope("/experimental-features").configure(features::configure));
}
#[derive(Debug, Serialize)]

View File

@@ -1,39 +0,0 @@
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::{IndexScheduler, Report};
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(list_reports))).service(
web::scope("/{report_uid}")
.service(web::resource("").route(web::get().to(SeqHandler(get_report)))),
);
}
pub async fn list_reports(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> {
let reports = &index_scheduler.reports();
let reports = &reports.read().unwrap();
let reports: Vec<&Report> = reports.iter().collect();
Ok(HttpResponse::Ok().json(reports))
}
pub async fn get_report(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
report_id: web::Path<uuid::Uuid>,
) -> Result<HttpResponse, ResponseError> {
let reports = &index_scheduler.reports();
let reports = &reports.read().unwrap();
let report = reports
.find(*report_id)
.ok_or(crate::error::MeilisearchHttpError::ReportNotFound(*report_id))?;
Ok(HttpResponse::Ok().json(report))
}

View File

@@ -5,9 +5,11 @@ pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]

View File

@@ -6,21 +6,109 @@ use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{"productId": 1, "shopId": 1},
{"productId": 2, "shopId": 1},
{"productId": 3, "shopId": 2},
{"productId": 4, "shopId": 2},
{"productId": 5, "shopId": 3},
{"productId": 6, "shopId": 3},
{"productId": 7, "shopId": 4},
{"productId": 8, "shopId": 4},
{"productId": 9, "shopId": 5},
{"productId": 10, "shopId": 5}
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Brown"
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Black"
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Blue"
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Red"
},
{
"id": 5,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Blue"
},
{
"id": 6,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "Black"
},
{
"id": 7,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "White"
},
{
"id": 8,
"description": "Hoodie",
"brand": "Puma",
"product_id": "987654",
"color": "Gray"
},
{
"id": 9,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Green"
},
{
"id": 10,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Red"
},
{
"id": 11,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Blue"
},
{
"id": 12,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Indigo"
},
{
"id": 13,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Black"
},
{
"id": 14,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Stone Wash"
}
])
});
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
@@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(Value(response): Value) -> Vec<i64> {
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[1, 2]");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["estimatedTotalHits"] , @"11");
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[3, 4]");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["estimatedTotalHits"], @"10");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @"[5]");
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
}
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
#[actix_rt::test]
async fn distinct_search_with_pagination_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"0");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["page"], @"3");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"4");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6");
}

meilitool/Cargo.toml (new file, 19 lines)
View File

@@ -0,0 +1,19 @@
[package]
name = "meilitool"
description = "A CLI to edit a Meilisearch database from the command line"
version.workspace = true
authors.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.75"
clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.30", features = ["formatting"] }
uuid = { version = "1.5.0", features = ["v4"], default-features = false }

meilitool/src/main.rs (new file, 312 lines)
View File

@@ -0,0 +1,312 @@
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::path::PathBuf;
use anyhow::Context;
use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::check_version_file;
use meilisearch_types::Index;
use time::macros::format_description;
use time::OffsetDateTime;
use uuid_codec::UuidCodec;
mod uuid_codec;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
/// The database path where Meilisearch is running.
#[arg(long, default_value = "data.ms/")]
db_path: PathBuf,
#[command(subcommand)]
command: Command,
}
#[derive(Subcommand)]
enum Command {
/// Clears the task queue and makes it empty.
///
/// This command can be safely executed even if Meilisearch is running and processing tasks.
/// Once the task queue is empty you can restart Meilisearch and no tasks should be visible anymore,
/// not even the ones that were processing. However, it is quite likely that the previously
/// processing tasks will show up in the queue again with an associated internal error message.
ClearTaskQueue,
/// Exports a dump from the Meilisearch database.
///
/// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
/// If tasks are being processed while a dump is being exported, the dump may end up
/// malformed, with missing tasks.
///
/// TODO Verify this claim or make sure it cannot happen and we can export dumps
/// without caring about killing Meilisearch first!
ExportADump {
/// The directory in which the dump will be created.
#[arg(long, default_value = "dumps/")]
dump_dir: PathBuf,
/// Skip dumping the enqueued or processing tasks.
///
/// Can be useful when there are a lot of them and keeping them is not particularly useful.
/// Note that only the enqueued tasks take up space, so skipping the processed ones
/// is not particularly interesting.
#[arg(long)]
skip_enqueued_tasks: bool,
},
}
fn main() -> anyhow::Result<()> {
let Cli { db_path, command } = Cli::parse();
check_version_file(&db_path).context("While checking the version file")?;
match command {
Command::ClearTaskQueue => clear_task_queue(db_path),
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
}
}
}
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&path)
.with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database...");
let mut wtxn = env.write_txn()?;
let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
let total = all_tasks.len(&wtxn)?;
let status = try_opening_poly_database(&env, &wtxn, "status")?;
let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
try_clearing_poly_database(&mut wtxn, status, "status")?;
try_clearing_poly_database(&mut wtxn, kind, "kind")?;
try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
wtxn.commit().context("While committing the transaction")?;
eprintln!("Successfully deleted {total} tasks from the tasks database!");
eprintln!("Deleting the content files from disk...");
let mut count = 0usize;
let update_files = db_path.join("update_files");
let entries = read_dir(&update_files).with_context(|| {
format!("While trying to read the content of {:?}", update_files.display())
})?;
for result in entries {
match result {
Ok(ent) => match remove_file(ent.path()) {
Ok(_) => count += 1,
Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
},
Err(e) => {
eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
}
}
}
eprintln!("Sucessfully deleted {count} content files from disk!");
Ok(())
}
fn try_opening_database<KC: 'static, DC: 'static>(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<Database<KC, DC>> {
env.open_database(rtxn, Some(db_name))
.with_context(|| format!("While opening the {db_name:?} database"))?
.with_context(|| format!("Missing the {db_name:?} database"))
}
fn try_opening_poly_database(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<PolyDatabase> {
env.open_poly_database(rtxn, Some(db_name))
.with_context(|| format!("While opening the {db_name:?} poly database"))?
.with_context(|| format!("Missing the {db_name:?} poly database"))
}
fn try_clearing_poly_database(
wtxn: &mut RwTxn,
database: PolyDatabase,
db_name: &str,
) -> anyhow::Result<()> {
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
}
/// Exports a dump into the dump directory.
fn export_a_dump(
db_path: PathBuf,
dump_dir: PathBuf,
skip_enqueued_tasks: bool,
) -> Result<(), anyhow::Error> {
let started_at = OffsetDateTime::now_utc();
// 1. Extracts the instance UID from disk
let instance_uid_path = db_path.join("instance-uid");
let instance_uid = match read_to_string(&instance_uid_path) {
Ok(content) => match content.trim().parse() {
Ok(uuid) => Some(uuid),
Err(e) => {
eprintln!("Impossible to parse instance-uid: {e}");
None
}
},
Err(e) => {
eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
None
}
};
let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
let file_store =
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&index_scheduler_path)
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys...");
// 2. dump the keys
let auth_store = AuthController::new(&db_path, &None)
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
let mut dump_keys = dump.create_keys()?;
let mut count = 0;
for key in auth_store.list_keys()? {
dump_keys.push_key(&key)?;
count += 1;
}
dump_keys.flush()?;
eprintln!("Successfully dumped {count} keys!");
let rtxn = env.read_txn()?;
let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
try_opening_database(&env, &rtxn, "all-tasks")?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
if skip_enqueued_tasks {
eprintln!("Skip dumping the enqueued tasks...");
} else {
eprintln!("Dumping the enqueued tasks...");
// 3. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
let mut count = 0;
for ret in all_tasks.iter(&rtxn)? {
let (_, t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file_uuid) = content_file {
if status == Status::Enqueued {
let content_file = file_store.get_update(content_file_uuid)?;
let reader =
DocumentsBatchReader::from_reader(content_file).with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
while let Some(doc) = cursor.next_document().with_context(|| {
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
count += 1;
}
}
}
dump_tasks.flush()?;
eprintln!("Successfully dumped {count} enqueued tasks!");
}
eprintln!("Dumping the indexes...");
// 4. Dump the indexes
let mut count = 0;
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 4.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 4.2. Dump the settings
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
index_dumper.settings(&settings)?;
count += 1;
}
eprintln!("Successfully dumped {count} indexes!");
// We will not dump experimental feature settings
eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
let path = dump_dir.join(format!("{}.dump", dump_uid));
let file = File::create(&path)?;
dump.persist_to(BufWriter::new(file))?;
eprintln!("Dump exported at path {:?}", path.display());
Ok(())
}
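
With clap's derive defaults, the subcommands above should surface on the command line in kebab-case (clear-task-queue, export-a-dump) and the fields as --db-path, --dump-dir and --skip-enqueued-tasks. The following standalone sketch checks that assumption against a trimmed copy of the Cli/Command definitions; it is illustrative only and not part of the diff.

// Standalone sketch, assuming clap 4 with the "derive" feature; the structs are a
// trimmed copy of meilitool's Cli/Command, not an import of the real crate.
use std::path::PathBuf;

use clap::{Parser, Subcommand};

#[derive(Parser)]
struct Cli {
    // Database path, mirroring meilitool's default.
    #[arg(long, default_value = "data.ms/")]
    db_path: PathBuf,
    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    ClearTaskQueue,
    ExportADump {
        #[arg(long, default_value = "dumps/")]
        dump_dir: PathBuf,
        #[arg(long)]
        skip_enqueued_tasks: bool,
    },
}

fn main() {
    // Hypothetical invocation; the db path here is only an example value.
    let cli = Cli::parse_from([
        "meilitool",
        "--db-path", "/tmp/data.ms",
        "export-a-dump",
        "--skip-enqueued-tasks",
    ]);
    assert_eq!(cli.db_path, PathBuf::from("/tmp/data.ms"));
    match cli.command {
        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
            // --dump-dir was not passed, so the declared default applies.
            assert_eq!(dump_dir, PathBuf::from("dumps/"));
            assert!(skip_enqueued_tasks);
        }
        Command::ClearTaskQueue => unreachable!(),
    }
}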

View File

@@ -0,0 +1,24 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for value of struct Uuid.
pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
}
}
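
A quick round-trip sketch for the codec above, written as it might appear in a test module of this same file; it assumes the uuid crate with the v4 feature (which meilitool's Cargo.toml enables) and is not part of the diff.

// Round-trip sketch: encoding borrows the UUID's 16 raw bytes, decoding rebuilds
// the value from any 16-byte slice; other lengths decode to None.
#[cfg(test)]
mod tests {
    use meilisearch_types::heed::{BytesDecode, BytesEncode};
    use uuid::Uuid;

    use super::UuidCodec;

    #[test]
    fn uuid_round_trips_through_the_codec() {
        let uuid = Uuid::new_v4();
        let bytes = UuidCodec::bytes_encode(&uuid).expect("a UUID always encodes");
        assert_eq!(bytes.len(), 16);
        let decoded = UuidCodec::bytes_decode(&bytes).expect("16 bytes always decode");
        assert_eq!(uuid, decoded);
        assert!(UuidCodec::bytes_decode(&[0u8; 4]).is_none());
    }
}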

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.3", default-features = false }
charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -82,7 +82,7 @@ md5 = "0.7.0"
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@@ -106,3 +106,6 @@ thai = ["charabia/thai"]
# allow greek specialized tokenization
greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]

View File

@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::RoaringBitmap;
use serde_json::Value;

View File

@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};

View File

@@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
if let Some(distinct_fid) = distinct_fid {
let mut excluded = RoaringBitmap::new();
let mut results = vec![];
let mut skip = 0;
for docid in universe.iter() {
if results.len() >= length {
if results.len() >= from + length {
break;
}
if excluded.contains(docid) {
@@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
skip += 1;
if skip <= from {
continue;
}
results.push(docid);
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
// drain the results of the skipped elements
// this **must** be done **after** writing the entire results in `all_candidates` to ensure
// e.g. estimatedTotalHits is correct.
if results.len() >= from {
results.drain(..from);
} else {
results.clear();
}
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],

View File

@@ -29,7 +29,7 @@ use std::hash::Hash;
pub use cheapest_paths::PathVisitor;
pub use condition_docids_cache::ConditionDocIdsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use exactness::{ExactnessCondition, ExactnessGraph};
pub use exactness::ExactnessGraph;
pub use fid::{FidCondition, FidGraph};
pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};

View File

@@ -14,7 +14,7 @@ pub use grenad_helpers::{
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
serialize_roaring_bitmap, MergeFn,
};

View File

@@ -20,10 +20,7 @@ use slice_group_by::GroupBy;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
use self::enrich::enrich_documents_batch;
pub use self::enrich::{
extract_finite_float_from_value, validate_document_id, validate_document_id_value,
validate_geo_from_json, DocumentId,
};
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::helpers::{
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,

View File

@@ -202,7 +202,7 @@ test_distinct!(
EXTERNAL_DOCUMENTS_IDS.len(),
1,
vec![],
2
3
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -212,7 +212,7 @@ test_distinct!(
1,
2,
vec![],
1
3
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -222,7 +222,7 @@ test_distinct!(
EXTERNAL_DOCUMENTS_IDS.len(),
2,
vec![],
5
7
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -232,5 +232,5 @@ test_distinct!(
2,
4,
vec![],
3
7
);