update the CI

remove the use of unstable features in rustfmt
Merge #4132
2025-12-03 11:15:35 +00:00 · 2023-10-26 13:39:52 +02:00 · 2023-10-26 13:38:11 +02:00 · 2023-10-24 08:50:57 +00:00 · 2023-10-24 14:10:46 +05:30 · 2023-10-18 13:31:39 +05:30
27 changed files with 202 additions and 1484 deletions
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -183,7 +183,7 @@ jobs:
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
-          toolchain: nightly
+          toolchain: 1.71.1
          override: true
          components: rustfmt
      - name: Cache dependencies
--- a/.rustfmt.toml
+++ b/.rustfmt.toml
@@ -1,5 +1,3 @@
-unstable_features = true
-
 use_small_heuristics = "max"
 imports_granularity = "Module"
 group_imports = "StdExternalCrate"
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,6 @@
 resolver = "2"
 members = [
    "meilisearch",
-    "meilitool",
    "meilisearch-types",
    "meilisearch-auth",
    "meili-snap",
@@ -19,7 +18,7 @@ members = [
 ]

 [workspace.package]
-version = "1.5.0"
+version = "1.4.1"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
--- a/11
+++ b/11
@@ -3,7 +3,7 @@ FROM    rust:alpine3.16 AS compiler

 RUN     apk add -q --update-cache --no-cache build-base openssl-dev

-WORKDIR /
+WORKDIR /meilisearch

 ARG     COMMIT_SHA
 ARG     COMMIT_DATE
@@ -17,7 +17,7 @@ RUN     set -eux; \
        if [ "$apkArch" = "aarch64" ]; then \
            export JEMALLOC_SYS_WITH_LG_PAGE=16; \
        fi && \
-        cargo build --release -p meilisearch -p meilitool
+        cargo build --release

 # Run
 FROM    alpine:3.16
@@ -28,10 +28,9 @@ ENV     MEILI_SERVER_PROVIDER docker
 RUN     apk update --quiet \
        && apk add -q --no-cache libgcc tini curl

-# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
-# and it's easy to find.
-COPY    --from=compiler /target/release/meilisearch /bin/meilisearch
-COPY    --from=compiler /target/release/meilitool /bin/meilitool
+# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
+# to find.
+COPY    --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN     ln -s /bin/meilisearch /meilisearch
--- a/dump/src/reader/mod.rs
+++ b/dump/src/reader/mod.rs
@@ -526,12 +526,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@@ -541,12 +541,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@@ -571,12 +571,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@@ -617,12 +617,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@@ -632,12 +632,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@@ -647,12 +647,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

--- a/dump/src/reader/v2/mod.rs
+++ b/dump/src/reader/v2/mod.rs
@@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
 pub type Unchecked = settings::Unchecked;

 pub type Task = updates::UpdateEntry;
+pub type Kind = updates::UpdateMeta;

 // everything related to the errors
 pub type ResponseError = errors::ResponseError;
@@ -107,8 +108,11 @@ impl V2Reader {
    pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
        Ok(self.index_uuid.iter().map(|index| -> Result<_> {
            V2IndexReader::new(
-                index.uid.clone(),
                &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
+                index,
+                // Propagate I/O errors (e.g. a missing task file) to the caller instead of
+                // panicking while the dump is being read.
+                BufReader::new(File::open(self.dump.path().join("updates").join("data.jsonl"))?),
            )
        }))
    }
@@ -143,16 +147,41 @@ pub struct V2IndexReader {
 }

 impl V2IndexReader {
-    pub fn new(name: String, path: &Path) -> Result<Self> {
+    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
        let meta = File::open(path.join("meta.json"))?;
        let meta: DumpMeta = serde_json::from_reader(meta)?;

+        let mut created_at = None;
+        let mut updated_at = None;
+
+        for line in tasks.lines() {
+            let task: Task = serde_json::from_str(&line?)?;
+            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
+                continue;
+            }
+
+            let new_created_at = match task.update.meta() {
+                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
+                _ => None,
+            };
+            let new_updated_at = task.update.finished_at();
+            // `Some(_) > None` holds, so skip `None` or a later task would erase `created_at`.
+            if new_created_at.is_some() && (created_at.is_none() || created_at > new_created_at) {
+                created_at = new_created_at;
+            }
+
+            if updated_at.is_none() || updated_at < new_updated_at {
+                updated_at = new_updated_at;
+            }
+        }
+
+        let current_time = OffsetDateTime::now_utc();
+
        let metadata = IndexMetadata {
-            uid: name,
+            uid: index_uuid.uid.clone(),
            primary_key: meta.primary_key,
-            // FIXME: Iterate over the whole task queue to find the creation and last update date.
-            created_at: OffsetDateTime::now_utc(),
-            updated_at: OffsetDateTime::now_utc(),
+            created_at: created_at.unwrap_or(current_time),
+            updated_at: updated_at.unwrap_or(current_time),
        };

        let ret = V2IndexReader {
@@ -248,12 +277,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@@ -263,12 +292,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@@ -293,12 +322,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@@ -340,12 +369,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@@ -355,12 +384,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@@ -370,12 +399,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

--- a/dump/src/reader/v2/updates.rs
+++ b/dump/src/reader/v2/updates.rs
@@ -227,4 +227,14 @@ impl UpdateStatus {
            _ => None,
        }
    }
+
+    /// Returns when the update reached a final outcome, i.e. was processed or failed.
+    pub fn finished_at(&self) -> Option<OffsetDateTime> {
+        match self {
+            Self::Processed(update) => Some(update.processed_at),
+            Self::Failed(update) => Some(update.failed_at),
+            // Every other status yields no timestamp.
+            Self::Processing(_) | Self::Enqueued(_) | Self::Aborted(_) => None,
+        }
+    }
 }
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -923,10 +923,6 @@ impl IndexScheduler {
                    self.index_mapper.index(&rtxn, &index_uid)?
                };

-                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-                *self.currently_updating_index.write().unwrap() =
-                    Some((index_uid.clone(), index.clone()));
-
                let mut index_wtxn = index.write_txn()?;
                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
                index_wtxn.commit()?;
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -39,7 +39,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        test_breakpoint_sdr: _,
        planned_failures: _,
        run_loop_iteration: _,
-        currently_updating_index: _,
    } = scheduler;

    let rtxn = env.read_txn().unwrap();
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -27,7 +27,7 @@ mod index_mapper;
 mod insta_snapshot;
 mod lru;
 mod utils;
-pub mod uuid_codec;
+mod uuid_codec;

 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
@@ -331,10 +331,6 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

-    /// A few types of long running batches of tasks that act on a single index set this field
-    /// so that a handle to the index is available from other threads (search) in an optimized manner.
-    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
-
    // ================= test
    // The next entry is dedicated to the tests.
    /// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -378,7 +374,6 @@ impl IndexScheduler {
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
            version_file_path: self.version_file_path.clone(),
-            currently_updating_index: self.currently_updating_index.clone(),
            #[cfg(test)]
            test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
            #[cfg(test)]
@@ -475,7 +470,6 @@ impl IndexScheduler {
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
-            currently_updating_index: Arc::new(RwLock::new(None)),

            #[cfg(test)]
            test_breakpoint_sdr,
@@ -658,13 +652,6 @@ impl IndexScheduler {
    /// If you need to fetch information from or perform an action on all indexes,
    /// see the `try_for_each_index` function.
    pub fn index(&self, name: &str) -> Result<Index> {
-        if let Some((current_name, current_index)) =
-            self.currently_updating_index.read().unwrap().as_ref()
-        {
-            if current_name == name {
-                return Ok(current_index.clone());
-            }
-        }
        let rtxn = self.env.read_txn()?;
        self.index_mapper.index(&rtxn, name)
    }
@@ -1146,9 +1133,6 @@ impl IndexScheduler {
            handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
        };

-        // Reset the currently updating index to relinquish the index handle
-        *self.currently_updating_index.write().unwrap() = None;
-
        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@@ -50,7 +50,6 @@ hebrew = ["milli/hebrew"]
 japanese = ["milli/japanese"]
 # thai specialized tokenization
 thai = ["milli/thai"]
+
 # allow greek specialized tokenization
 greek = ["milli/greek"]
-# allow khmer specialized tokenization
-khmer = ["milli/khmer"]
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -150,7 +150,6 @@ hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
-khmer = ["meilisearch-types/khmer"]

 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
--- a/meilisearch/tests/common/mod.rs
+++ b/meilisearch/tests/common/mod.rs
@@ -5,11 +5,9 @@ pub mod service;

 use std::fmt::{self, Display};

-#[allow(unused)]
 pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
 use meili_snap::json_string;
 use serde::{Deserialize, Serialize};
-#[allow(unused)]
 pub use server::{default_settings, Server};

 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@@ -6,109 +6,21 @@ use crate::json;

 pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
-      {
-        "id": 1,
-        "description": "Leather Jacket",
-        "brand": "Lee Jeans",
-        "product_id": "123456",
-        "color": "Brown"
-      },
-      {
-        "id": 2,
-        "description": "Leather Jacket",
-        "brand": "Lee Jeans",
-        "product_id": "123456",
-        "color": "Black"
-      },
-      {
-        "id": 3,
-        "description": "Leather Jacket",
-        "brand": "Lee Jeans",
-        "product_id": "123456",
-        "color": "Blue"
-      },
-      {
-        "id": 4,
-        "description": "T-Shirt",
-        "brand": "Nike",
-        "product_id": "789012",
-        "color": "Red"
-      },
-      {
-        "id": 5,
-        "description": "T-Shirt",
-        "brand": "Nike",
-        "product_id": "789012",
-        "color": "Blue"
-      },
-      {
-        "id": 6,
-        "description": "Running Shoes",
-        "brand": "Adidas",
-        "product_id": "456789",
-        "color": "Black"
-      },
-      {
-        "id": 7,
-        "description": "Running Shoes",
-        "brand": "Adidas",
-        "product_id": "456789",
-        "color": "White"
-      },
-      {
-        "id": 8,
-        "description": "Hoodie",
-        "brand": "Puma",
-        "product_id": "987654",
-        "color": "Gray"
-      },
-      {
-        "id": 9,
-        "description": "Sweater",
-        "brand": "Gap",
-        "product_id": "234567",
-        "color": "Green"
-      },
-      {
-        "id": 10,
-        "description": "Sweater",
-        "brand": "Gap",
-        "product_id": "234567",
-        "color": "Red"
-      },
-      {
-        "id": 11,
-        "description": "Sweater",
-        "brand": "Gap",
-        "product_id": "234567",
-        "color": "Blue"
-      },
-      {
-        "id": 12,
-        "description": "Jeans",
-        "brand": "Levi's",
-        "product_id": "345678",
-        "color": "Indigo"
-      },
-      {
-        "id": 13,
-        "description": "Jeans",
-        "brand": "Levi's",
-        "product_id": "345678",
-        "color": "Black"
-      },
-      {
-        "id": 14,
-        "description": "Jeans",
-        "brand": "Levi's",
-        "product_id": "345678",
-        "color": "Stone Wash"
-      }
+        {"productId": 1, "shopId": 1},
+        {"productId": 2, "shopId": 1},
+        {"productId": 3, "shopId": 2},
+        {"productId": 4, "shopId": 2},
+        {"productId": 5, "shopId": 3},
+        {"productId": 6, "shopId": 3},
+        {"productId": 7, "shopId": 4},
+        {"productId": 8, "shopId": 4},
+        {"productId": 9, "shopId": 5},
+        {"productId": 10, "shopId": 5}
    ])
 });

-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
+pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
+pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";

 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
 #[actix_rt::test]
@@ -121,121 +33,31 @@ async fn distinct_search_with_offset_no_ranking() {
    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
    index.wait_task(1).await;

-    fn get_hits(response: &Value) -> Vec<&str> {
+    fn get_hits(Value(response): Value) -> Vec<i64> {
        let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
    }

-    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
+    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
-    snapshot!(response["estimatedTotalHits"] , @"11");
+    snapshot!(format!("{:?}", hits), @"[1, 2]");

-    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
+    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
-    snapshot!(response["estimatedTotalHits"], @"10");
+    snapshot!(format!("{:?}", hits), @"[3, 4]");

-    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
+    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"1");
-    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
+    snapshot!(format!("{:?}", hits), @"[5]");

-    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
+    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
-}
-
-/// testing: https://github.com/meilisearch/meilisearch/issues/4130
-#[actix_rt::test]
-async fn distinct_search_with_pagination_no_ranking() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = DOCUMENTS.clone();
-    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
-    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-    index.wait_task(1).await;
-
-    fn get_hits(response: &Value) -> Vec<&str> {
-        let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
-    }
-
-    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["page"], @"0");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
-    snapshot!(response["page"], @"1");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
-    snapshot!(response["page"], @"2");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
-    snapshot!(response["page"], @"3");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["page"], @"4");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"3");
-    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
-    snapshot!(response["page"], @"2");
-    snapshot!(response["totalPages"], @"2");
-    snapshot!(response["totalHits"], @"6");
 }
--- a/meilitool/Cargo.toml
+++ b/meilitool/Cargo.toml
@@ -1,19 +0,0 @@
-[package]
-name = "meilitool"
-description = "A CLI to edit a Meilisearch database from the command line"
-version.workspace = true
-authors.workspace = true
-homepage.workspace = true
-readme.workspace = true
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-anyhow = "1.0.75"
-clap = { version = "4.2.1", features = ["derive"] }
-dump = { path = "../dump" }
-file-store = { path = "../file-store" }
-meilisearch-auth = { path = "../meilisearch-auth" }
-meilisearch-types = { path = "../meilisearch-types" }
-time = { version = "0.3.30", features = ["formatting"] }
-uuid = { version = "1.5.0", features = ["v4"], default-features = false }
--- a/meilitool/src/main.rs
+++ b/meilitool/src/main.rs
@@ -1,312 +0,0 @@
-use std::fs::{read_dir, read_to_string, remove_file, File};
-use std::io::BufWriter;
-use std::path::PathBuf;
-
-use anyhow::Context;
-use clap::{Parser, Subcommand};
-use dump::{DumpWriter, IndexMetadata};
-use file_store::FileStore;
-use meilisearch_auth::AuthController;
-use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
-use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
-use meilisearch_types::milli::{obkv_to_json, BEU32};
-use meilisearch_types::tasks::{Status, Task};
-use meilisearch_types::versioning::check_version_file;
-use meilisearch_types::Index;
-use time::macros::format_description;
-use time::OffsetDateTime;
-use uuid_codec::UuidCodec;
-
-mod uuid_codec;
-
-#[derive(Parser)]
-#[command(author, version, about, long_about = None)]
-struct Cli {
-    /// The database path where the Meilisearch is running.
-    #[arg(long, default_value = "data.ms/")]
-    db_path: PathBuf,
-
-    #[command(subcommand)]
-    command: Command,
-}
-
-#[derive(Subcommand)]
-enum Command {
-    /// Clears the task queue and make it empty.
-    ///
-    /// This command can be safely executed even if Meilisearch is running and processing tasks.
-    /// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
-    /// even the ones that were processing. However, it's highly possible that you see the processing
-    /// tasks in the queue again with an associated internal error message.
-    ClearTaskQueue,
-
-    /// Exports a dump from the Meilisearch database.
-    ///
-    /// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
-    /// If tasks are being processed while a dump is being exported there are chances for the dump to be
-    /// malformed with missing tasks.
-    ///
-    /// TODO Verify this claim or make sure it cannot happen and we can export dumps
-    ///      without caring about killing Meilisearch first!
-    ExportADump {
-        /// The directory in which the dump will be created.
-        #[arg(long, default_value = "dumps/")]
-        dump_dir: PathBuf,
-
-        /// Skip dumping the enqueued or processing tasks.
-        ///
-        /// Can be useful when there are a lot of them and it is not particularly useful
-        /// to keep them. Note that only the enqueued tasks takes up space so skipping
-        /// the processed ones is not particularly interesting.
-        #[arg(long)]
-        skip_enqueued_tasks: bool,
-    },
-}
-
-fn main() -> anyhow::Result<()> {
-    let Cli { db_path, command } = Cli::parse();
-
-    check_version_file(&db_path).context("While checking the version file")?;
-
-    match command {
-        Command::ClearTaskQueue => clear_task_queue(db_path),
-        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
-            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
-        }
-    }
-}
-
-/// Clears the task queue located at `db_path`.
-fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
-    let path = db_path.join("tasks");
-    let env = EnvOpenOptions::new()
-        .max_dbs(100)
-        .open(&path)
-        .with_context(|| format!("While trying to open {:?}", path.display()))?;
-
-    eprintln!("Deleting tasks from the database...");
-
-    let mut wtxn = env.write_txn()?;
-    let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
-    let total = all_tasks.len(&wtxn)?;
-    let status = try_opening_poly_database(&env, &wtxn, "status")?;
-    let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
-    let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
-    let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
-    let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
-    let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
-    let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
-
-    try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
-    try_clearing_poly_database(&mut wtxn, status, "status")?;
-    try_clearing_poly_database(&mut wtxn, kind, "kind")?;
-    try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
-    try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
-    try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
-    try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
-    try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
-
-    wtxn.commit().context("While committing the transaction")?;
-
-    eprintln!("Successfully deleted {total} tasks from the tasks database!");
-    eprintln!("Deleting the content files from disk...");
-
-    let mut count = 0usize;
-    let update_files = db_path.join("update_files");
-    let entries = read_dir(&update_files).with_context(|| {
-        format!("While trying to read the content of {:?}", update_files.display())
-    })?;
-    for result in entries {
-        match result {
-            Ok(ent) => match remove_file(ent.path()) {
-                Ok(_) => count += 1,
-                Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
-            },
-            Err(e) => {
-                eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
-            }
-        }
-    }
-
-    eprintln!("Sucessfully deleted {count} content files from disk!");
-
-    Ok(())
-}
-
-fn try_opening_database<KC: 'static, DC: 'static>(
-    env: &Env,
-    rtxn: &RoTxn,
-    db_name: &str,
-) -> anyhow::Result<Database<KC, DC>> {
-    env.open_database(rtxn, Some(db_name))
-        .with_context(|| format!("While opening the {db_name:?} database"))?
-        .with_context(|| format!("Missing the {db_name:?} database"))
-}
-
-fn try_opening_poly_database(
-    env: &Env,
-    rtxn: &RoTxn,
-    db_name: &str,
-) -> anyhow::Result<PolyDatabase> {
-    env.open_poly_database(rtxn, Some(db_name))
-        .with_context(|| format!("While opening the {db_name:?} poly database"))?
-        .with_context(|| format!("Missing the {db_name:?} poly database"))
-}
-
-fn try_clearing_poly_database(
-    wtxn: &mut RwTxn,
-    database: PolyDatabase,
-    db_name: &str,
-) -> anyhow::Result<()> {
-    database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
-}
-
-/// Exports a dump into the dump directory.
-fn export_a_dump(
-    db_path: PathBuf,
-    dump_dir: PathBuf,
-    skip_enqueued_tasks: bool,
-) -> Result<(), anyhow::Error> {
-    let started_at = OffsetDateTime::now_utc();
-
-    // 1. Extracts the instance UID from disk
-    let instance_uid_path = db_path.join("instance-uid");
-    let instance_uid = match read_to_string(&instance_uid_path) {
-        Ok(content) => match content.trim().parse() {
-            Ok(uuid) => Some(uuid),
-            Err(e) => {
-                eprintln!("Impossible to parse instance-uid: {e}");
-                None
-            }
-        },
-        Err(e) => {
-            eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
-            None
-        }
-    };
-
-    let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
-    let file_store =
-        FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
-
-    let index_scheduler_path = db_path.join("tasks");
-    let env = EnvOpenOptions::new()
-        .max_dbs(100)
-        .open(&index_scheduler_path)
-        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
-
-    eprintln!("Dumping the keys...");
-
-    // 2. dump the keys
-    let auth_store = AuthController::new(&db_path, &None)
-        .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
-    let mut dump_keys = dump.create_keys()?;
-    let mut count = 0;
-    for key in auth_store.list_keys()? {
-        dump_keys.push_key(&key)?;
-        count += 1;
-    }
-    dump_keys.flush()?;
-
-    eprintln!("Successfully dumped {count} keys!");
-
-    let rtxn = env.read_txn()?;
-    let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
-        try_opening_database(&env, &rtxn, "all-tasks")?;
-    let index_mapping: Database<Str, UuidCodec> =
-        try_opening_database(&env, &rtxn, "index-mapping")?;
-
-    if skip_enqueued_tasks {
-        eprintln!("Skip dumping the enqueued tasks...");
-    } else {
-        eprintln!("Dumping the enqueued tasks...");
-
-        // 3. dump the tasks
-        let mut dump_tasks = dump.create_tasks_queue()?;
-        let mut count = 0;
-        for ret in all_tasks.iter(&rtxn)? {
-            let (_, t) = ret?;
-            let status = t.status;
-            let content_file = t.content_uuid();
-            let mut dump_content_file = dump_tasks.push_task(&t.into())?;
-
-            // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
-            if let Some(content_file_uuid) = content_file {
-                if status == Status::Enqueued {
-                    let content_file = file_store.get_update(content_file_uuid)?;
-
-                    let reader =
-                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
-                            format!("While reading content file {:?}", content_file_uuid)
-                        })?;
-
-                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
-                    while let Some(doc) = cursor.next_document().with_context(|| {
-                        format!("While iterating on content file {:?}", content_file_uuid)
-                    })? {
-                        dump_content_file
-                            .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
-                    }
-                    dump_content_file.flush()?;
-                    count += 1;
-                }
-            }
-        }
-        dump_tasks.flush()?;
-
-        eprintln!("Successfully dumped {count} enqueued tasks!");
-    }
-
-    eprintln!("Dumping the indexes...");
-
-    // 4. Dump the indexes
-    let mut count = 0;
-    for result in index_mapping.iter(&rtxn)? {
-        let (uid, uuid) = result?;
-        let index_path = db_path.join("indexes").join(uuid.to_string());
-        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
-            format!("While trying to open the index at path {:?}", index_path.display())
-        })?;
-
-        let rtxn = index.read_txn()?;
-        let metadata = IndexMetadata {
-            uid: uid.to_owned(),
-            primary_key: index.primary_key(&rtxn)?.map(String::from),
-            created_at: index.created_at(&rtxn)?,
-            updated_at: index.updated_at(&rtxn)?,
-        };
-        let mut index_dumper = dump.create_index(uid, &metadata)?;
-
-        let fields_ids_map = index.fields_ids_map(&rtxn)?;
-        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-
-        // 4.1. Dump the documents
-        for ret in index.all_documents(&rtxn)? {
-            let (_id, doc) = ret?;
-            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
-            index_dumper.push_document(&document)?;
-        }
-
-        // 4.2. Dump the settings
-        let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
-        index_dumper.settings(&settings)?;
-        count += 1;
-    }
-
-    eprintln!("Successfully dumped {count} indexes!");
-    // We will not dump experimental feature settings
-    eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
-
-    let dump_uid = started_at.format(format_description!(
-        "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
-    )).unwrap();
-
-    let path = dump_dir.join(format!("{}.dump", dump_uid));
-    let file = File::create(&path)?;
-    dump.persist_to(BufWriter::new(file))?;
-
-    eprintln!("Dump exported at path {:?}", path.display());
-
-    Ok(())
-}
--- a/meilitool/src/uuid_codec.rs
+++ b/meilitool/src/uuid_codec.rs
@@ -1,24 +0,0 @@
-use std::borrow::Cow;
-use std::convert::TryInto;
-
-use meilisearch_types::heed::{BytesDecode, BytesEncode};
-use uuid::Uuid;
-
-/// A heed codec for value of struct Uuid.
-pub struct UuidCodec;
-
-impl<'a> BytesDecode<'a> for UuidCodec {
-    type DItem = Uuid;
-
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        bytes.try_into().ok().map(Uuid::from_bytes)
-    }
-}
-
-impl BytesEncode<'_> for UuidCodec {
-    type EItem = Uuid;
-
-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        Some(Cow::Borrowed(item.as_bytes()))
-    }
-}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.5", default-features = false }
+charabia = { version = "0.8.3", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -82,7 +82,7 @@ md5 = "0.7.0"
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
@@ -106,6 +106,3 @@ thai = ["charabia/thai"]

 # allow greek specialized tokenization
 greek = ["charabia/greek"]
-
-# allow khmer specialized tokenization
-khmer = ["charabia/khmer"]
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
 use std::ops::Bound::{self, Excluded, Included};

 use either::Either;
-pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
+pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
 use roaring::RoaringBitmap;
 use serde_json::Value;

--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;

 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
-pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
+pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
 use self::new::PartialSearchResult;
 use crate::error::UserError;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -46,8 +46,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        if let Some(distinct_fid) = distinct_fid {
            let mut excluded = RoaringBitmap::new();
            let mut results = vec![];
+            let mut skip = 0;
            for docid in universe.iter() {
-                if results.len() >= from + length {
+                if results.len() >= length {
                    break;
                }
                if excluded.contains(docid) {
@@ -55,19 +56,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                }

                distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
+                skip += 1;
+                if skip <= from {
+                    continue;
+                }
+
                results.push(docid);
            }

            let mut all_candidates = universe - excluded;
            all_candidates.extend(results.iter().copied());
-            // drain the results of the skipped elements
-            // this **must** be done **after** writing the entire results in `all_candidates` to ensure
-            // e.g. estimatedTotalHits is correct.
-            if results.len() >= from {
-                results.drain(..from);
-            } else {
-                results.clear();
-            }

            return Ok(BucketSortOutput {
                scores: vec![Default::default(); results.len()],
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -434,18 +434,7 @@ pub fn execute_search(
        let mut search = Search::default();
        let docids = match ctx.index.vector_hnsw(ctx.txn)? {
            Some(hnsw) => {
-                if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
-                    if vector.len() != expected_size {
-                        return Err(UserError::InvalidVectorDimensions {
-                            expected: expected_size,
-                            found: vector.len(),
-                        }
-                        .into());
-                    }
-                }
-
                let vector = NDotProductPoint::new(vector.clone());
-
                let neighbors = hnsw.search(&vector, &mut search);

                let mut docids = Vec::new();
--- a/milli/src/search/new/ranking_rule_graph/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/mod.rs
@@ -29,7 +29,7 @@ use std::hash::Hash;
 pub use cheapest_paths::PathVisitor;
 pub use condition_docids_cache::ConditionDocIdsCache;
 pub use dead_ends_cache::DeadEndsCache;
-pub use exactness::ExactnessGraph;
+pub use exactness::{ExactnessCondition, ExactnessGraph};
 pub use fid::{FidCondition, FidGraph};
 pub use position::{PositionCondition, PositionGraph};
 pub use proximity::{ProximityCondition, ProximityGraph};
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -14,7 +14,7 @@ pub use grenad_helpers::{
 };
 pub use merge_functions::{
    concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
-    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
+    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
    serialize_roaring_bitmap, MergeFn,
 };

--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -20,7 +20,10 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{extract_finite_float_from_value, DocumentId};
+pub use self::enrich::{
+    extract_finite_float_from_value, validate_document_id, validate_document_id_value,
+    validate_geo_from_json, DocumentId,
+};
 pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
--- a/milli/tests/search/distinct.rs
+++ b/milli/tests/search/distinct.rs
@@ -202,7 +202,7 @@ test_distinct!(
    EXTERNAL_DOCUMENTS_IDS.len(),
    1,
    vec![],
-    3
+    2
 );
 test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -212,7 +212,7 @@ test_distinct!(
    1,
    2,
    vec![],
-    3
+    1
 );
 test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -222,7 +222,7 @@ test_distinct!(
    EXTERNAL_DOCUMENTS_IDS.len(),
    2,
    vec![],
-    7
+    5
 );
 test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -232,5 +232,5 @@ test_distinct!(
    2,
    4,
    vec![],
-    7
+    3
 );
Author	SHA1	Message	Date
Tamo	0a34f70566	update the CI	2023-10-26 13:39:52 +02:00
Tamo	75d8d4f3a8	remove the use unstable in rustfmt	2023-10-26 13:38:11 +02:00
meili-bors[bot]	62ea81bef6	Merge #4132 4132: Extract the creation and last updated timestamp from v2 dumps r=irevoire a=vivek-26 # Pull Request ## Related issue Fixes #2989 ## What does this PR do? This PR - - extracts the `created_at` and `updated_at` dates from v2 dumps. - updates the unit tests. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Vivek Kumar <vivek.26@outlook.com>	2023-10-24 08:50:57 +00:00
Vivek Kumar	f28f09ae2f	update tests for v2 dumps	2023-10-24 14:10:46 +05:30
Vivek Kumar	62cc97ba70	update tests to include created_at and updated_at in v2 dumps	2023-10-18 13:31:39 +05:30
Vivek Kumar	fed59cc1d5	extract created_at and updated_at dates from v2 dumps	2023-10-18 13:30:24 +05:30