Activate only the necessary features for Japanese

Merge #4746
4746: Fix hybrid search limit offset r=irevoire a=dureuill # Pull Request ## Related issue Fixes #4745 ## What does this PR do? - Apply offset and limit to the keyword search results when they are returned early. - Add a test that is initially failing, and then passes Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-12-03 19:25:36 +00:00 · 2024-07-08 18:17:11 +02:00 · 2024-06-27 12:47:08 +00:00 · 2024-06-27 14:25:33 +02:00 · 2024-06-27 14:21:34 +02:00 · 2024-06-27 10:06:28 +00:00
21 changed files with 708 additions and 73 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -381,9 +381,9 @@ dependencies = [

 [[package]]
 name = "arroy"
-version = "0.3.1"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
+checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
 dependencies = [
 "bytemuck",
 "byteorder",
@@ -679,9 +679,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"

 [[package]]
 name = "bytemuck"
-version = "1.15.0"
+version = "1.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
+checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
 dependencies = [
 "bytemuck_derive",
 ]
@@ -2273,9 +2273,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

 [[package]]
 name = "heed"
-version = "0.20.1"
+version = "0.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
+checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8"
 dependencies = [
 "bitflags 2.5.0",
 "byteorder",
@@ -3172,9 +3172,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"

 [[package]]
 name = "lmdb-master-sys"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
+checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3"
 dependencies = [
 "cc",
 "doxygen-rs",
@@ -5053,18 +5053,18 @@ dependencies = [

 [[package]]
 name = "thiserror"
-version = "1.0.58"
+version = "1.0.61"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
 dependencies = [
 "thiserror-impl",
 ]

 [[package]]
 name = "thiserror-impl"
-version = "1.0.58"
+version = "1.0.61"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
 dependencies = [
 "proc-macro2",
 "quote",
--- a/2
+++ b/2
@@ -17,7 +17,7 @@ RUN     set -eux; \
        if [ "$apkArch" = "aarch64" ]; then \
            export JEMALLOC_SYS_WITH_LG_PAGE=16; \
        fi && \
-        cargo build --release -p meilisearch -p meilitool
+        cargo build --release -p meilisearch -p meilitool --no-default-features --features "analytics mini-dashboard japanese"

 # Run
 FROM    alpine:3.16
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -40,7 +40,7 @@ ureq = "2.9.7"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }

 [dev-dependencies]
-arroy = "0.3.1"
+arroy = "0.4.0"
 big_s = "1.0.2"
 crossbeam = "0.8.4"
 insta = { version = "1.34.0", features = ["json", "redactions"] }
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -5396,7 +5396,7 @@ mod tests {
            let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
                .map(Some)
                .or_else(|e| match e {
-                    arroy::Error::MissingMetadata => Ok(None),
+                    arroy::Error::MissingMetadata(_) => Ok(None),
                    e => Err(e),
                })
                .transpose();
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -398,7 +398,8 @@ impl ErrorCode for milli::Error {
                    UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                    UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
                    UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
-                    UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
+                    UserError::InvalidVectorsMapType { .. }
+                    | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
                    UserError::TooManyVectors(_, _) => Code::TooManyVectors,
                    UserError::SortError(_) => Code::InvalidSearchSort,
                    UserError::InvalidMinTypoWordLenSetting(_, _) => {
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -158,5 +158,5 @@ vietnamese = ["meilisearch-types/vietnamese"]
 swedish-recomposition = ["meilisearch-types/swedish-recomposition"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
-sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
+sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"
--- a/meilisearch/tests/common/mod.rs
+++ b/meilisearch/tests/common/mod.rs
@@ -65,7 +65,7 @@ impl Display for Value {
        write!(
            f,
            "{}",
-            json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
+            json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
        )
    }
 }
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -150,6 +150,35 @@ async fn simple_search() {
    snapshot!(response["semanticHitCount"], @"3");
 }

+#[actix_rt::test]
+async fn limit_offset() {
+    let server = Server::new().await;
+    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
+    // semanticRatio 0.2 with offset 1 / limit 1: expect exactly one hit and no semantic hit.
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
+    snapshot!(response["semanticHitCount"], @"0");
+    assert_eq!(response["hits"].as_array().unwrap().len(), 1);
+    // Same request on a fresh server with semanticRatio 0.9: expect exactly one, semantic, hit.
+    let server = Server::new().await;
+    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
+
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
+    snapshot!(response["semanticHitCount"], @"1");
+    assert_eq!(response["hits"].as_array().unwrap().len(), 1);
+}
+
 #[actix_rt::test]
 async fn simple_search_hf() {
    let server = Server::new().await;
--- a/meilisearch/tests/vector/mod.rs
+++ b/meilisearch/tests/vector/mod.rs
@@ -190,6 +190,285 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
    index
 }

+#[actix_rt::test]
+async fn user_provided_embeddings_error() {
+    let server = Server::new().await;
+    let index = generate_default_user_provided_documents(&server).await;
+
+    // First case, we forget to specify the `regenerate`
+    let documents =
+        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 2,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    // Second case, we don't specify anything
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 3,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    // Third case, we specify something wrong in place of regenerate
+    let documents =
+        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 4,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+    // Fourth case, `embeddings` is a boolean
+    let documents =
+        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 5,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+    // Fifth case, `embeddings` is an array containing a boolean
+    let documents =
+        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 6,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+    // Sixth case, `embeddings` is a nested array containing a boolean
+    let documents =
+        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 7,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+    // Valid case, both `embeddings` and `regenerate` are specified: the task must succeed
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task["status"], @r###""succeeded""###);
+    // Valid case, only `regenerate` is specified: the task must succeed
+    let documents =
+        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task["status"], @r###""succeeded""###);
+    // `embeddings` starts with a number and then contains an array: the array is rejected
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 10,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+    // `embeddings` starts with an array and then contains a number: the number is rejected
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 11,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+    // Two invalid values: the error reports the boolean at `.manual.embeddings[0][1]`
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": 12,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
+        "code": "invalid_vectors_type",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
+
 #[actix_rt::test]
 async fn clear_documents() {
    let server = Server::new().await;
@@ -213,11 +492,11 @@ async fn clear_documents() {

    // Make sure the arroy DB has been cleared
    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
-    snapshot!(json_string!(documents), @r###"
+    snapshot!(documents, @r###"
    {
      "hits": [],
      "query": "",
-      "processingTimeMs": 0,
+      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 0,
@@ -225,3 +504,85 @@ async fn clear_documents() {
    }
    "###);
 }
+
+#[actix_rt::test]
+async fn add_remove_one_vector_4588() {
+    // https://github.com/meilisearch/meilisearch/issues/4588
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false
+    }
+    "###);
+    // Declare a `userProvided` embedder named `manual` with 3 dimensions.
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let task = server.wait_task(response.uid()).await;
+    snapshot!(task, name: "settings-processed");
+    // Add document 0 with a vector for the `manual` embedder.
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, name: "document-added");
+    // Send document 0 again, this time with `manual` set to null.
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": null }},
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, name: "document-deleted");
+    // Vector search: the snapshot expects document 0 to be the only hit.
+    let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
+    snapshot!(documents, @r###"
+    {
+      "hits": [
+        {
+          "id": 0,
+          "name": "kefir"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 1,
+      "semanticHitCount": 1
+    }
+    "###);
+    // Fetch the documents with their vectors: the snapshot expects an empty `_vectors` object.
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir",
+          "_vectors": {}
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 1
+    }
+    "###);
+}
--- a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap
+++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap
@@ -0,0 +1,19 @@
+---
+source: meilisearch/tests/vector/mod.rs
+---
+{
+  "uid": 1,
+  "indexUid": "doggo",
+  "status": "succeeded",
+  "type": "documentAdditionOrUpdate",
+  "canceledBy": null,
+  "details": {
+    "receivedDocuments": 1,
+    "indexedDocuments": 1
+  },
+  "error": null,
+  "duration": "[duration]",
+  "enqueuedAt": "[date]",
+  "startedAt": "[date]",
+  "finishedAt": "[date]"
+}
--- a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap
+++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap
@@ -0,0 +1,19 @@
+---
+source: meilisearch/tests/vector/mod.rs
+---
+{
+  "uid": 2,
+  "indexUid": "doggo",
+  "status": "succeeded",
+  "type": "documentAdditionOrUpdate",
+  "canceledBy": null,
+  "details": {
+    "receivedDocuments": 1,
+    "indexedDocuments": 1
+  },
+  "error": null,
+  "duration": "[duration]",
+  "enqueuedAt": "[date]",
+  "startedAt": "[date]",
+  "finishedAt": "[date]"
+}
--- a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap
+++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap
@@ -0,0 +1,23 @@
+---
+source: meilisearch/tests/vector/mod.rs
+---
+{
+  "uid": 0,
+  "indexUid": "doggo",
+  "status": "succeeded",
+  "type": "settingsUpdate",
+  "canceledBy": null,
+  "details": {
+    "embedders": {
+      "manual": {
+        "source": "userProvided",
+        "dimensions": 3
+      }
+    }
+  },
+  "error": null,
+  "duration": "[duration]",
+  "enqueuedAt": "[date]",
+  "startedAt": "[date]",
+  "finishedAt": "[date]"
+}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
 ] }
 tiktoken-rs = "0.5.8"
 liquid = "0.26.4"
-arroy = "0.3.1"
+arroy = "0.4.0"
 rand = "0.8.5"
 tracing = "0.1.40"
 ureq = { version = "2.9.7", features = ["json"] }
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -119,6 +119,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidVectorDimensions { expected: usize, found: usize },
    #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
    InvalidVectorsMapType { document_id: String, value: Value },
+    #[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
+    InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@@ -281,8 +283,9 @@ impl From<arroy::Error> for Error {
            arroy::Error::DatabaseFull
            | arroy::Error::InvalidItemAppend
            | arroy::Error::UnmatchingDistance { .. }
-            | arroy::Error::MissingNode
-            | arroy::Error::MissingMetadata => {
+            | arroy::Error::NeedBuild(_)
+            | arroy::Error::MissingKey { .. }
+            | arroy::Error::MissingMetadata(_) => {
                Error::InternalError(InternalError::ArroyError(value))
            }
        }
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1610,7 +1610,7 @@ impl Index {
            arroy::Reader::open(rtxn, k, self.vector_arroy)
                .map(Some)
                .or_else(|e| match e {
-                    arroy::Error::MissingMetadata => Ok(None),
+                    arroy::Error::MissingMetadata(_) => Ok(None),
                    e => Err(e.into()),
                })
                .transpose()
@@ -1643,7 +1643,7 @@ impl Index {
                let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
                    .map(Some)
                    .or_else(|e| match e {
-                        arroy::Error::MissingMetadata => Ok(None),
+                        arroy::Error::MissingMetadata(_) => Ok(None),
                        e => Err(e),
                    })
                    .transpose();
--- a/milli/src/search/facet/facet_distribution_iter.rs
+++ b/milli/src/search/facet/facet_distribution_iter.rs
@@ -6,9 +6,11 @@ use heed::Result;
 use roaring::RoaringBitmap;

 use super::{get_first_facet_value, get_highest_level};
-use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
+use crate::heed_codec::facet::{
+    FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
+};
 use crate::heed_codec::BytesRefCodec;
-use crate::DocumentId;
+use crate::{CboRoaringBitmapCodec, DocumentId};

 /// Call the given closure on the facet distribution of the candidate documents.
 ///
@@ -31,12 +33,9 @@ pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
 where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
 {
+    let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
    let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
-    let highest_level = get_highest_level(
-        rtxn,
-        db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
-        field_id,
-    )?;
+    let highest_level = get_highest_level(rtxn, db, field_id)?;

    if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
        fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
@@ -75,11 +74,8 @@ where

    // Represents the list of keys that we must explore.
    let mut heap = BinaryHeap::new();
-    let highest_level = get_highest_level(
-        rtxn,
-        db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
-        field_id,
-    )?;
+    let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
+    let highest_level = get_highest_level(rtxn, db, field_id)?;

    if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
        // We first fill the heap with values from the highest level
@@ -92,7 +88,10 @@ where
            if key.field_id != field_id {
                break;
            }
-            let intersection = value.bitmap & candidates;
+            let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
+                value.bitmap_bytes,
+                candidates,
+            )?;
            let count = intersection.len();
            if count != 0 {
                heap.push(LevelEntry {
@@ -121,7 +120,10 @@ where
                    if key.field_id != field_id {
                        break;
                    }
-                    let intersection = value.bitmap & candidates;
+                    let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
+                        value.bitmap_bytes,
+                        candidates,
+                    )?;
                    let count = intersection.len();
                    if count != 0 {
                        heap.push(LevelEntry {
@@ -146,7 +148,7 @@ where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
 {
    rtxn: &'t heed::RoTxn<'t>,
-    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
    field_id: u16,
    callback: CB,
 }
@@ -171,7 +173,10 @@ where
            if key.field_id != self.field_id {
                return Ok(ControlFlow::Break(()));
            }
-            let docids_in_common = value.bitmap & candidates;
+            let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
+                value.bitmap_bytes,
+                candidates,
+            )?;
            if !docids_in_common.is_empty() {
                let any_docid_in_common = docids_in_common.min().unwrap();
                match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)?
@@ -205,7 +210,10 @@ where
            if key.field_id != self.field_id {
                return Ok(ControlFlow::Break(()));
            }
-            let docids_in_common = value.bitmap & candidates;
+            let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
+                value.bitmap_bytes,
+                candidates,
+            )?;
            if !docids_in_common.is_empty() {
                let cf = self.iterate(
                    &docids_in_common,
--- a/milli/src/search/hybrid.rs
+++ b/milli/src/search/hybrid.rs
@@ -178,16 +178,16 @@ impl<'a> Search<'a> {

        // completely skip semantic search if the results of the keyword search are good enough
        if self.results_good_enough(&keyword_results, semantic_ratio) {
-            return Ok((keyword_results, Some(0)));
+            return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
        }

        // no vector search against placeholder search
        let Some(query) = search.query.take() else {
-            return Ok((keyword_results, Some(0)));
+            return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
        };
        // no embedder, no semantic search
        let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
-            return Ok((keyword_results, Some(0)));
+            return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
        };

        let vector_query = match vector {
@@ -239,3 +239,44 @@ impl<'a> Search<'a> {
        true
    }
 }
+
+/// Applies `offset` and `limit` to keyword results that are returned early, without
+/// running the semantic search. The semantic hit count is always reported as `Some(0)`.
+fn return_keyword_results(
+    limit: usize,
+    offset: usize,
+    SearchResult {
+        matching_words,
+        candidates,
+        mut documents_ids,
+        mut document_scores,
+        degraded,
+        used_negative_operator,
+    }: SearchResult,
+) -> (SearchResult, Option<u32>) {
+    // Both lengths should be equal; checking each of them is defensive programming.
+    if offset >= documents_ids.len() || offset >= document_scores.len() {
+        documents_ids.clear();
+        document_scores.clear();
+    } else {
+        // Remove the skipped hits instead of rotating them to the tail, where they would
+        // survive `truncate` whenever `limit` exceeds the number of remaining hits.
+        // PANICS: offset < len
+        documents_ids.drain(..offset);
+        documents_ids.truncate(limit);
+        // PANICS: offset < len
+        document_scores.drain(..offset);
+        document_scores.truncate(limit);
+    }
+    (
+        SearchResult {
+            matching_words,
+            candidates,
+            documents_ids,
+            document_scores,
+            degraded,
+            used_negative_operator,
+        },
+        Some(0),
+    )
+}
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -290,7 +290,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                        regenerate_if_prompt_changed(
                            obkv,
                            (old_prompt, prompt),
-                            (&old_fields_ids_map, &new_fields_ids_map),
+                            (old_fields_ids_map, new_fields_ids_map),
                        )?
                    } else {
                        // we can simply ignore user provided vectors as they are not regenerated and are
@@ -306,7 +306,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                    prompt,
                    (add_to_user_provided, remove_from_user_provided),
                    (old, new),
-                    (&old_fields_ids_map, &new_fields_ids_map),
+                    (old_fields_ids_map, new_fields_ids_map),
                    document_id,
                )?,
            };
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -11,7 +11,7 @@ mod extract_word_position_docids;

 use std::fs::File;
 use std::io::BufReader;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};

 use crossbeam_channel::Sender;
 use rayon::prelude::*;
@@ -32,7 +32,7 @@ use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
 use super::{helpers, TypedChunk};
 use crate::index::IndexEmbeddingConfig;
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
+use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};

 /// Extract data for each databases from obkv documents in parallel.
 /// Send data in grenad file over provided Sender.
@@ -207,6 +207,18 @@ fn run_extraction_task<FE, FS, M>(
    })
 }

+fn request_threads() -> &'static ThreadPoolNoAbort {
+    static REQUEST_THREADS: OnceLock<ThreadPoolNoAbort> = OnceLock::new();
+
+    REQUEST_THREADS.get_or_init(|| {
+        ThreadPoolNoAbortBuilder::new()
+            .num_threads(crate::vector::REQUEST_PARALLELISM)
+            .thread_name(|index| format!("embedding-request-{index}"))
+            .build()
+            .unwrap()
+    })
+}
+
 /// Extract chunked data and send it into lmdb_writer_sx sender:
 /// - documents
 fn send_original_documents_data(
@@ -219,11 +231,6 @@ fn send_original_documents_data(
    let original_documents_chunk =
        original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

-    let request_threads = ThreadPoolNoAbortBuilder::new()
-        .num_threads(crate::vector::REQUEST_PARALLELISM)
-        .thread_name(|index| format!("embedding-request-{index}"))
-        .build()?;
-
    let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only())
        // no point in indexing vectors without embedders
        && (!settings_diff.new.embedding_configs.inner_as_ref().is_empty());
@@ -256,7 +263,7 @@ fn send_original_documents_data(
                            prompts,
                            indexer,
                            embedder.clone(),
-                            &request_threads,
+                            request_threads(),
                        ) {
                            Ok(results) => Some(results),
                            Err(error) => {
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -547,10 +547,11 @@ where
            pool.install(|| {
                for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
                    let writer = arroy::Writer::new(vector_arroy, k, dimension);
-                    if writer.is_empty(wtxn)? {
+                    if writer.need_build(wtxn)? {
+                        writer.build(wtxn, &mut rng, None)?;
+                    } else if writer.is_empty(wtxn)? {
                        break;
                    }
-                    writer.build(wtxn, &mut rng, None)?;
                }
                Result::Ok(())
            })
--- a/milli/src/vector/parsed_vectors.rs
+++ b/milli/src/vector/parsed_vectors.rs
@@ -1,5 +1,6 @@
 use std::collections::{BTreeMap, BTreeSet};

+use deserr::{take_cf_content, DeserializeError, Deserr, Sequence};
 use obkv::KvReader;
 use serde_json::{from_slice, Value};

@@ -10,13 +11,44 @@ use crate::{DocumentId, FieldId, InternalError, UserError};

 pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";

-#[derive(serde::Serialize, serde::Deserialize, Debug)]
+#[derive(serde::Serialize, Debug)]
 #[serde(untagged)]
 pub enum Vectors {
    ImplicitlyUserProvided(VectorOrArrayOfVectors),
    Explicit(ExplicitVectors),
 }

+impl<E: DeserializeError> Deserr<E> for Vectors {
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: deserr::ValuePointerRef,
+    ) -> Result<Self, E> {
+        match value {
+            deserr::Value::Sequence(_) | deserr::Value::Null => {
+                Ok(Vectors::ImplicitlyUserProvided(VectorOrArrayOfVectors::deserialize_from_value(
+                    value, location,
+                )?))
+            }
+            deserr::Value::Map(_) => {
+                Ok(Vectors::Explicit(ExplicitVectors::deserialize_from_value(value, location)?))
+            }
+
+            value => Err(take_cf_content(E::error(
+                None,
+                deserr::ErrorKind::IncorrectValueKind {
+                    actual: value,
+                    accepted: &[
+                        deserr::ValueKind::Sequence,
+                        deserr::ValueKind::Map,
+                        deserr::ValueKind::Null,
+                    ],
+                },
+                location,
+            ))),
+        }
+    }
+}
+
 impl Vectors {
    pub fn must_regenerate(&self) -> bool {
        match self {
@@ -37,9 +69,11 @@ impl Vectors {
    }
 }

-#[derive(serde::Serialize, serde::Deserialize, Debug)]
+#[derive(serde::Serialize, Deserr, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct ExplicitVectors {
+    #[serde(default)]
+    #[deserr(default)]
    pub embeddings: Option<VectorOrArrayOfVectors>,
    pub regenerate: bool,
 }
@@ -149,13 +183,20 @@ impl ParsedVectorsDiff {

 pub struct ParsedVectors(pub BTreeMap<String, Vectors>);

+impl<E: DeserializeError> Deserr<E> for ParsedVectors {
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: deserr::ValuePointerRef,
+    ) -> Result<Self, E> {
+        let value = <BTreeMap<String, Vectors>>::deserialize_from_value(value, location)?;
+        Ok(ParsedVectors(value))
+    }
+}
+
 impl ParsedVectors {
    pub fn from_bytes(value: &[u8]) -> Result<Self, Error> {
-        let Ok(value) = from_slice(value) else {
-            let value = from_slice(value).map_err(Error::InternalSerdeJson)?;
-            return Err(Error::InvalidMap(value));
-        };
-        Ok(ParsedVectors(value))
+        let value: serde_json::Value = from_slice(value).map_err(Error::InternalSerdeJson)?;
+        deserr::deserialize(value).map_err(|error| Error::InvalidEmbedderConf { error })
    }

    pub fn retain_not_embedded_vectors(&mut self, embedders: &BTreeSet<String>) {
@@ -165,6 +206,7 @@ impl ParsedVectors {

 pub enum Error {
    InvalidMap(Value),
+    InvalidEmbedderConf { error: deserr::errors::JsonError },
    InternalSerdeJson(serde_json::Error),
 }

@@ -174,6 +216,12 @@ impl Error {
            Error::InvalidMap(value) => {
                crate::Error::UserError(UserError::InvalidVectorsMapType { document_id, value })
            }
+            Error::InvalidEmbedderConf { error } => {
+                crate::Error::UserError(UserError::InvalidVectorsEmbedderConf {
+                    document_id,
+                    error,
+                })
+            }
            Error::InternalSerdeJson(error) => {
                crate::Error::InternalError(InternalError::SerdeJson(error))
            }
@@ -194,13 +242,84 @@ fn to_vector_map(
 }

 /// Represents either a vector or an array of multiple vectors.
-#[derive(serde::Serialize, serde::Deserialize, Debug)]
+#[derive(serde::Serialize, Debug)]
 #[serde(transparent)]
 pub struct VectorOrArrayOfVectors {
    #[serde(with = "either::serde_untagged_optional")]
    inner: Option<either::Either<Vec<Embedding>, Embedding>>,
 }

+impl<E: DeserializeError> Deserr<E> for VectorOrArrayOfVectors {
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: deserr::ValuePointerRef,
+    ) -> Result<Self, E> {
+        match value {
+            deserr::Value::Null => Ok(VectorOrArrayOfVectors { inner: None }),
+            deserr::Value::Sequence(seq) => {
+                let mut iter = seq.into_iter();
+                match iter.next().map(|v| v.into_value()) {
+                    None => {
+                        // With the strange way serde serializes the `Either`, we must send the left part
+                        // otherwise it'll consider we returned [[]]
+                        Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Left(Vec::new())) })
+                    }
+                    Some(val @ deserr::Value::Sequence(_)) => {
+                        let first = Embedding::deserialize_from_value(val, location.push_index(0))?;
+                        let mut collect = vec![first];
+                        let mut tail = iter
+                            .enumerate()
+                            .map(|(i, v)| {
+                                Embedding::deserialize_from_value(
+                                    v.into_value(),
+                                    location.push_index(i + 1),
+                                )
+                            })
+                            .collect::<Result<Vec<_>, _>>()?;
+                        collect.append(&mut tail);
+
+                        Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Left(collect)) })
+                    }
+                    Some(
+                        val @ deserr::Value::Integer(_)
+                        | val @ deserr::Value::NegativeInteger(_)
+                        | val @ deserr::Value::Float(_),
+                    ) => {
+                        let first = <f32>::deserialize_from_value(val, location.push_index(0))?;
+                        let mut embedding = iter
+                            .enumerate()
+                            .map(|(i, v)| {
+                                <f32>::deserialize_from_value(
+                                    v.into_value(),
+                                    location.push_index(i + 1),
+                                )
+                            })
+                            .collect::<Result<Vec<_>, _>>()?;
+                        embedding.insert(0, first);
+                        Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Right(embedding)) })
+                    }
+                    Some(value) => Err(take_cf_content(E::error(
+                        None,
+                        deserr::ErrorKind::IncorrectValueKind {
+                            actual: value,
+                            accepted: &[deserr::ValueKind::Sequence, deserr::ValueKind::Float],
+                        },
+                        location.push_index(0),
+                    ))),
+                }
+            }
+            value => Err(take_cf_content(E::error(
+                None,
+                deserr::ErrorKind::IncorrectValueKind {
+                    actual: value,
+                    accepted: &[deserr::ValueKind::Sequence, deserr::ValueKind::Null],
+                },
+                location,
+            ))),
+        }
+    }
+}
+
 impl VectorOrArrayOfVectors {
    pub fn into_array_of_vectors(self) -> Option<Vec<Embedding>> {
        match self.inner? {
@@ -234,15 +353,19 @@ impl From<Vec<Embedding>> for VectorOrArrayOfVectors {
 mod test {
    use super::VectorOrArrayOfVectors;

+    fn embedding_from_str(s: &str) -> Result<VectorOrArrayOfVectors, deserr::errors::JsonError> {
+        let value: serde_json::Value = serde_json::from_str(s).unwrap();
+        deserr::deserialize(value)
+    }
+
    #[test]
    fn array_of_vectors() {
-        let null: VectorOrArrayOfVectors = serde_json::from_str("null").unwrap();
-        let empty: VectorOrArrayOfVectors = serde_json::from_str("[]").unwrap();
-        let one: VectorOrArrayOfVectors = serde_json::from_str("[0.1]").unwrap();
-        let two: VectorOrArrayOfVectors = serde_json::from_str("[0.1, 0.2]").unwrap();
-        let one_vec: VectorOrArrayOfVectors = serde_json::from_str("[[0.1, 0.2]]").unwrap();
-        let two_vecs: VectorOrArrayOfVectors =
-            serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap();
+        let null = embedding_from_str("null").unwrap();
+        let empty = embedding_from_str("[]").unwrap();
+        let one = embedding_from_str("[0.1]").unwrap();
+        let two = embedding_from_str("[0.1, 0.2]").unwrap();
+        let one_vec = embedding_from_str("[[0.1, 0.2]]").unwrap();
+        let two_vecs = embedding_from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap();

        insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null");
        insta::assert_json_snapshot!(empty.into_array_of_vectors(), @"[]");
Author	SHA1	Message	Date
ManyTheFish	a87c390244	Activate only the necessary features for Japanese	2024-07-08 18:17:11 +02:00
meili-bors[bot]	0df84bbba7	Merge #4746 4746: Fix hybrid search limit offset r=irevoire a=dureuill # Pull Request ## Related issue Fixes #4745 ## What does this PR do? - Apply offset and limit to the keyword search results when they are returned early. - Add a test that is initially failing, and then passes Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-06-27 12:47:08 +00:00
Louis Dureuil	e53de15b8e	Fix behavior of limit and offset for hybrid search when keyword results are returned early The test is fixed	2024-06-27 14:25:33 +02:00
Louis Dureuil	8c4921b9dd	Add failing test on limit+offset for hybrid search	2024-06-27 14:21:34 +02:00
meili-bors[bot]	f6a00f4a90	Merge #4740 4740: Make `embeddings` optional and improve error message for `regenerate` r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4741 ## What does this PR do? - Make the `embeddings` parameter optional when manually specifying embeddings for an embedder - Adds a lot of tests around malformed `_vectors.embedder` objects - Use `deserr` to deserialize the `_vectors.embedder` field, improving error messages Co-authored-by: Tamo <tamo@meilisearch.com>	2024-06-27 10:06:28 +00:00
Tamo	ce08dc509b	add more tests and improve the location of the error	2024-06-27 11:51:45 +02:00
Tamo	1daaed163a	Make _vectors.:embedding.regenerate mandatory + tests + error messages	2024-06-27 11:04:58 +02:00
meili-bors[bot]	298c7b0c93	Merge #4715 4715: Build all arroy indexes that need to be built r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4588 ## What does this PR do? - Update arroy - Ensure we always rebuild the arroy indexes that need to be built Co-authored-by: Tamo <tamo@meilisearch.com>	2024-06-24 09:32:04 +00:00
Tamo	606e108420	fix all the flaky snapshots	2024-06-24 11:13:45 +02:00
Tamo	7be17b7e4c	add the missing snapshots	2024-06-24 10:52:57 +02:00
Tamo	1693332cab	Update arroy and always build the tree that need to be built	2024-06-24 10:14:03 +02:00
meili-bors[bot]	ddd564665b	Merge #4713 4713: Speed up facet distribution r=ManyTheFish a=Kerollmops This PR is akin to #4682, but this time, the same logic is applied to the facets. Bitmaps are not decoded, and we do an intersection on the bytes with the search candidates instead of materializing the RoaringBitmap to destroy it just after the operation. A prospect raised some slow requests when performing facet searches, and I found out that the disk optimization intersection wasn't performed on the facets. Co-authored-by: Clément Renault <clement@meilisearch.com>	2024-06-24 05:23:46 +00:00
meili-bors[bot]	4ae11bfd31	Merge #4710 4710: Only spawn thread pool once (v1.9) r=irevoire a=dureuill # Pull Request See #4707 Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-06-20 11:45:32 +00:00
Clément Renault	9736e16a88	Make clippy happy	2024-06-20 13:02:44 +02:00
Clément Renault	6fa4da8ae7	Improve facet distribution speed in count mode	2024-06-20 12:58:51 +02:00
Clément Renault	19d7cdc20d	Improve facet distribution speed in lexico mode	2024-06-20 12:57:08 +02:00
meili-bors[bot]	c229200820	Merge #4712 4712: Update mini-dashboard 2.14 r=irevoire a=curquiza Fixes #4668 Co-authored-by: curquiza <clementine@meilisearch.com>	2024-06-20 08:47:22 +00:00
curquiza	bad28cc9e2	Update mini-dashboard 2.14	2024-06-20 10:01:36 +02:00
Louis Dureuil	a04041c8f2	Only spawn the pool once	2024-06-19 16:25:33 +02:00