diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs index 67da51702..2a7038fcb 100644 --- a/crates/meilisearch/tests/vector/mod.rs +++ b/crates/meilisearch/tests/vector/mod.rs @@ -100,7 +100,7 @@ async fn add_remove_user_provided() { let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) .await; - snapshot!(json_string!(documents), @r###" + snapshot!(json_string!(documents), @r#" { "results": [ { @@ -134,7 +134,7 @@ async fn add_remove_user_provided() { "limit": 20, "total": 2 } - "###); + "#); let (value, code) = index.delete_document(0).await; snapshot!(code, @"202 Accepted"); @@ -143,7 +143,7 @@ async fn add_remove_user_provided() { let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) .await; - snapshot!(json_string!(documents), @r###" + snapshot!(json_string!(documents), @r#" { "results": [ { @@ -161,6 +161,97 @@ async fn add_remove_user_provided() { "limit": 20, "total": 1 } + "#); +} + +#[actix_rt::test] +async fn user_provide_mismatched_embedding_dimension() { + let server = Server::new().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r#" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "doggo", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "Index `doggo`: Invalid vector dimensions: expected: `3`, found: `2`.", + "code": "invalid_vector_dimensions", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "#); + + // FIXME: /!\ Case where number of embeddings is divisor of `dimensions` would still pass + let new_document = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }}, + ]); + let (response, code) = index.add_documents(new_document, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(response.uid()).await.succeeded(); + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + 0.0, + 0.0, + 1.0 + ], + [ + 1.0, + 2.0, + 2.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } "###); } @@ -678,7 +769,7 @@ async fn add_remove_one_vector_4588() { let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) .await; - snapshot!(json_string!(documents), @r###" + snapshot!(json_string!(documents), @r#" { "results": [ { @@ -696,5 +787,5 @@ async fn add_remove_one_vector_4588() { "limit": 20, "total": 1 } - "###); + "#); } diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index 8618b4b21..ca860bbff 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -13,7 +13,7 @@ use crate::index::IndexEmbeddingConfig; use crate::progress::Progress; use crate::update::settings::InnerIndexSettings; use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; -use crate::{Error, Index, InternalError, Result}; +use crate::{Error, Index, InternalError, Result, UserError}; pub fn write_to_db( mut writer_receiver: WriterBbqueueReceiver<'_>, @@ -218,7 +218,12 @@ pub fn write_from_bbqueue( arroy_writers.get(&embedder_id).expect("requested a missing embedder"); let mut embeddings = Embeddings::new(*dimensions); let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); - embeddings.append(all_embeddings.to_vec()).unwrap(); + if embeddings.append(all_embeddings.to_vec()).is_err() { + return Err(Error::UserError(UserError::InvalidVectorDimensions { + expected: *dimensions, + found: all_embeddings.len(), + })); + } writer.del_items(wtxn, *dimensions, docid)?; writer.add_items(wtxn, docid, &embeddings)?; }