mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Merge pull request #5449 from vuthanhtung2412/fix-dim-mismatch
Display more detailed error message instead of panic on embeddings dimension mismatch
This commit is contained in:
		| @@ -100,7 +100,7 @@ async fn add_remove_user_provided() { | |||||||
|     let (documents, _code) = index |     let (documents, _code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(json_string!(documents), @r###" |     snapshot!(json_string!(documents), @r#" | ||||||
|     { |     { | ||||||
|       "results": [ |       "results": [ | ||||||
|         { |         { | ||||||
| @@ -134,7 +134,7 @@ async fn add_remove_user_provided() { | |||||||
|       "limit": 20, |       "limit": 20, | ||||||
|       "total": 2 |       "total": 2 | ||||||
|     } |     } | ||||||
|     "###); |     "#); | ||||||
|  |  | ||||||
|     let (value, code) = index.delete_document(0).await; |     let (value, code) = index.delete_document(0).await; | ||||||
|     snapshot!(code, @"202 Accepted"); |     snapshot!(code, @"202 Accepted"); | ||||||
| @@ -143,7 +143,7 @@ async fn add_remove_user_provided() { | |||||||
|     let (documents, _code) = index |     let (documents, _code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(json_string!(documents), @r###" |     snapshot!(json_string!(documents), @r#" | ||||||
|     { |     { | ||||||
|       "results": [ |       "results": [ | ||||||
|         { |         { | ||||||
| @@ -161,6 +161,97 @@ async fn add_remove_user_provided() { | |||||||
|       "limit": 20, |       "limit": 20, | ||||||
|       "total": 1 |       "total": 1 | ||||||
|     } |     } | ||||||
|  |     "#); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn user_provide_mismatched_embedding_dimension() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |               "manual": { | ||||||
|  |                   "source": "userProvided", | ||||||
|  |                   "dimensions": 3, | ||||||
|  |               } | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0] }}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r#" | ||||||
|  |     { | ||||||
|  |       "uid": "[uid]", | ||||||
|  |       "batchUid": "[batch_uid]", | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Index `doggo`: Invalid vector dimensions: expected: `3`, found: `2`.", | ||||||
|  |         "code": "invalid_vector_dimensions", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "#); | ||||||
|  |  | ||||||
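|  |     // FIXME: /!\ A dimension mismatch still goes undetected when the total number of provided values fits `dimensions`: below, three 2-value embeddings (6 values) are accepted and regrouped into two 3-value embeddings | ||||||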
|  |     let new_document = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }}, | ||||||
|  |     ]); | ||||||
|  |     let (response, code) = index.add_documents(new_document, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   0.0, | ||||||
|  |                   0.0, | ||||||
|  |                   1.0 | ||||||
|  |                 ], | ||||||
|  |                 [ | ||||||
|  |                   1.0, | ||||||
|  |                   2.0, | ||||||
|  |                   2.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 1 | ||||||
|  |     } | ||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -678,7 +769,7 @@ async fn add_remove_one_vector_4588() { | |||||||
|     let (documents, _code) = index |     let (documents, _code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(json_string!(documents), @r###" |     snapshot!(json_string!(documents), @r#" | ||||||
|     { |     { | ||||||
|       "results": [ |       "results": [ | ||||||
|         { |         { | ||||||
| @@ -696,5 +787,5 @@ async fn add_remove_one_vector_4588() { | |||||||
|       "limit": 20, |       "limit": 20, | ||||||
|       "total": 1 |       "total": 1 | ||||||
|     } |     } | ||||||
|     "###); |     "#); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -13,7 +13,7 @@ use crate::index::IndexEmbeddingConfig; | |||||||
| use crate::progress::Progress; | use crate::progress::Progress; | ||||||
| use crate::update::settings::InnerIndexSettings; | use crate::update::settings::InnerIndexSettings; | ||||||
| use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; | use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; | ||||||
| use crate::{Error, Index, InternalError, Result}; | use crate::{Error, Index, InternalError, Result, UserError}; | ||||||
|  |  | ||||||
| pub fn write_to_db( | pub fn write_to_db( | ||||||
|     mut writer_receiver: WriterBbqueueReceiver<'_>, |     mut writer_receiver: WriterBbqueueReceiver<'_>, | ||||||
| @@ -218,7 +218,12 @@ pub fn write_from_bbqueue( | |||||||
|                     arroy_writers.get(&embedder_id).expect("requested a missing embedder"); |                     arroy_writers.get(&embedder_id).expect("requested a missing embedder"); | ||||||
|                 let mut embeddings = Embeddings::new(*dimensions); |                 let mut embeddings = Embeddings::new(*dimensions); | ||||||
|                 let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); |                 let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); | ||||||
|                 embeddings.append(all_embeddings.to_vec()).unwrap(); |                 if embeddings.append(all_embeddings.to_vec()).is_err() { | ||||||
|  |                     return Err(Error::UserError(UserError::InvalidVectorDimensions { | ||||||
|  |                         expected: *dimensions, | ||||||
|  |                         found: all_embeddings.len(), | ||||||
|  |                     })); | ||||||
|  |                 } | ||||||
|                 writer.del_items(wtxn, *dimensions, docid)?; |                 writer.del_items(wtxn, *dimensions, docid)?; | ||||||
|                 writer.add_items(wtxn, docid, &embeddings)?; |                 writer.add_items(wtxn, docid, &embeddings)?; | ||||||
|             } |             } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user