Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-31 16:06:31 +00:00)
			
		
		
		
	always push the user defined vectors in arroy
This commit is contained in:
		| @@ -5173,8 +5173,8 @@ mod tests { | ||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); | ||||
|  | ||||
|         println!("HEEEEERE"); | ||||
|         // handle.advance_one_successful_batch(); | ||||
|         handle.advance_one_failed_batch(); | ||||
|         handle.advance_one_successful_batch(); | ||||
|         // handle.advance_one_failed_batch(); | ||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); | ||||
|  | ||||
|         { | ||||
| @@ -5351,9 +5351,9 @@ mod tests { | ||||
|         // as user provided since we explicitly marked it as NOT user provided. | ||||
|         snapshot!(format!("{conf:#?}"), @r###" | ||||
|         [ | ||||
|             ( | ||||
|                 "my_doggo_embedder", | ||||
|                 EmbeddingConfig { | ||||
|             IndexEmbeddingConfig { | ||||
|                 name: "my_doggo_embedder", | ||||
|                 config: EmbeddingConfig { | ||||
|                     embedder_options: HuggingFace( | ||||
|                         EmbedderOptions { | ||||
|                             model: "sentence-transformers/all-MiniLM-L6-v2", | ||||
| @@ -5367,8 +5367,8 @@ mod tests { | ||||
|                         template: "{{doc.doggo}}", | ||||
|                     }, | ||||
|                 }, | ||||
|                 RoaringBitmap<[1, 2]>, | ||||
|             ), | ||||
|                 user_defined: RoaringBitmap<[1, 2]>, | ||||
|             }, | ||||
|         ] | ||||
|         "###); | ||||
|         let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); | ||||
|   | ||||
| @@ -6,10 +6,6 @@ expression: doc | ||||
|   "doggo": "Intel", | ||||
|   "breed": "beagle", | ||||
|   "_vectors": { | ||||
|     "A_fakerest": { | ||||
|       "embeddings": "[vector]", | ||||
|       "userProvided": true | ||||
|     }, | ||||
|     "noise": [ | ||||
|       0.1, | ||||
|       0.2, | ||||
| @@ -6,10 +6,6 @@ expression: doc | ||||
|   "doggo": "kefir", | ||||
|   "breed": "patou", | ||||
|   "_vectors": { | ||||
|     "A_fakerest": { | ||||
|       "embeddings": "[vector]", | ||||
|       "userProvided": true | ||||
|     }, | ||||
|     "noise": [ | ||||
|       0.1, | ||||
|       0.2, | ||||
| @@ -1,4 +0,0 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| [{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}] | ||||
| @@ -204,7 +204,7 @@ async fn distribution_shift() { | ||||
|     let server = Server::new().await; | ||||
|     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; | ||||
|  | ||||
|     let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}}); | ||||
|     let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); | ||||
|     let (response, code) = index.search_post(search.clone()).await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); | ||||
| @@ -239,20 +239,23 @@ async fn highlighter() { | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||
|             "hybrid": {"semanticRatio": 0.2}, | ||||
|            "retrieveVectors": true, | ||||
|            "attributesToHighlight": [ | ||||
|                      "desc" | ||||
|                      "desc", | ||||
|                      "_vectors", | ||||
|                    ], | ||||
|            "highlightPreTag": "**BEGIN**", | ||||
|                    "highlightPostTag": "**END**" | ||||
|            "highlightPostTag": "**END**", | ||||
|         })) | ||||
|         .await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###); | ||||
|     snapshot!(response["semanticHitCount"], @"0"); | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||
|             "hybrid": {"semanticRatio": 0.8}, | ||||
|             "retrieveVectors": true, | ||||
|             "showRankingScore": true, | ||||
|             "attributesToHighlight": [ | ||||
|                      "desc" | ||||
| @@ -262,13 +265,14 @@ async fn highlighter() { | ||||
|         })) | ||||
|         .await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); | ||||
|     snapshot!(response["semanticHitCount"], @"3"); | ||||
|  | ||||
|     // no highlighting on full semantic | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||
|             "hybrid": {"semanticRatio": 1.0}, | ||||
|             "retrieveVectors": true, | ||||
|             "showRankingScore": true, | ||||
|             "attributesToHighlight": [ | ||||
|                      "desc" | ||||
| @@ -278,7 +282,7 @@ async fn highlighter() { | ||||
|         })) | ||||
|         .await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###); | ||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); | ||||
|     snapshot!(response["semanticHitCount"], @"3"); | ||||
| } | ||||
|  | ||||
| @@ -361,7 +365,7 @@ async fn single_document() { | ||||
|  | ||||
|     let (response, code) = index | ||||
|     .search_post( | ||||
|         json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}), | ||||
|         json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), | ||||
|     ) | ||||
|     .await; | ||||
|  | ||||
| @@ -377,7 +381,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // search without query and vector, but with hybrid => still placeholder | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) | ||||
|         .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||
|         .await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -386,7 +390,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // same with a different semantic ratio | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true})) | ||||
|         .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) | ||||
|         .await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -395,7 +399,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // wrong vector dimensions | ||||
|     let (response, code) = index | ||||
|     .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) | ||||
|     .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||
|     .await; | ||||
|  | ||||
|     snapshot!(code, @"400 Bad Request"); | ||||
| @@ -410,7 +414,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // full vector | ||||
|     let (response, code) = index | ||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) | ||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||
|     .await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -419,7 +423,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // full keyword, without a query | ||||
|     let (response, code) = index | ||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true})) | ||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||
|     .await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -428,7 +432,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // query + vector, full keyword => keyword | ||||
|     let (response, code) = index | ||||
|     .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true})) | ||||
|     .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||
|     .await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -437,7 +441,7 @@ async fn query_combination() { | ||||
|  | ||||
|     // query + vector, no hybrid keyword => | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true})) | ||||
|         .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true})) | ||||
|         .await; | ||||
|  | ||||
|     snapshot!(code, @"400 Bad Request"); | ||||
| @@ -453,7 +457,7 @@ async fn query_combination() { | ||||
|     // full vector, without a vector => error | ||||
|     let (response, code) = index | ||||
|         .search_post( | ||||
|             json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}), | ||||
|             json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
| @@ -470,7 +474,7 @@ async fn query_combination() { | ||||
|     // hybrid without a vector => full keyword | ||||
|     let (response, code) = index | ||||
|         .search_post( | ||||
|             json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}), | ||||
|             json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|   | ||||
| @@ -1337,6 +1337,7 @@ async fn experimental_feature_vector_store() { | ||||
|         .search_post(json!({ | ||||
|             "vector": [1.0, 2.0, 3.0], | ||||
|             "showRankingScore": true, | ||||
|             "retrieveVectors": true, | ||||
|         })) | ||||
|         .await; | ||||
|  | ||||
|   | ||||
| @@ -78,7 +78,7 @@ async fn basic() { | ||||
|     index.wait_task(value.uid()).await; | ||||
|  | ||||
|     index | ||||
|         .similar(json!({"id": 143}), |response, code| { | ||||
|         .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { | ||||
|             snapshot!(code, @"200 OK"); | ||||
|             snapshot!(json_string!(response["hits"]), @r###" | ||||
|             [ | ||||
| @@ -88,9 +88,9 @@ async fn basic() { | ||||
|                 "id": "522681", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.1, | ||||
|                     0.6, | ||||
|                     0.8 | ||||
|                     0.10000000149011612, | ||||
|                     0.6000000238418579, | ||||
|                     0.800000011920929 | ||||
|                   ] | ||||
|                 } | ||||
|               }, | ||||
| @@ -100,9 +100,9 @@ async fn basic() { | ||||
|                 "id": "299537", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.6, | ||||
|                     0.8, | ||||
|                     -0.2 | ||||
|                     0.6000000238418579, | ||||
|                     0.800000011920929, | ||||
|                     -0.20000000298023224 | ||||
|                   ] | ||||
|                 } | ||||
|               }, | ||||
| @@ -112,9 +112,9 @@ async fn basic() { | ||||
|                 "id": "166428", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.7, | ||||
|                     0.7, | ||||
|                     -0.4 | ||||
|                     0.699999988079071, | ||||
|                     0.699999988079071, | ||||
|                     -0.4000000059604645 | ||||
|                   ] | ||||
|                 } | ||||
|               }, | ||||
| @@ -124,8 +124,8 @@ async fn basic() { | ||||
|                 "id": "287947", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.8, | ||||
|                     0.4, | ||||
|                     0.800000011920929, | ||||
|                     0.4000000059604645, | ||||
|                     -0.5 | ||||
|                   ] | ||||
|                 } | ||||
| @@ -136,7 +136,7 @@ async fn basic() { | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .similar(json!({"id": "299537"}), |response, code| { | ||||
|         .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { | ||||
|             snapshot!(code, @"200 OK"); | ||||
|             snapshot!(json_string!(response["hits"]), @r###" | ||||
|             [ | ||||
| @@ -146,9 +146,9 @@ async fn basic() { | ||||
|                 "id": "166428", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.7, | ||||
|                     0.7, | ||||
|                     -0.4 | ||||
|                     0.699999988079071, | ||||
|                     0.699999988079071, | ||||
|                     -0.4000000059604645 | ||||
|                   ] | ||||
|                 } | ||||
|               }, | ||||
| @@ -158,8 +158,8 @@ async fn basic() { | ||||
|                 "id": "287947", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.8, | ||||
|                     0.4, | ||||
|                     0.800000011920929, | ||||
|                     0.4000000059604645, | ||||
|                     -0.5 | ||||
|                   ] | ||||
|                 } | ||||
| @@ -170,9 +170,9 @@ async fn basic() { | ||||
|                 "id": "522681", | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     0.1, | ||||
|                     0.6, | ||||
|                     0.8 | ||||
|                     0.10000000149011612, | ||||
|                     0.6000000238418579, | ||||
|                     0.800000011920929 | ||||
|                   ] | ||||
|                 } | ||||
|               }, | ||||
| @@ -183,8 +183,8 @@ async fn basic() { | ||||
|                 "_vectors": { | ||||
|                   "manual": [ | ||||
|                     -0.5, | ||||
|                     0.3, | ||||
|                     0.85 | ||||
|                     0.30000001192092896, | ||||
|                     0.8500000238418579 | ||||
|                   ] | ||||
|                 } | ||||
|               } | ||||
| @@ -456,7 +456,9 @@ async fn filter() { | ||||
|     index.wait_task(value.uid()).await; | ||||
|  | ||||
|     index | ||||
|         .similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| { | ||||
|         .similar( | ||||
|             json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), | ||||
|             |response, code| { | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|                 snapshot!(json_string!(response["hits"]), @r###" | ||||
|                 [ | ||||
| @@ -466,9 +468,9 @@ async fn filter() { | ||||
|                     "id": "299537", | ||||
|                     "_vectors": { | ||||
|                       "manual": [ | ||||
|                     0.6, | ||||
|                     0.8, | ||||
|                     -0.2 | ||||
|                         0.6000000238418579, | ||||
|                         0.800000011920929, | ||||
|                         -0.20000000298023224 | ||||
|                       ] | ||||
|                     } | ||||
|                   }, | ||||
| @@ -478,9 +480,9 @@ async fn filter() { | ||||
|                     "id": "166428", | ||||
|                     "_vectors": { | ||||
|                       "manual": [ | ||||
|                     0.7, | ||||
|                     0.7, | ||||
|                     -0.4 | ||||
|                         0.699999988079071, | ||||
|                         0.699999988079071, | ||||
|                         -0.4000000059604645 | ||||
|                       ] | ||||
|                     } | ||||
|                   }, | ||||
| @@ -490,19 +492,22 @@ async fn filter() { | ||||
|                     "id": "287947", | ||||
|                     "_vectors": { | ||||
|                       "manual": [ | ||||
|                     0.8, | ||||
|                     0.4, | ||||
|                         0.800000011920929, | ||||
|                         0.4000000059604645, | ||||
|                         -0.5 | ||||
|                       ] | ||||
|                     } | ||||
|                   } | ||||
|                 ] | ||||
|                 "###); | ||||
|         }) | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| { | ||||
|         .similar( | ||||
|             json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), | ||||
|             |response, code| { | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|                 snapshot!(json_string!(response["hits"]), @r###" | ||||
|                 [ | ||||
| @@ -513,14 +518,15 @@ async fn filter() { | ||||
|                     "_vectors": { | ||||
|                       "manual": [ | ||||
|                         -0.5, | ||||
|                     0.3, | ||||
|                     0.85 | ||||
|                         0.30000001192092896, | ||||
|                         0.8500000238418579 | ||||
|                       ] | ||||
|                     } | ||||
|                   } | ||||
|                 ] | ||||
|                 "###); | ||||
|         }) | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
| } | ||||
|  | ||||
| @@ -579,7 +585,9 @@ async fn limit_and_offset() { | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| { | ||||
|         .similar( | ||||
|             json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), | ||||
|             |response, code| { | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|                 snapshot!(json_string!(response["hits"]), @r###" | ||||
|                 [ | ||||
| @@ -589,14 +597,15 @@ async fn limit_and_offset() { | ||||
|                     "id": "299537", | ||||
|                     "_vectors": { | ||||
|                       "manual": [ | ||||
|                     0.6, | ||||
|                     0.8, | ||||
|                     -0.2 | ||||
|                         0.6000000238418579, | ||||
|                         0.800000011920929, | ||||
|                         -0.20000000298023224 | ||||
|                       ] | ||||
|                     } | ||||
|                   } | ||||
|                 ] | ||||
|                 "###); | ||||
|         }) | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
| } | ||||
|   | ||||
| @@ -1,244 +0,0 @@ | ||||
| --- | ||||
| source: milli/src/search/new/tests/attribute_fid.rs | ||||
| expression: "format!(\"{document_ids_scores:#?}\")" | ||||
| --- | ||||
| [ | ||||
|     ( | ||||
|         2, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 19, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 91, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         6, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 15, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 81, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         5, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 14, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 79, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         4, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 13, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 77, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         3, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 12, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 83, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         9, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 11, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 75, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         8, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 10, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 79, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         7, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 10, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 73, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         11, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 7, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 77, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         10, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 6, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 81, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         13, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 6, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 81, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         12, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 6, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 78, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         14, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 5, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 75, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         0, | ||||
|         [ | ||||
|             Fid( | ||||
|                 Rank { | ||||
|                     rank: 1, | ||||
|                     max_rank: 19, | ||||
|                 }, | ||||
|             ), | ||||
|             Position( | ||||
|                 Rank { | ||||
|                     rank: 91, | ||||
|                     max_rank: 91, | ||||
|                 }, | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
| ] | ||||
| @@ -1,7 +0,0 @@ | ||||
| --- | ||||
| source: milli/src/index.rs | ||||
| --- | ||||
| age              1      | | ||||
| id               2      | | ||||
| name             2      | | ||||
|  | ||||
| @@ -1,7 +0,0 @@ | ||||
| --- | ||||
| source: milli/src/index.rs | ||||
| --- | ||||
| age              1      | | ||||
| id               2      | | ||||
| name             2      | | ||||
|  | ||||
| @@ -8,7 +8,6 @@ use std::sync::Arc; | ||||
|  | ||||
| use bytemuck::cast_slice; | ||||
| use grenad::Writer; | ||||
| use itertools::EitherOrBoth; | ||||
| use ordered_float::OrderedFloat; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde_json::Value; | ||||
| @@ -50,7 +49,7 @@ enum VectorStateDelta { | ||||
|     // Note: changing the value of the manually specified vector **should not record** this delta | ||||
|     WasGeneratedNowManual(Vec<Vec<f32>>), | ||||
|  | ||||
|     ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>), | ||||
|     ManualDelta(Vec<Vec<f32>>), | ||||
|  | ||||
|     // Add the vector computed from the specified prompt | ||||
|     // Remove any previous vector | ||||
| @@ -59,14 +58,12 @@ enum VectorStateDelta { | ||||
| } | ||||
|  | ||||
| impl VectorStateDelta { | ||||
|     fn into_values(self) -> (bool, String, (Vec<Vec<f32>>, Vec<Vec<f32>>)) { | ||||
|     fn into_values(self) -> (bool, String, Vec<Vec<f32>>) { | ||||
|         match self { | ||||
|             VectorStateDelta::NoChange => Default::default(), | ||||
|             VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()), | ||||
|             VectorStateDelta::WasGeneratedNowManual(add) => { | ||||
|                 (true, Default::default(), (Default::default(), add)) | ||||
|             } | ||||
|             VectorStateDelta::ManualDelta(del, add) => (false, Default::default(), (del, add)), | ||||
|             VectorStateDelta::WasGeneratedNowManual(add) => (true, Default::default(), add), | ||||
|             VectorStateDelta::ManualDelta(add) => (false, Default::default(), add), | ||||
|             VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()), | ||||
|         } | ||||
|     } | ||||
| @@ -166,7 +163,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|         // lazily get it when needed | ||||
|         let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; | ||||
|  | ||||
|         let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid) | ||||
|         let mut parsed_vectors = ParsedVectorsDiff::new( | ||||
|             docid, | ||||
|             embedders_configs, | ||||
|             obkv, | ||||
|             old_vectors_fid, | ||||
|             new_vectors_fid, | ||||
|         ) | ||||
|         .map_err(|error| error.to_crate_error(document_id().to_string()))?; | ||||
|  | ||||
|         for EmbedderVectorExtractor { | ||||
| @@ -182,7 +185,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|         { | ||||
|             let delta = match parsed_vectors.remove(embedder_name) { | ||||
|                 (Some(old), Some(new)) => { | ||||
|                     match (old.is_user_provided(), new.is_user_provided()) { | ||||
|                     match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) { | ||||
|                         (true, true) | (false, false) => (), | ||||
|                         (true, false) => { | ||||
|                             remove_from_user_defined.insert(docid); | ||||
| @@ -193,7 +196,6 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|                     } | ||||
|  | ||||
|                     // no autogeneration | ||||
|                     let del_vectors = old.into_array_of_vectors(); | ||||
|                     let add_vectors = new.into_array_of_vectors(); | ||||
|  | ||||
|                     if add_vectors.len() > usize::from(u8::MAX) { | ||||
| @@ -203,15 +205,15 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|                         ))); | ||||
|                     } | ||||
|  | ||||
|                     VectorStateDelta::ManualDelta(del_vectors, add_vectors) | ||||
|                     VectorStateDelta::ManualDelta(add_vectors) | ||||
|                 } | ||||
|                 (Some(_old), None) => { | ||||
|                 (Some(old), None) => { | ||||
|                     // Do we keep this document? | ||||
|                     let document_is_kept = obkv | ||||
|                         .iter() | ||||
|                         .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) | ||||
|                         .any(|deladd| deladd.get(DelAdd::Addition).is_some()); | ||||
|                     if document_is_kept { | ||||
|                     if document_is_kept && old.is_some() { | ||||
|                         remove_from_user_defined.insert(docid); | ||||
|                         // becomes autogenerated | ||||
|                         VectorStateDelta::NowGenerated(prompt.render( | ||||
| @@ -219,6 +221,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|                             DelAdd::Addition, | ||||
|                             new_fields_ids_map, | ||||
|                         )?) | ||||
|                     } else if document_is_kept && old.is_none() { | ||||
|                         VectorStateDelta::NoChange | ||||
|                     } else { | ||||
|                         VectorStateDelta::NowRemoved | ||||
|                     } | ||||
| @@ -315,8 +319,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|     Ok(results) | ||||
| } | ||||
|  | ||||
| /// Computes the diff between both Del and Add numbers and | ||||
| /// only inserts the parts that differ in the sorter. | ||||
| /// We cannot compute the diff between both Del and Add vectors. | ||||
| /// We'll push every vector and compute the difference later in TypedChunk. | ||||
| fn push_vectors_diff( | ||||
|     remove_vectors_writer: &mut Writer<BufWriter<File>>, | ||||
|     prompts_writer: &mut Writer<BufWriter<File>>, | ||||
| @@ -325,7 +329,7 @@ fn push_vectors_diff( | ||||
|     delta: VectorStateDelta, | ||||
|     reindex_vectors: bool, | ||||
| ) -> Result<()> { | ||||
|     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); | ||||
|     let (must_remove, prompt, mut add_vectors) = delta.into_values(); | ||||
|     if must_remove | ||||
|     // TODO: the below condition works because we erase the vec database when a embedding setting changes. | ||||
|     // When vector pipeline will be optimized, this should be removed. | ||||
| @@ -340,36 +344,19 @@ fn push_vectors_diff( | ||||
|     } | ||||
|  | ||||
|     // We sort and dedup the vectors | ||||
|     del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); | ||||
|     add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); | ||||
|     del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); | ||||
|     add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); | ||||
|  | ||||
|     let merged_vectors_iter = | ||||
|         itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add)); | ||||
|     // let merged_vectors_iter = | ||||
|     //     itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add)); | ||||
|  | ||||
|     // insert vectors into the writer | ||||
|     for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) { | ||||
|     for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) { | ||||
|         // Generate the key by extending the unique index to it. | ||||
|         key_buffer.truncate(TRUNCATE_SIZE); | ||||
|         let index = u16::try_from(i).unwrap(); | ||||
|         key_buffer.extend_from_slice(&index.to_be_bytes()); | ||||
|  | ||||
|         match eob { | ||||
|             EitherOrBoth::Both(_, _) => (), // no need to touch anything | ||||
|             EitherOrBoth::Left(vector) => { | ||||
|                 // TODO: the below condition works because we erase the vec database when a embedding setting changes. | ||||
|                 // When vector pipeline will be optimized, this should be removed. | ||||
|                 if !reindex_vectors { | ||||
|                     // We insert only the Del part of the Obkv to inform | ||||
|                     // that we only want to remove all those vectors. | ||||
|                     let mut obkv = KvWriterDelAdd::memory(); | ||||
|                     obkv.insert(DelAdd::Deletion, cast_slice(&vector))?; | ||||
|                     let bytes = obkv.into_inner()?; | ||||
|                     manual_vectors_writer.insert(&key_buffer, bytes)?; | ||||
|                 } | ||||
|             } | ||||
|             EitherOrBoth::Right(vector) => { | ||||
|         // We insert only the Add part of the Obkv to inform | ||||
|         // that we only want to remove all those vectors. | ||||
|         let mut obkv = KvWriterDelAdd::memory(); | ||||
| @@ -377,8 +364,6 @@ fn push_vectors_diff( | ||||
|         let bytes = obkv.into_inner()?; | ||||
|         manual_vectors_writer.insert(&key_buffer, bytes)?; | ||||
|     } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|   | ||||
| @@ -4,8 +4,9 @@ use obkv::KvReader; | ||||
| use serde_json::{from_slice, Value}; | ||||
|  | ||||
| use super::Embedding; | ||||
| use crate::index::IndexEmbeddingConfig; | ||||
| use crate::update::del_add::{DelAdd, KvReaderDelAdd}; | ||||
| use crate::{FieldId, InternalError, UserError}; | ||||
| use crate::{DocumentId, FieldId, InternalError, UserError}; | ||||
|  | ||||
| pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; | ||||
|  | ||||
| @@ -42,17 +43,19 @@ pub struct ExplicitVectors { | ||||
| } | ||||
|  | ||||
| pub struct ParsedVectorsDiff { | ||||
|     pub old: Option<BTreeMap<String, Vectors>>, | ||||
|     pub old: BTreeMap<String, Option<Vectors>>, | ||||
|     pub new: Option<BTreeMap<String, Vectors>>, | ||||
| } | ||||
|  | ||||
| impl ParsedVectorsDiff { | ||||
|     pub fn new( | ||||
|         docid: DocumentId, | ||||
|         embedders_configs: &[IndexEmbeddingConfig], | ||||
|         documents_diff: KvReader<'_, FieldId>, | ||||
|         old_vectors_fid: Option<FieldId>, | ||||
|         new_vectors_fid: Option<FieldId>, | ||||
|     ) -> Result<Self, Error> { | ||||
|         let old = match old_vectors_fid | ||||
|         let mut old = match old_vectors_fid | ||||
|             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) | ||||
|             .map(KvReaderDelAdd::new) | ||||
|             .map(|obkv| to_vector_map(obkv, DelAdd::Deletion)) | ||||
| @@ -68,7 +71,13 @@ impl ParsedVectorsDiff { | ||||
|                 return Err(error); | ||||
|             } | ||||
|         } | ||||
|         .flatten(); | ||||
|         .flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect()); | ||||
|         for embedding_config in embedders_configs { | ||||
|             if embedding_config.user_defined.contains(docid) { | ||||
|                 old.entry(embedding_config.name.to_string()).or_insert(None); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let new = new_vectors_fid | ||||
|             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) | ||||
|             .map(KvReaderDelAdd::new) | ||||
| @@ -78,8 +87,9 @@ impl ParsedVectorsDiff { | ||||
|         Ok(Self { old, new }) | ||||
|     } | ||||
|  | ||||
|     pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) { | ||||
|         let old = self.old.as_mut().and_then(|old| old.remove(embedder_name)); | ||||
|     /// Return (Some(None), _) in case the vector is user defined and contained in the database. | ||||
|     pub fn remove(&mut self, embedder_name: &str) -> (Option<Option<Vectors>>, Option<Vectors>) { | ||||
|         let old = self.old.remove(embedder_name); | ||||
|         let new = self.new.as_mut().and_then(|new| new.remove(embedder_name)); | ||||
|         (old, new) | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user