mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	always push the user defined vectors in arroy
This commit is contained in:
		| @@ -5173,8 +5173,8 @@ mod tests { | |||||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); |         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); | ||||||
|  |  | ||||||
|         println!("HEEEEERE"); |         println!("HEEEEERE"); | ||||||
|         // handle.advance_one_successful_batch(); |         handle.advance_one_successful_batch(); | ||||||
|         handle.advance_one_failed_batch(); |         // handle.advance_one_failed_batch(); | ||||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); |         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); | ||||||
|  |  | ||||||
|         { |         { | ||||||
| @@ -5351,9 +5351,9 @@ mod tests { | |||||||
|         // as user provided since we explicitely marked it as NOT user provided. |         // as user provided since we explicitely marked it as NOT user provided. | ||||||
|         snapshot!(format!("{conf:#?}"), @r###" |         snapshot!(format!("{conf:#?}"), @r###" | ||||||
|         [ |         [ | ||||||
|             ( |             IndexEmbeddingConfig { | ||||||
|                 "my_doggo_embedder", |                 name: "my_doggo_embedder", | ||||||
|                 EmbeddingConfig { |                 config: EmbeddingConfig { | ||||||
|                     embedder_options: HuggingFace( |                     embedder_options: HuggingFace( | ||||||
|                         EmbedderOptions { |                         EmbedderOptions { | ||||||
|                             model: "sentence-transformers/all-MiniLM-L6-v2", |                             model: "sentence-transformers/all-MiniLM-L6-v2", | ||||||
| @@ -5367,8 +5367,8 @@ mod tests { | |||||||
|                         template: "{{doc.doggo}}", |                         template: "{{doc.doggo}}", | ||||||
|                     }, |                     }, | ||||||
|                 }, |                 }, | ||||||
|                 RoaringBitmap<[1, 2]>, |                 user_defined: RoaringBitmap<[1, 2]>, | ||||||
|             ), |             }, | ||||||
|         ] |         ] | ||||||
|         "###); |         "###); | ||||||
|         let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); |         let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); | ||||||
|   | |||||||
| @@ -6,10 +6,6 @@ expression: doc | |||||||
|   "doggo": "Intel", |   "doggo": "Intel", | ||||||
|   "breed": "beagle", |   "breed": "beagle", | ||||||
|   "_vectors": { |   "_vectors": { | ||||||
|     "A_fakerest": { |  | ||||||
|       "embeddings": "[vector]", |  | ||||||
|       "userProvided": true |  | ||||||
|     }, |  | ||||||
|     "noise": [ |     "noise": [ | ||||||
|       0.1, |       0.1, | ||||||
|       0.2, |       0.2, | ||||||
| @@ -6,10 +6,6 @@ expression: doc | |||||||
|   "doggo": "kefir", |   "doggo": "kefir", | ||||||
|   "breed": "patou", |   "breed": "patou", | ||||||
|   "_vectors": { |   "_vectors": { | ||||||
|     "A_fakerest": { |  | ||||||
|       "embeddings": "[vector]", |  | ||||||
|       "userProvided": true |  | ||||||
|     }, |  | ||||||
|     "noise": [ |     "noise": [ | ||||||
|       0.1, |       0.1, | ||||||
|       0.2, |       0.2, | ||||||
| @@ -1,4 +0,0 @@ | |||||||
| --- |  | ||||||
| source: index-scheduler/src/lib.rs |  | ||||||
| --- |  | ||||||
| [{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}] |  | ||||||
| @@ -204,7 +204,7 @@ async fn distribution_shift() { | |||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
|     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; |     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; | ||||||
|  |  | ||||||
|     let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}}); |     let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); | ||||||
|     let (response, code) = index.search_post(search.clone()).await; |     let (response, code) = index.search_post(search.clone()).await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); | ||||||
| @@ -239,20 +239,23 @@ async fn highlighter() { | |||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], |         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||||
|             "hybrid": {"semanticRatio": 0.2}, |             "hybrid": {"semanticRatio": 0.2}, | ||||||
|             "attributesToHighlight": [ |            "retrieveVectors": true, | ||||||
|                      "desc" |            "attributesToHighlight": [ | ||||||
|  |                      "desc", | ||||||
|  |                      "_vectors", | ||||||
|                    ], |                    ], | ||||||
|                    "highlightPreTag": "**BEGIN**", |            "highlightPreTag": "**BEGIN**", | ||||||
|                    "highlightPostTag": "**END**" |            "highlightPostTag": "**END**", | ||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"0"); |     snapshot!(response["semanticHitCount"], @"0"); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], |         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||||
|             "hybrid": {"semanticRatio": 0.8}, |             "hybrid": {"semanticRatio": 0.8}, | ||||||
|  |             "retrieveVectors": true, | ||||||
|             "showRankingScore": true, |             "showRankingScore": true, | ||||||
|             "attributesToHighlight": [ |             "attributesToHighlight": [ | ||||||
|                      "desc" |                      "desc" | ||||||
| @@ -262,13 +265,14 @@ async fn highlighter() { | |||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"3"); |     snapshot!(response["semanticHitCount"], @"3"); | ||||||
|  |  | ||||||
|     // no highlighting on full semantic |     // no highlighting on full semantic | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], |         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||||
|             "hybrid": {"semanticRatio": 1.0}, |             "hybrid": {"semanticRatio": 1.0}, | ||||||
|  |             "retrieveVectors": true, | ||||||
|             "showRankingScore": true, |             "showRankingScore": true, | ||||||
|             "attributesToHighlight": [ |             "attributesToHighlight": [ | ||||||
|                      "desc" |                      "desc" | ||||||
| @@ -278,7 +282,7 @@ async fn highlighter() { | |||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"3"); |     snapshot!(response["semanticHitCount"], @"3"); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -361,7 +365,7 @@ async fn single_document() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post( |     .search_post( | ||||||
|         json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}), |         json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|     ) |     ) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
| @@ -377,7 +381,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // search without query and vector, but with hybrid => still placeholder |     // search without query and vector, but with hybrid => still placeholder | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) |         .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -386,7 +390,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // same with a different semantic ratio |     // same with a different semantic ratio | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true})) |         .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -395,7 +399,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // wrong vector dimensions |     // wrong vector dimensions | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) |     .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
| @@ -410,7 +414,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // full vector |     // full vector | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) |     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -419,7 +423,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // full keyword, without a query |     // full keyword, without a query | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true})) |     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -428,7 +432,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // query + vector, full keyword => keyword |     // query + vector, full keyword => keyword | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true})) |     .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -437,7 +441,7 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // query + vector, no hybrid keyword => |     // query + vector, no hybrid keyword => | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true})) |         .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true})) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
| @@ -453,7 +457,7 @@ async fn query_combination() { | |||||||
|     // full vector, without a vector => error |     // full vector, without a vector => error | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}), |             json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
| @@ -470,7 +474,7 @@ async fn query_combination() { | |||||||
|     // hybrid without a vector => full keyword |     // hybrid without a vector => full keyword | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}), |             json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1337,6 +1337,7 @@ async fn experimental_feature_vector_store() { | |||||||
|         .search_post(json!({ |         .search_post(json!({ | ||||||
|             "vector": [1.0, 2.0, 3.0], |             "vector": [1.0, 2.0, 3.0], | ||||||
|             "showRankingScore": true, |             "showRankingScore": true, | ||||||
|  |             "retrieveVectors": true, | ||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -78,7 +78,7 @@ async fn basic() { | |||||||
|     index.wait_task(value.uid()).await; |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 143}), |response, code| { |         .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { | ||||||
|             snapshot!(code, @"200 OK"); |             snapshot!(code, @"200 OK"); | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             snapshot!(json_string!(response["hits"]), @r###" | ||||||
|             [ |             [ | ||||||
| @@ -88,9 +88,9 @@ async fn basic() { | |||||||
|                 "id": "522681", |                 "id": "522681", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.1, |                     0.10000000149011612, | ||||||
|                     0.6, |                     0.6000000238418579, | ||||||
|                     0.8 |                     0.800000011920929 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
| @@ -100,9 +100,9 @@ async fn basic() { | |||||||
|                 "id": "299537", |                 "id": "299537", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.6, |                     0.6000000238418579, | ||||||
|                     0.8, |                     0.800000011920929, | ||||||
|                     -0.2 |                     -0.20000000298023224 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
| @@ -112,9 +112,9 @@ async fn basic() { | |||||||
|                 "id": "166428", |                 "id": "166428", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.7, |                     0.699999988079071, | ||||||
|                     0.7, |                     0.699999988079071, | ||||||
|                     -0.4 |                     -0.4000000059604645 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
| @@ -124,8 +124,8 @@ async fn basic() { | |||||||
|                 "id": "287947", |                 "id": "287947", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.8, |                     0.800000011920929, | ||||||
|                     0.4, |                     0.4000000059604645, | ||||||
|                     -0.5 |                     -0.5 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
| @@ -136,7 +136,7 @@ async fn basic() { | |||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": "299537"}), |response, code| { |         .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { | ||||||
|             snapshot!(code, @"200 OK"); |             snapshot!(code, @"200 OK"); | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             snapshot!(json_string!(response["hits"]), @r###" | ||||||
|             [ |             [ | ||||||
| @@ -146,9 +146,9 @@ async fn basic() { | |||||||
|                 "id": "166428", |                 "id": "166428", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.7, |                     0.699999988079071, | ||||||
|                     0.7, |                     0.699999988079071, | ||||||
|                     -0.4 |                     -0.4000000059604645 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
| @@ -158,8 +158,8 @@ async fn basic() { | |||||||
|                 "id": "287947", |                 "id": "287947", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.8, |                     0.800000011920929, | ||||||
|                     0.4, |                     0.4000000059604645, | ||||||
|                     -0.5 |                     -0.5 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
| @@ -170,9 +170,9 @@ async fn basic() { | |||||||
|                 "id": "522681", |                 "id": "522681", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     0.1, |                     0.10000000149011612, | ||||||
|                     0.6, |                     0.6000000238418579, | ||||||
|                     0.8 |                     0.800000011920929 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
| @@ -183,8 +183,8 @@ async fn basic() { | |||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": [ | ||||||
|                     -0.5, |                     -0.5, | ||||||
|                     0.3, |                     0.30000001192092896, | ||||||
|                     0.85 |                     0.8500000238418579 | ||||||
|                   ] |                   ] | ||||||
|                 } |                 } | ||||||
|               } |               } | ||||||
| @@ -456,71 +456,77 @@ async fn filter() { | |||||||
|     index.wait_task(value.uid()).await; |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| { |         .similar( | ||||||
|             snapshot!(code, @"200 OK"); |             json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             |response, code| { | ||||||
|             [ |                 snapshot!(code, @"200 OK"); | ||||||
|               { |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|                 "title": "Captain Marvel", |                 [ | ||||||
|                 "release_year": 2019, |                   { | ||||||
|                 "id": "299537", |                     "title": "Captain Marvel", | ||||||
|                 "_vectors": { |                     "release_year": 2019, | ||||||
|                   "manual": [ |                     "id": "299537", | ||||||
|                     0.6, |                     "_vectors": { | ||||||
|                     0.8, |                       "manual": [ | ||||||
|                     -0.2 |                         0.6000000238418579, | ||||||
|                   ] |                         0.800000011920929, | ||||||
|                 } |                         -0.20000000298023224 | ||||||
|               }, |                       ] | ||||||
|               { |                     } | ||||||
|                 "title": "How to Train Your Dragon: The Hidden World", |                   }, | ||||||
|                 "release_year": 2019, |                   { | ||||||
|                 "id": "166428", |                     "title": "How to Train Your Dragon: The Hidden World", | ||||||
|                 "_vectors": { |                     "release_year": 2019, | ||||||
|                   "manual": [ |                     "id": "166428", | ||||||
|                     0.7, |                     "_vectors": { | ||||||
|                     0.7, |                       "manual": [ | ||||||
|                     -0.4 |                         0.699999988079071, | ||||||
|                   ] |                         0.699999988079071, | ||||||
|                 } |                         -0.4000000059604645 | ||||||
|               }, |                       ] | ||||||
|               { |                     } | ||||||
|                 "title": "Shazam!", |                   }, | ||||||
|                 "release_year": 2019, |                   { | ||||||
|                 "id": "287947", |                     "title": "Shazam!", | ||||||
|                 "_vectors": { |                     "release_year": 2019, | ||||||
|                   "manual": [ |                     "id": "287947", | ||||||
|                     0.8, |                     "_vectors": { | ||||||
|                     0.4, |                       "manual": [ | ||||||
|                     -0.5 |                         0.800000011920929, | ||||||
|                   ] |                         0.4000000059604645, | ||||||
|                 } |                         -0.5 | ||||||
|               } |                       ] | ||||||
|             ] |                     } | ||||||
|             "###); |                   } | ||||||
|         }) |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| { |         .similar( | ||||||
|             snapshot!(code, @"200 OK"); |             json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             |response, code| { | ||||||
|             [ |                 snapshot!(code, @"200 OK"); | ||||||
|               { |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|                 "title": "All Quiet on the Western Front", |                 [ | ||||||
|                 "release_year": 1930, |                   { | ||||||
|                 "id": "143", |                     "title": "All Quiet on the Western Front", | ||||||
|                 "_vectors": { |                     "release_year": 1930, | ||||||
|                   "manual": [ |                     "id": "143", | ||||||
|                     -0.5, |                     "_vectors": { | ||||||
|                     0.3, |                       "manual": [ | ||||||
|                     0.85 |                         -0.5, | ||||||
|                   ] |                         0.30000001192092896, | ||||||
|                 } |                         0.8500000238418579 | ||||||
|               } |                       ] | ||||||
|             ] |                     } | ||||||
|             "###); |                   } | ||||||
|         }) |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -579,24 +585,27 @@ async fn limit_and_offset() { | |||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| { |         .similar( | ||||||
|             snapshot!(code, @"200 OK"); |             json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             |response, code| { | ||||||
|             [ |                 snapshot!(code, @"200 OK"); | ||||||
|               { |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|                 "title": "Captain Marvel", |                 [ | ||||||
|                 "release_year": 2019, |                   { | ||||||
|                 "id": "299537", |                     "title": "Captain Marvel", | ||||||
|                 "_vectors": { |                     "release_year": 2019, | ||||||
|                   "manual": [ |                     "id": "299537", | ||||||
|                     0.6, |                     "_vectors": { | ||||||
|                     0.8, |                       "manual": [ | ||||||
|                     -0.2 |                         0.6000000238418579, | ||||||
|                   ] |                         0.800000011920929, | ||||||
|                 } |                         -0.20000000298023224 | ||||||
|               } |                       ] | ||||||
|             ] |                     } | ||||||
|             "###); |                   } | ||||||
|         }) |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,244 +0,0 @@ | |||||||
| --- |  | ||||||
| source: milli/src/search/new/tests/attribute_fid.rs |  | ||||||
| expression: "format!(\"{document_ids_scores:#?}\")" |  | ||||||
| --- |  | ||||||
| [ |  | ||||||
|     ( |  | ||||||
|         2, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 19, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 91, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         6, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 15, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 81, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         5, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 14, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 79, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         4, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 13, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 77, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         3, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 12, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 83, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         9, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 11, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 75, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         8, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 10, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 79, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         7, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 10, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 73, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         11, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 7, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 77, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         10, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 6, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 81, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         13, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 6, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 81, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         12, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 6, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 78, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         14, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 5, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 75, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         0, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 1, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 91, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
| ] |  | ||||||
| @@ -1,7 +0,0 @@ | |||||||
| --- |  | ||||||
| source: milli/src/index.rs |  | ||||||
| --- |  | ||||||
| age              1      | |  | ||||||
| id               2      | |  | ||||||
| name             2      | |  | ||||||
|  |  | ||||||
| @@ -1,7 +0,0 @@ | |||||||
| --- |  | ||||||
| source: milli/src/index.rs |  | ||||||
| --- |  | ||||||
| age              1      | |  | ||||||
| id               2      | |  | ||||||
| name             2      | |  | ||||||
|  |  | ||||||
| @@ -8,7 +8,6 @@ use std::sync::Arc; | |||||||
|  |  | ||||||
| use bytemuck::cast_slice; | use bytemuck::cast_slice; | ||||||
| use grenad::Writer; | use grenad::Writer; | ||||||
| use itertools::EitherOrBoth; |  | ||||||
| use ordered_float::OrderedFloat; | use ordered_float::OrderedFloat; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
| @@ -50,7 +49,7 @@ enum VectorStateDelta { | |||||||
|     // Note: changing the value of the manually specified vector **should not record** this delta |     // Note: changing the value of the manually specified vector **should not record** this delta | ||||||
|     WasGeneratedNowManual(Vec<Vec<f32>>), |     WasGeneratedNowManual(Vec<Vec<f32>>), | ||||||
|  |  | ||||||
|     ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>), |     ManualDelta(Vec<Vec<f32>>), | ||||||
|  |  | ||||||
|     // Add the vector computed from the specified prompt |     // Add the vector computed from the specified prompt | ||||||
|     // Remove any previous vector |     // Remove any previous vector | ||||||
| @@ -59,14 +58,12 @@ enum VectorStateDelta { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl VectorStateDelta { | impl VectorStateDelta { | ||||||
|     fn into_values(self) -> (bool, String, (Vec<Vec<f32>>, Vec<Vec<f32>>)) { |     fn into_values(self) -> (bool, String, Vec<Vec<f32>>) { | ||||||
|         match self { |         match self { | ||||||
|             VectorStateDelta::NoChange => Default::default(), |             VectorStateDelta::NoChange => Default::default(), | ||||||
|             VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()), |             VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()), | ||||||
|             VectorStateDelta::WasGeneratedNowManual(add) => { |             VectorStateDelta::WasGeneratedNowManual(add) => (true, Default::default(), add), | ||||||
|                 (true, Default::default(), (Default::default(), add)) |             VectorStateDelta::ManualDelta(add) => (false, Default::default(), add), | ||||||
|             } |  | ||||||
|             VectorStateDelta::ManualDelta(del, add) => (false, Default::default(), (del, add)), |  | ||||||
|             VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()), |             VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -166,8 +163,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|         // lazily get it when needed |         // lazily get it when needed | ||||||
|         let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; |         let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; | ||||||
|  |  | ||||||
|         let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid) |         let mut parsed_vectors = ParsedVectorsDiff::new( | ||||||
|             .map_err(|error| error.to_crate_error(document_id().to_string()))?; |             docid, | ||||||
|  |             embedders_configs, | ||||||
|  |             obkv, | ||||||
|  |             old_vectors_fid, | ||||||
|  |             new_vectors_fid, | ||||||
|  |         ) | ||||||
|  |         .map_err(|error| error.to_crate_error(document_id().to_string()))?; | ||||||
|  |  | ||||||
|         for EmbedderVectorExtractor { |         for EmbedderVectorExtractor { | ||||||
|             embedder_name, |             embedder_name, | ||||||
| @@ -182,7 +185,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|         { |         { | ||||||
|             let delta = match parsed_vectors.remove(embedder_name) { |             let delta = match parsed_vectors.remove(embedder_name) { | ||||||
|                 (Some(old), Some(new)) => { |                 (Some(old), Some(new)) => { | ||||||
|                     match (old.is_user_provided(), new.is_user_provided()) { |                     match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) { | ||||||
|                         (true, true) | (false, false) => (), |                         (true, true) | (false, false) => (), | ||||||
|                         (true, false) => { |                         (true, false) => { | ||||||
|                             remove_from_user_defined.insert(docid); |                             remove_from_user_defined.insert(docid); | ||||||
| @@ -193,7 +196,6 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|                     } |                     } | ||||||
|  |  | ||||||
|                     // no autogeneration |                     // no autogeneration | ||||||
|                     let del_vectors = old.into_array_of_vectors(); |  | ||||||
|                     let add_vectors = new.into_array_of_vectors(); |                     let add_vectors = new.into_array_of_vectors(); | ||||||
|  |  | ||||||
|                     if add_vectors.len() > usize::from(u8::MAX) { |                     if add_vectors.len() > usize::from(u8::MAX) { | ||||||
| @@ -203,15 +205,15 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|                         ))); |                         ))); | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|                     VectorStateDelta::ManualDelta(del_vectors, add_vectors) |                     VectorStateDelta::ManualDelta(add_vectors) | ||||||
|                 } |                 } | ||||||
|                 (Some(_old), None) => { |                 (Some(old), None) => { | ||||||
|                     // Do we keep this document? |                     // Do we keep this document? | ||||||
|                     let document_is_kept = obkv |                     let document_is_kept = obkv | ||||||
|                         .iter() |                         .iter() | ||||||
|                         .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) |                         .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) | ||||||
|                         .any(|deladd| deladd.get(DelAdd::Addition).is_some()); |                         .any(|deladd| deladd.get(DelAdd::Addition).is_some()); | ||||||
|                     if document_is_kept { |                     if document_is_kept && old.is_some() { | ||||||
|                         remove_from_user_defined.insert(docid); |                         remove_from_user_defined.insert(docid); | ||||||
|                         // becomes autogenerated |                         // becomes autogenerated | ||||||
|                         VectorStateDelta::NowGenerated(prompt.render( |                         VectorStateDelta::NowGenerated(prompt.render( | ||||||
| @@ -219,6 +221,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|                             DelAdd::Addition, |                             DelAdd::Addition, | ||||||
|                             new_fields_ids_map, |                             new_fields_ids_map, | ||||||
|                         )?) |                         )?) | ||||||
|  |                     } else if document_is_kept && old.is_none() { | ||||||
|  |                         VectorStateDelta::NoChange | ||||||
|                     } else { |                     } else { | ||||||
|                         VectorStateDelta::NowRemoved |                         VectorStateDelta::NowRemoved | ||||||
|                     } |                     } | ||||||
| @@ -315,8 +319,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|     Ok(results) |     Ok(results) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Computes the diff between both Del and Add numbers and | /// We cannot compute the diff between both Del and Add vectors. | ||||||
| /// only inserts the parts that differ in the sorter. | /// We'll push every vector and compute the difference later in TypedChunk. | ||||||
| fn push_vectors_diff( | fn push_vectors_diff( | ||||||
|     remove_vectors_writer: &mut Writer<BufWriter<File>>, |     remove_vectors_writer: &mut Writer<BufWriter<File>>, | ||||||
|     prompts_writer: &mut Writer<BufWriter<File>>, |     prompts_writer: &mut Writer<BufWriter<File>>, | ||||||
| @@ -325,7 +329,7 @@ fn push_vectors_diff( | |||||||
|     delta: VectorStateDelta, |     delta: VectorStateDelta, | ||||||
|     reindex_vectors: bool, |     reindex_vectors: bool, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); |     let (must_remove, prompt, mut add_vectors) = delta.into_values(); | ||||||
|     if must_remove |     if must_remove | ||||||
|     // TODO: the condition below works because we erase the vector database when an embedding setting changes. |     // TODO: the condition below works because we erase the vector database when an embedding setting changes. | ||||||
|     // Once the vector pipeline is optimized, this should be removed. |     // Once the vector pipeline is optimized, this should be removed. | ||||||
| @@ -340,44 +344,25 @@ fn push_vectors_diff( | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     // We sort and dedup the vectors |     // We sort and dedup the vectors | ||||||
|     del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); |  | ||||||
|     add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); |     add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); | ||||||
|     del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); |  | ||||||
|     add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); |     add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); | ||||||
|  |  | ||||||
|     let merged_vectors_iter = |     // let merged_vectors_iter = | ||||||
|         itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add)); |     //     itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add)); | ||||||
|  |  | ||||||
|     // insert vectors into the writer |     // insert vectors into the writer | ||||||
|     for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) { |     for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) { | ||||||
|         // Generate the key by extending the unique index to it. |         // Generate the key by extending the unique index to it. | ||||||
|         key_buffer.truncate(TRUNCATE_SIZE); |         key_buffer.truncate(TRUNCATE_SIZE); | ||||||
|         let index = u16::try_from(i).unwrap(); |         let index = u16::try_from(i).unwrap(); | ||||||
|         key_buffer.extend_from_slice(&index.to_be_bytes()); |         key_buffer.extend_from_slice(&index.to_be_bytes()); | ||||||
|  |  | ||||||
|         match eob { |         // We insert only the Add part of the Obkv to inform | ||||||
|             EitherOrBoth::Both(_, _) => (), // no need to touch anything |         // that we only want to add all those vectors. | ||||||
|             EitherOrBoth::Left(vector) => { |         let mut obkv = KvWriterDelAdd::memory(); | ||||||
|                 // TODO: the below condition works because we erase the vec database when a embedding setting changes. |         obkv.insert(DelAdd::Addition, cast_slice(&vector))?; | ||||||
|                 // When vector pipeline will be optimized, this should be removed. |         let bytes = obkv.into_inner()?; | ||||||
|                 if !reindex_vectors { |         manual_vectors_writer.insert(&key_buffer, bytes)?; | ||||||
|                     // We insert only the Del part of the Obkv to inform |  | ||||||
|                     // that we only want to remove all those vectors. |  | ||||||
|                     let mut obkv = KvWriterDelAdd::memory(); |  | ||||||
|                     obkv.insert(DelAdd::Deletion, cast_slice(&vector))?; |  | ||||||
|                     let bytes = obkv.into_inner()?; |  | ||||||
|                     manual_vectors_writer.insert(&key_buffer, bytes)?; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             EitherOrBoth::Right(vector) => { |  | ||||||
|                 // We insert only the Add part of the Obkv to inform |  | ||||||
|                 // that we only want to remove all those vectors. |  | ||||||
|                 let mut obkv = KvWriterDelAdd::memory(); |  | ||||||
|                 obkv.insert(DelAdd::Addition, cast_slice(&vector))?; |  | ||||||
|                 let bytes = obkv.into_inner()?; |  | ||||||
|                 manual_vectors_writer.insert(&key_buffer, bytes)?; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     Ok(()) |     Ok(()) | ||||||
|   | |||||||
| @@ -4,8 +4,9 @@ use obkv::KvReader; | |||||||
| use serde_json::{from_slice, Value}; | use serde_json::{from_slice, Value}; | ||||||
|  |  | ||||||
| use super::Embedding; | use super::Embedding; | ||||||
|  | use crate::index::IndexEmbeddingConfig; | ||||||
| use crate::update::del_add::{DelAdd, KvReaderDelAdd}; | use crate::update::del_add::{DelAdd, KvReaderDelAdd}; | ||||||
| use crate::{FieldId, InternalError, UserError}; | use crate::{DocumentId, FieldId, InternalError, UserError}; | ||||||
|  |  | ||||||
| pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; | pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; | ||||||
|  |  | ||||||
| @@ -42,17 +43,19 @@ pub struct ExplicitVectors { | |||||||
| } | } | ||||||
|  |  | ||||||
| pub struct ParsedVectorsDiff { | pub struct ParsedVectorsDiff { | ||||||
|     pub old: Option<BTreeMap<String, Vectors>>, |     pub old: BTreeMap<String, Option<Vectors>>, | ||||||
|     pub new: Option<BTreeMap<String, Vectors>>, |     pub new: Option<BTreeMap<String, Vectors>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl ParsedVectorsDiff { | impl ParsedVectorsDiff { | ||||||
|     pub fn new( |     pub fn new( | ||||||
|  |         docid: DocumentId, | ||||||
|  |         embedders_configs: &[IndexEmbeddingConfig], | ||||||
|         documents_diff: KvReader<'_, FieldId>, |         documents_diff: KvReader<'_, FieldId>, | ||||||
|         old_vectors_fid: Option<FieldId>, |         old_vectors_fid: Option<FieldId>, | ||||||
|         new_vectors_fid: Option<FieldId>, |         new_vectors_fid: Option<FieldId>, | ||||||
|     ) -> Result<Self, Error> { |     ) -> Result<Self, Error> { | ||||||
|         let old = match old_vectors_fid |         let mut old = match old_vectors_fid | ||||||
|             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) |             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) | ||||||
|             .map(KvReaderDelAdd::new) |             .map(KvReaderDelAdd::new) | ||||||
|             .map(|obkv| to_vector_map(obkv, DelAdd::Deletion)) |             .map(|obkv| to_vector_map(obkv, DelAdd::Deletion)) | ||||||
| @@ -68,7 +71,13 @@ impl ParsedVectorsDiff { | |||||||
|                 return Err(error); |                 return Err(error); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         .flatten(); |         .flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect()); | ||||||
|  |         for embedding_config in embedders_configs { | ||||||
|  |             if embedding_config.user_defined.contains(docid) { | ||||||
|  |                 old.entry(embedding_config.name.to_string()).or_insert(None); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let new = new_vectors_fid |         let new = new_vectors_fid | ||||||
|             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) |             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) | ||||||
|             .map(KvReaderDelAdd::new) |             .map(KvReaderDelAdd::new) | ||||||
| @@ -78,8 +87,9 @@ impl ParsedVectorsDiff { | |||||||
|         Ok(Self { old, new }) |         Ok(Self { old, new }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) { |     /// Return (Some(None), _) in case the vector is user defined and contained in the database. | ||||||
|         let old = self.old.as_mut().and_then(|old| old.remove(embedder_name)); |     pub fn remove(&mut self, embedder_name: &str) -> (Option<Option<Vectors>>, Option<Vectors>) { | ||||||
|  |         let old = self.old.remove(embedder_name); | ||||||
|         let new = self.new.as_mut().and_then(|new| new.remove(embedder_name)); |         let new = self.new.as_mut().and_then(|new| new.remove(embedder_name)); | ||||||
|         (old, new) |         (old, new) | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user