mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge #4881
4881: Infer locales from index settings r=curquiza a=ManyTheFish # Pull Request ## Related issue Fixes #4828 Fixes #4816 ## What does this PR do? - Add some tests using `AttributesToSearchOn` - Make the search infer the language based on the index settings when the `locales` field is not specified CI is now working: https://github.com/meilisearch/meilisearch/actions/runs/10490050545/job/29055955667 Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -386,12 +386,39 @@ async fn force_locales() { | ||||
|             |response, code| { | ||||
|                 snapshot!(response, @r###" | ||||
|                 { | ||||
|                   "hits": [], | ||||
|                   "hits": [ | ||||
|                     { | ||||
|                       "name_zh": "进击的巨人", | ||||
|                       "author_zh": "諫山創", | ||||
|                       "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", | ||||
|                       "id": 853, | ||||
|                       "_vectors": { | ||||
|                         "manual": [ | ||||
|                           1.0, | ||||
|                           2.0, | ||||
|                           3.0 | ||||
|                         ] | ||||
|                       }, | ||||
|                       "_formatted": { | ||||
|                         "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>", | ||||
|                         "author_zh": "諫山創", | ||||
|                         "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。", | ||||
|                         "id": "853", | ||||
|                         "_vectors": { | ||||
|                           "manual": [ | ||||
|                             "1.0", | ||||
|                             "2.0", | ||||
|                             "3.0" | ||||
|                           ] | ||||
|                         } | ||||
|                       } | ||||
|                     } | ||||
|                   ], | ||||
|                   "query": "\"进击的巨人\"", | ||||
|                   "processingTimeMs": "[duration]", | ||||
|                   "limit": 20, | ||||
|                   "offset": 0, | ||||
|                   "estimatedTotalHits": 0 | ||||
|                   "estimatedTotalHits": 1 | ||||
|                 } | ||||
|                 "###); | ||||
|                 snapshot!(code, @"200 OK"); | ||||
| @@ -483,12 +510,39 @@ async fn force_locales_with_pattern() { | ||||
|             |response, code| { | ||||
|                 snapshot!(response, @r###" | ||||
|                 { | ||||
|                   "hits": [], | ||||
|                   "hits": [ | ||||
|                     { | ||||
|                       "name_zh": "进击的巨人", | ||||
|                       "author_zh": "諫山創", | ||||
|                       "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", | ||||
|                       "id": 853, | ||||
|                       "_vectors": { | ||||
|                         "manual": [ | ||||
|                           1.0, | ||||
|                           2.0, | ||||
|                           3.0 | ||||
|                         ] | ||||
|                       }, | ||||
|                       "_formatted": { | ||||
|                         "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>", | ||||
|                         "author_zh": "諫山創", | ||||
|                         "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。", | ||||
|                         "id": "853", | ||||
|                         "_vectors": { | ||||
|                           "manual": [ | ||||
|                             "1.0", | ||||
|                             "2.0", | ||||
|                             "3.0" | ||||
|                           ] | ||||
|                         } | ||||
|                       } | ||||
|                     } | ||||
|                   ], | ||||
|                   "query": "\"进击的巨人\"", | ||||
|                   "processingTimeMs": "[duration]", | ||||
|                   "limit": 20, | ||||
|                   "offset": 0, | ||||
|                   "estimatedTotalHits": 0 | ||||
|                   "estimatedTotalHits": 1 | ||||
|                 } | ||||
|                 "###); | ||||
|                 snapshot!(code, @"200 OK"); | ||||
| @@ -761,6 +815,275 @@ async fn force_different_locales_with_pattern() { | ||||
|         .await; | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn auto_infer_locales_at_search_with_attributes_to_search_on() { | ||||
|     let server = Server::new().await; | ||||
|  | ||||
|     let index = server.index("test"); | ||||
|     let documents = DOCUMENTS.clone(); | ||||
|     let (response, _) = index | ||||
|         .update_settings( | ||||
|             json!({ | ||||
|                 "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"], | ||||
|                 "localizedAttributes": [ | ||||
|                     // force japanese on the "*_zh" attributes | ||||
|                     {"attributePatterns": ["*_zh"], "locales": ["jpn"]}, | ||||
|                     // force chinese on the "*_ja" attributes | ||||
|                     {"attributePatterns": ["*_ja"], "locales": ["cmn"]}, | ||||
|                     // any language: the "*_en" attributes get an empty locales list | ||||
|                     {"attributePatterns": ["*_en"], "locales": []} | ||||
|                 ] | ||||
|             }), | ||||
|         ) | ||||
|         .await; | ||||
|     snapshot!(response, @r###" | ||||
|     { | ||||
|       "taskUid": 0, | ||||
|       "indexUid": "test", | ||||
|       "status": "enqueued", | ||||
|       "type": "settingsUpdate", | ||||
|       "enqueuedAt": "[date]" | ||||
|     } | ||||
|     "###); | ||||
|     index.add_documents(documents, None).await; | ||||
|     index.wait_task(1).await; | ||||
|  | ||||
|     // auto infer any language: the query sets neither "locales" nor "attributesToSearchOn"; no hit is expected | ||||
|     index | ||||
|         .search( | ||||
|             json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), | ||||
|             |response, code| { | ||||
|                 snapshot!(response, @r###" | ||||
|                 { | ||||
|                   "hits": [], | ||||
|                   "query": "\"进击的巨人\"", | ||||
|                   "processingTimeMs": "[duration]", | ||||
|                   "limit": 20, | ||||
|                   "offset": 0, | ||||
|                   "estimatedTotalHits": 0 | ||||
|                 } | ||||
|                 "###); | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     // search restricted to "*_zh" attributes: one hit is expected; original note said "should infer chinese", but the rule above maps "*_zh" to "jpn" (NOTE(review): confirm intent) | ||||
|     index | ||||
|             .search( | ||||
|                 json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}), | ||||
|                 |response, code| { | ||||
|                     snapshot!(response, @r###" | ||||
|                     { | ||||
|                       "hits": [ | ||||
|                         { | ||||
|                           "name_zh": "进击的巨人", | ||||
|                           "author_zh": "諫山創", | ||||
|                           "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", | ||||
|                           "id": 853, | ||||
|                           "_vectors": { | ||||
|                             "manual": [ | ||||
|                               1.0, | ||||
|                               2.0, | ||||
|                               3.0 | ||||
|                             ] | ||||
|                           }, | ||||
|                           "_formatted": { | ||||
|                             "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>", | ||||
|                             "author_zh": "諫山創", | ||||
|                             "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。", | ||||
|                             "id": "853", | ||||
|                             "_vectors": { | ||||
|                               "manual": [ | ||||
|                                 "1.0", | ||||
|                                 "2.0", | ||||
|                                 "3.0" | ||||
|                               ] | ||||
|                             } | ||||
|                           } | ||||
|                         } | ||||
|                       ], | ||||
|                       "query": "\"进击的巨人\"", | ||||
|                       "processingTimeMs": "[duration]", | ||||
|                       "limit": 20, | ||||
|                       "offset": 0, | ||||
|                       "estimatedTotalHits": 1 | ||||
|                     } | ||||
|                     "###); | ||||
|                     snapshot!(code, @"200 OK"); | ||||
|                 }, | ||||
|             ) | ||||
|             .await; | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn auto_infer_locales_at_search() { | ||||
|     let server = Server::new().await; | ||||
|  | ||||
|     let index = server.index("test"); | ||||
|     let documents = DOCUMENTS.clone(); | ||||
|     let (response, _) = index | ||||
|         .update_settings( | ||||
|             json!({ | ||||
|                 "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"], | ||||
|                 "localizedAttributes": [ | ||||
|                     // force japanese on every attribute ("*" pattern); the searches below pass no "locales" | ||||
|                     {"attributePatterns": ["*"], "locales": ["jpn"]}, | ||||
|                 ] | ||||
|             }), | ||||
|         ) | ||||
|         .await; | ||||
|     snapshot!(response, @r###" | ||||
|     { | ||||
|       "taskUid": 0, | ||||
|       "indexUid": "test", | ||||
|       "status": "enqueued", | ||||
|       "type": "settingsUpdate", | ||||
|       "enqueuedAt": "[date]" | ||||
|     } | ||||
|     "###); | ||||
|     index.add_documents(documents, None).await; | ||||
|     index.wait_task(1).await; | ||||
|  | ||||
|     index | ||||
|         .search( | ||||
|             json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), | ||||
|             |response, code| { | ||||
|                 snapshot!(response, @r###" | ||||
|                 { | ||||
|                   "hits": [ | ||||
|                     { | ||||
|                       "name_zh": "进击的巨人", | ||||
|                       "author_zh": "諫山創", | ||||
|                       "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", | ||||
|                       "id": 853, | ||||
|                       "_vectors": { | ||||
|                         "manual": [ | ||||
|                           1.0, | ||||
|                           2.0, | ||||
|                           3.0 | ||||
|                         ] | ||||
|                       }, | ||||
|                       "_formatted": { | ||||
|                         "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>", | ||||
|                         "author_zh": "諫山創", | ||||
|                         "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。", | ||||
|                         "id": "853", | ||||
|                         "_vectors": { | ||||
|                           "manual": [ | ||||
|                             "1.0", | ||||
|                             "2.0", | ||||
|                             "3.0" | ||||
|                           ] | ||||
|                         } | ||||
|                       } | ||||
|                     } | ||||
|                   ], | ||||
|                   "query": "\"进击的巨人\"", | ||||
|                   "processingTimeMs": "[duration]", | ||||
|                   "limit": 20, | ||||
|                   "offset": 0, | ||||
|                   "estimatedTotalHits": 1 | ||||
|                 } | ||||
|                 "###); | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|             .search( | ||||
|                 json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), | ||||
|                 |response, code| { | ||||
|                     snapshot!(response, @r###" | ||||
|                     { | ||||
|                       "hits": [ | ||||
|                         { | ||||
|                           "name_zh": "进击的巨人", | ||||
|                           "author_zh": "諫山創", | ||||
|                           "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", | ||||
|                           "id": 853, | ||||
|                           "_vectors": { | ||||
|                             "manual": [ | ||||
|                               1.0, | ||||
|                               2.0, | ||||
|                               3.0 | ||||
|                             ] | ||||
|                           }, | ||||
|                           "_formatted": { | ||||
|                             "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>", | ||||
|                             "author_zh": "諫山創", | ||||
|                             "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。", | ||||
|                             "id": "853", | ||||
|                             "_vectors": { | ||||
|                               "manual": [ | ||||
|                                 "1.0", | ||||
|                                 "2.0", | ||||
|                                 "3.0" | ||||
|                               ] | ||||
|                             } | ||||
|                           } | ||||
|                         } | ||||
|                       ], | ||||
|                       "query": "\"进击的巨人\"", | ||||
|                       "processingTimeMs": "[duration]", | ||||
|                       "limit": 20, | ||||
|                       "offset": 0, | ||||
|                       "estimatedTotalHits": 1 | ||||
|                     } | ||||
|                     "###); | ||||
|                     snapshot!(code, @"200 OK"); | ||||
|                 }, | ||||
|             ) | ||||
|             .await; | ||||
|  | ||||
|     index | ||||
|         .search( | ||||
|             json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), | ||||
|             |response, code| { | ||||
|                 snapshot!(response, @r###" | ||||
|                 { | ||||
|                   "hits": [ | ||||
|                     { | ||||
|                       "name_zh": "进击的巨人", | ||||
|                       "author_zh": "諫山創", | ||||
|                       "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", | ||||
|                       "id": 853, | ||||
|                       "_vectors": { | ||||
|                         "manual": [ | ||||
|                           1.0, | ||||
|                           2.0, | ||||
|                           3.0 | ||||
|                         ] | ||||
|                       }, | ||||
|                       "_formatted": { | ||||
|                         "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>", | ||||
|                         "author_zh": "諫山創", | ||||
|                         "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。", | ||||
|                         "id": "853", | ||||
|                         "_vectors": { | ||||
|                           "manual": [ | ||||
|                             "1.0", | ||||
|                             "2.0", | ||||
|                             "3.0" | ||||
|                           ] | ||||
|                         } | ||||
|                       } | ||||
|                     } | ||||
|                   ], | ||||
|                   "query": "\"进击的巨人\"", | ||||
|                   "processingTimeMs": "[duration]", | ||||
|                   "limit": 20, | ||||
|                   "offset": 0, | ||||
|                   "estimatedTotalHits": 1 | ||||
|                 } | ||||
|                 "###); | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn force_different_locales_with_pattern_nested() { | ||||
|     let server = Server::new().await; | ||||
|   | ||||
| @@ -7,6 +7,7 @@ mod facet_search; | ||||
| mod formatted; | ||||
| mod geo; | ||||
| mod hybrid; | ||||
| #[cfg(not(feature = "chinese-pinyin"))] | ||||
| mod locales; | ||||
| mod matching_strategy; | ||||
| mod multi; | ||||
| @@ -392,6 +393,7 @@ async fn negative_special_cases_search() { | ||||
| } | ||||
|  | ||||
| #[cfg(feature = "default")] | ||||
| #[cfg(not(feature = "chinese-pinyin"))] | ||||
| #[actix_rt::test] | ||||
| async fn test_kanji_language_detection() { | ||||
|     let server = Server::new().await; | ||||
|   | ||||
| @@ -90,6 +90,21 @@ impl LocalizedFieldIds { | ||||
|     /// Returns the locales registered for `fields_id`, or `None` when no entry exists for that field. | ||||
|     pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> { | ||||
|         let registered = self.field_id_to_locales.get(&fields_id)?; | ||||
|         Some(registered.as_slice()) | ||||
|     } | ||||
|  | ||||
|     /// Merges the locales of every registered field into one sorted, deduplicated list. | ||||
|     /// | ||||
|     /// Returns an empty list when at least one field has no locales: such a field | ||||
|     /// is considered not localized. | ||||
|     pub fn all_locales(&self) -> Vec<Language> { | ||||
|         // A single field without locales makes the whole result empty. | ||||
|         if self.field_id_to_locales.values().any(|langs| langs.is_empty()) { | ||||
|             return Vec::new(); | ||||
|         } | ||||
|  | ||||
|         let mut merged: Vec<Language> = | ||||
|             self.field_id_to_locales.values().flatten().copied().collect(); | ||||
|         merged.sort(); | ||||
|         merged.dedup(); | ||||
|         merged | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
|   | ||||
| @@ -360,6 +360,7 @@ mod test { | ||||
|     use super::*; | ||||
|  | ||||
|     #[cfg(feature = "japanese")] | ||||
|     #[cfg(not(feature = "chinese-pinyin"))] | ||||
|     #[test] | ||||
|     fn test_kanji_language_detection() { | ||||
|         use crate::index::tests::TempIndex; | ||||
|   | ||||
| @@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy; | ||||
| use self::graph_based_ranking_rule::Words; | ||||
| use self::interner::Interned; | ||||
| use self::vector_sort::VectorSort; | ||||
| use crate::localized_attributes_rules::LocalizedFieldIds; | ||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||
| use crate::search::new::distinct::apply_distinct_rule; | ||||
| use crate::vector::Embedder; | ||||
| @@ -671,9 +672,44 @@ pub fn execute_search( | ||||
|             tokbuilder.words_dict(dictionary); | ||||
|         } | ||||
|  | ||||
|         if let Some(locales) = locales { | ||||
|             tokbuilder.allow_list(locales); | ||||
|         } | ||||
|         let db_locales; | ||||
|         match locales { | ||||
|             Some(locales) => { | ||||
|                 if !locales.is_empty() { | ||||
|                     tokbuilder.allow_list(locales); | ||||
|                 } | ||||
|             } | ||||
|             None => { | ||||
|                 // If no locales are specified, we use the locales specified in the localized attributes rules | ||||
|                 let localized_attributes_rules = ctx.index.localized_attributes_rules(ctx.txn)?; | ||||
|                 let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; | ||||
|                 let searchable_fields = ctx.index.searchable_fields_ids(ctx.txn)?; | ||||
|  | ||||
|                 let localized_fields = match &ctx.restricted_fids { | ||||
|                     // if AttributeToSearchOn is set, use the restricted list of ids | ||||
|                     Some(restricted_fids) => { | ||||
|                         let iter = restricted_fids | ||||
|                             .exact | ||||
|                             .iter() | ||||
|                             .chain(restricted_fids.tolerant.iter()) | ||||
|                             .map(|(fid, _)| *fid); | ||||
|  | ||||
|                         LocalizedFieldIds::new(&localized_attributes_rules, &fields_ids_map, iter) | ||||
|                     } | ||||
|                     // Otherwise use the full list of ids coming from the index searchable fields | ||||
|                     None => LocalizedFieldIds::new( | ||||
|                         &localized_attributes_rules, | ||||
|                         &fields_ids_map, | ||||
|                         searchable_fields.into_iter(), | ||||
|                     ), | ||||
|                 }; | ||||
|  | ||||
|                 db_locales = localized_fields.all_locales(); | ||||
|                 if !db_locales.is_empty() { | ||||
|                     tokbuilder.allow_list(&db_locales); | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let tokenizer = tokbuilder.build(); | ||||
|         drop(entered); | ||||
|   | ||||
| @@ -6,6 +6,7 @@ pub mod exactness; | ||||
| pub mod geo_sort; | ||||
| pub mod integration; | ||||
| #[cfg(feature = "all-tokenizations")] | ||||
| #[cfg(not(feature = "chinese-pinyin"))] | ||||
| pub mod language; | ||||
| pub mod ngram_split_words; | ||||
| pub mod proximity; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user