mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Implement localized search
This commit is contained in:
		
				
					committed by
					
						 Louis Dureuil
						Louis Dureuil
					
				
			
			
				
	
			
			
			
						parent
						
							d82f8fd904
						
					
				
				
					commit
					90c0a6db7d
				
			| @@ -256,6 +256,7 @@ InvalidSearchCropLength               , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ; | InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ; | InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchSemanticRatio            , InvalidRequest       , BAD_REQUEST ; | InvalidSearchSemanticRatio            , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSearchLocales                  , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ; | InvalidFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSimilarId                      , InvalidRequest       , BAD_REQUEST ; | InvalidSimilarId                      , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchFilter                   , InvalidRequest       , BAD_REQUEST ; | InvalidSearchFilter                   , InvalidRequest       , BAD_REQUEST ; | ||||||
|   | |||||||
| @@ -7,6 +7,7 @@ pub mod features; | |||||||
| pub mod index_uid; | pub mod index_uid; | ||||||
| pub mod index_uid_pattern; | pub mod index_uid_pattern; | ||||||
| pub mod keys; | pub mod keys; | ||||||
|  | pub mod locales; | ||||||
| pub mod settings; | pub mod settings; | ||||||
| pub mod star_or; | pub mod star_or; | ||||||
| pub mod task_view; | pub mod task_view; | ||||||
|   | |||||||
							
								
								
									
										132
									
								
								meilisearch-types/src/locales.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								meilisearch-types/src/locales.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,132 @@ | |||||||
|  | use deserr::Deserr; | ||||||
|  | use serde::{Deserialize, Serialize}; | ||||||
|  | use serde_json::json; | ||||||
|  |  | ||||||
|  | use milli::LocalizedAttributesRule; | ||||||
|  |  | ||||||
|  | /// Generates the `Locale` enum together with its conversions from and to | ||||||
|  | /// `milli::tokenizer::Language`. | ||||||
|  | /// | ||||||
|  | /// The macro takes a comma-separated list of variant names (no trailing comma). Every name | ||||||
|  | /// must also be a variant of `milli::tokenizer::Language`: both `From` implementations map | ||||||
|  | /// the variants one-to-one by name, and the `From<Language>` match has no wildcard arm, so | ||||||
|  | /// the list is expected to cover every `Language` variant. | ||||||
|  | /// | ||||||
|  | /// Besides the enum, the expansion defines: | ||||||
|  | /// - `LocaleFormatError`, whose `Display` output lists every accepted locale; | ||||||
|  | /// - `FromStr for Locale`, which resolves the string through `Language::from_code`. | ||||||
|  | /// | ||||||
|  | /// The enum implements `Deserr` in order to be used in the API. | ||||||
|  | macro_rules! make_locale { | ||||||
|  |     ($($language:ident), +) => { | ||||||
|  |         #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] | ||||||
|  |         #[deserr(rename_all = camelCase)] | ||||||
|  |         #[serde(rename_all = "camelCase")] | ||||||
|  |         pub enum Locale { | ||||||
|  |             $($language),+, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         impl From<milli::tokenizer::Language> for Locale { | ||||||
|  |             fn from(other: milli::tokenizer::Language) -> Locale { | ||||||
|  |                 match other { | ||||||
|  |                     $(milli::tokenizer::Language::$language => Locale::$language), +, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         impl From<Locale> for milli::tokenizer::Language { | ||||||
|  |             fn from(other: Locale) -> milli::tokenizer::Language { | ||||||
|  |                 match other { | ||||||
|  |                     $(Locale::$language => milli::tokenizer::Language::$language), +, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         /// Error returned by `Locale::from_str` when the input is not a known locale. | ||||||
|  |         #[derive(Debug)] | ||||||
|  |         pub struct LocaleFormatError { | ||||||
|  |             /// The rejected input, kept verbatim for the error message. | ||||||
|  |             pub invalid_locale: String, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         impl std::fmt::Display for LocaleFormatError { | ||||||
|  |             fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |                 // Each locale is rendered with its serde name so the message matches what the API accepts. | ||||||
|  |                 let valid_locales = [$(Locale::$language),+] | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|l| { | ||||||
|  |                         let name = json!(l); | ||||||
|  |                         format!("`{}`", name.as_str().expect("unit variants serialize to JSON strings")) | ||||||
|  |                     }) | ||||||
|  |                     .collect::<Vec<_>>() | ||||||
|  |                     .join(", "); | ||||||
|  |                 write!(f, "Unknown value `{}`, expected one of {}", self.invalid_locale, valid_locales) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         impl std::error::Error for LocaleFormatError {} | ||||||
|  |  | ||||||
|  |         impl std::str::FromStr for Locale { | ||||||
|  |             type Err = LocaleFormatError; | ||||||
|  |  | ||||||
|  |             /// Parses a language code via `milli::tokenizer::Language::from_code`. | ||||||
|  |             fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |                 // `ok_or_else` builds the error (and its `String`) only when the lookup fails. | ||||||
|  |                 milli::tokenizer::Language::from_code(s) | ||||||
|  |                     .map(Self::from) | ||||||
|  |                     .ok_or_else(|| LocaleFormatError { invalid_locale: s.to_string() }) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Locales exposed through the API; each entry names a `milli::tokenizer::Language` variant. | ||||||
|  | // The order matters: it drives the derived `Ord` and the listing in `LocaleFormatError`. | ||||||
|  | make_locale! { | ||||||
|  |     Epo, | ||||||
|  |     Eng, | ||||||
|  |     Rus, | ||||||
|  |     Cmn, | ||||||
|  |     Spa, | ||||||
|  |     Por, | ||||||
|  |     Ita, | ||||||
|  |     Ben, | ||||||
|  |     Fra, | ||||||
|  |     Deu, | ||||||
|  |     Ukr, | ||||||
|  |     Kat, | ||||||
|  |     Ara, | ||||||
|  |     Hin, | ||||||
|  |     Jpn, | ||||||
|  |     Heb, | ||||||
|  |     Yid, | ||||||
|  |     Pol, | ||||||
|  |     Amh, | ||||||
|  |     Jav, | ||||||
|  |     Kor, | ||||||
|  |     Nob, | ||||||
|  |     Dan, | ||||||
|  |     Swe, | ||||||
|  |     Fin, | ||||||
|  |     Tur, | ||||||
|  |     Nld, | ||||||
|  |     Hun, | ||||||
|  |     Ces, | ||||||
|  |     Ell, | ||||||
|  |     Bul, | ||||||
|  |     Bel, | ||||||
|  |     Mar, | ||||||
|  |     Kan, | ||||||
|  |     Ron, | ||||||
|  |     Slv, | ||||||
|  |     Hrv, | ||||||
|  |     Srp, | ||||||
|  |     Mkd, | ||||||
|  |     Lit, | ||||||
|  |     Lav, | ||||||
|  |     Est, | ||||||
|  |     Tam, | ||||||
|  |     Vie, | ||||||
|  |     Urd, | ||||||
|  |     Tha, | ||||||
|  |     Guj, | ||||||
|  |     Uzb, | ||||||
|  |     Pan, | ||||||
|  |     Aze, | ||||||
|  |     Ind, | ||||||
|  |     Tel, | ||||||
|  |     Pes, | ||||||
|  |     Mal, | ||||||
|  |     Ori, | ||||||
|  |     Mya, | ||||||
|  |     Nep, | ||||||
|  |     Sin, | ||||||
|  |     Khm, | ||||||
|  |     Tuk, | ||||||
|  |     Aka, | ||||||
|  |     Zul, | ||||||
|  |     Sna, | ||||||
|  |     Afr, | ||||||
|  |     Lat, | ||||||
|  |     Slk, | ||||||
|  |     Cat, | ||||||
|  |     Tgl, | ||||||
|  |     Hye | ||||||
|  | } | ||||||
| @@ -1,4 +1,4 @@ | |||||||
| use std::collections::{BinaryHeap, HashMap, HashSet}; | use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; | ||||||
| use std::fs; | use std::fs; | ||||||
| use std::mem::take; | use std::mem::take; | ||||||
| use std::path::{Path, PathBuf}; | use std::path::{Path, PathBuf}; | ||||||
| @@ -10,6 +10,7 @@ use actix_web::HttpRequest; | |||||||
| use byte_unit::Byte; | use byte_unit::Byte; | ||||||
| use index_scheduler::IndexScheduler; | use index_scheduler::IndexScheduler; | ||||||
| use meilisearch_auth::{AuthController, AuthFilter}; | use meilisearch_auth::{AuthController, AuthFilter}; | ||||||
|  | use meilisearch_types::locales::Locale; | ||||||
| use meilisearch_types::InstanceUid; | use meilisearch_types::InstanceUid; | ||||||
| use once_cell::sync::Lazy; | use once_cell::sync::Lazy; | ||||||
| use regex::Regex; | use regex::Regex; | ||||||
| @@ -653,6 +654,9 @@ pub struct SearchAggregator { | |||||||
|     // every time a search is done, we increment the counter linked to the used settings |     // every time a search is done, we increment the counter linked to the used settings | ||||||
|     matching_strategy: HashMap<String, usize>, |     matching_strategy: HashMap<String, usize>, | ||||||
|  |  | ||||||
|  |     // List of the unique Locales passed as parameter | ||||||
|  |     locales: BTreeSet<Locale>, | ||||||
|  |  | ||||||
|     // pagination |     // pagination | ||||||
|     max_limit: usize, |     max_limit: usize, | ||||||
|     max_offset: usize, |     max_offset: usize, | ||||||
| @@ -707,6 +711,7 @@ impl SearchAggregator { | |||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|         } = query; |         } = query; | ||||||
|  |  | ||||||
|         let mut ret = Self::default(); |         let mut ret = Self::default(); | ||||||
| @@ -774,6 +779,10 @@ impl SearchAggregator { | |||||||
|  |  | ||||||
|         ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); |         ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); | ||||||
|  |  | ||||||
|  |         if let Some(locales) = locales { | ||||||
|  |             ret.locales = locales.into_iter().copied().collect(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); |         ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); | ||||||
|         ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); |         ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); | ||||||
|         ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); |         ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); | ||||||
| @@ -859,6 +868,7 @@ impl SearchAggregator { | |||||||
|             total_degraded, |             total_degraded, | ||||||
|             total_used_negative_operator, |             total_used_negative_operator, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             ref mut locales, | ||||||
|         } = other; |         } = other; | ||||||
|  |  | ||||||
|         if self.timestamp.is_none() { |         if self.timestamp.is_none() { | ||||||
| @@ -947,6 +957,9 @@ impl SearchAggregator { | |||||||
|         self.show_ranking_score |= show_ranking_score; |         self.show_ranking_score |= show_ranking_score; | ||||||
|         self.show_ranking_score_details |= show_ranking_score_details; |         self.show_ranking_score_details |= show_ranking_score_details; | ||||||
|         self.ranking_score_threshold |= ranking_score_threshold; |         self.ranking_score_threshold |= ranking_score_threshold; | ||||||
|  |  | ||||||
|  |         // locales | ||||||
|  |         self.locales.append(locales); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { |     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { | ||||||
| @@ -991,6 +1004,7 @@ impl SearchAggregator { | |||||||
|             total_degraded, |             total_degraded, | ||||||
|             total_used_negative_operator, |             total_used_negative_operator, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         if total_received == 0 { |         if total_received == 0 { | ||||||
| @@ -1060,6 +1074,7 @@ impl SearchAggregator { | |||||||
|                 "matching_strategy": { |                 "matching_strategy": { | ||||||
|                     "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), |                     "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), | ||||||
|                 }, |                 }, | ||||||
|  |                 "locales": locales, | ||||||
|                 "scoring": { |                 "scoring": { | ||||||
|                     "show_ranking_score": show_ranking_score, |                     "show_ranking_score": show_ranking_score, | ||||||
|                     "show_ranking_score_details": show_ranking_score_details, |                     "show_ranking_score_details": show_ranking_score_details, | ||||||
| @@ -1150,6 +1165,7 @@ impl MultiSearchAggregator { | |||||||
|                     attributes_to_search_on: _, |                     attributes_to_search_on: _, | ||||||
|                     hybrid: _, |                     hybrid: _, | ||||||
|                     ranking_score_threshold: _, |                     ranking_score_threshold: _, | ||||||
|  |                     locales: _, | ||||||
|                 } = query; |                 } = query; | ||||||
|  |  | ||||||
|                 index_uid.as_str() |                 index_uid.as_str() | ||||||
| @@ -1307,6 +1323,7 @@ impl FacetSearchAggregator { | |||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|         } = query; |         } = query; | ||||||
|  |  | ||||||
|         let mut ret = Self::default(); |         let mut ret = Self::default(); | ||||||
| @@ -1322,7 +1339,8 @@ impl FacetSearchAggregator { | |||||||
|             || *matching_strategy != MatchingStrategy::default() |             || *matching_strategy != MatchingStrategy::default() | ||||||
|             || attributes_to_search_on.is_some() |             || attributes_to_search_on.is_some() | ||||||
|             || hybrid.is_some() |             || hybrid.is_some() | ||||||
|             || ranking_score_threshold.is_some(); |             || ranking_score_threshold.is_some() | ||||||
|  |             || locales.is_some(); | ||||||
|  |  | ||||||
|         ret |         ret | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -7,6 +7,7 @@ use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; | |||||||
| use meilisearch_types::error::deserr_codes::*; | use meilisearch_types::error::deserr_codes::*; | ||||||
| use meilisearch_types::error::ResponseError; | use meilisearch_types::error::ResponseError; | ||||||
| use meilisearch_types::index_uid::IndexUid; | use meilisearch_types::index_uid::IndexUid; | ||||||
|  | use meilisearch_types::locales::Locale; | ||||||
| use meilisearch_types::milli; | use meilisearch_types::milli; | ||||||
| use meilisearch_types::serde_cs::vec::CS; | use meilisearch_types::serde_cs::vec::CS; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
| @@ -89,6 +90,8 @@ pub struct SearchQueryGet { | |||||||
|     pub hybrid_semantic_ratio: Option<SemanticRatioGet>, |     pub hybrid_semantic_ratio: Option<SemanticRatioGet>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)] | ||||||
|     pub ranking_score_threshold: Option<RankingScoreThresholdGet>, |     pub ranking_score_threshold: Option<RankingScoreThresholdGet>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)] | ||||||
|  |     pub locales: Option<CS<Locale>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] | #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] | ||||||
| @@ -175,6 +178,7 @@ impl From<SearchQueryGet> for SearchQuery { | |||||||
|             attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), |             attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), | ||||||
|             hybrid, |             hybrid, | ||||||
|             ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), |             ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), | ||||||
|  |             locales: other.locales.map(|o| o.into_iter().collect()), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -380,9 +380,6 @@ pub fn perform_federated_search( | |||||||
|  |  | ||||||
|         let criteria = index.criteria(&rtxn)?; |         let criteria = index.criteria(&rtxn)?; | ||||||
|  |  | ||||||
|         // stuff we need for the hitmaker |  | ||||||
|         let script_lang_map = index.script_language(&rtxn)?; |  | ||||||
|  |  | ||||||
|         let dictionary = index.dictionary(&rtxn)?; |         let dictionary = index.dictionary(&rtxn)?; | ||||||
|         let dictionary: Option<Vec<_>> = |         let dictionary: Option<Vec<_>> = | ||||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); |             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||||
| @@ -494,6 +491,7 @@ pub fn perform_federated_search( | |||||||
|                     sort: query.sort, |                     sort: query.sort, | ||||||
|                     show_ranking_score: query.show_ranking_score, |                     show_ranking_score: query.show_ranking_score, | ||||||
|                     show_ranking_score_details: query.show_ranking_score_details, |                     show_ranking_score_details: query.show_ranking_score_details, | ||||||
|  |                     locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()), | ||||||
|                 }; |                 }; | ||||||
|  |  | ||||||
|                 let milli::SearchResult { |                 let milli::SearchResult { | ||||||
| @@ -509,11 +507,7 @@ pub fn perform_federated_search( | |||||||
|                 degraded |= query_degraded; |                 degraded |= query_degraded; | ||||||
|                 used_negative_operator |= query_used_negative_operator; |                 used_negative_operator |= query_used_negative_operator; | ||||||
|  |  | ||||||
|                 let tokenizer = HitMaker::tokenizer( |                 let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); | ||||||
|                     &script_lang_map, |  | ||||||
|                     dictionary.as_deref(), |  | ||||||
|                     separators.as_deref(), |  | ||||||
|                 ); |  | ||||||
|  |  | ||||||
|                 let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); |                 let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| use core::fmt; | use core::fmt; | ||||||
| use std::cmp::min; | use std::cmp::min; | ||||||
| use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | use std::collections::{BTreeMap, BTreeSet, HashSet}; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
| use std::sync::Arc; | use std::sync::Arc; | ||||||
| use std::time::{Duration, Instant}; | use std::time::{Duration, Instant}; | ||||||
| @@ -15,16 +15,17 @@ use meilisearch_types::error::deserr_codes::*; | |||||||
| use meilisearch_types::error::{Code, ResponseError}; | use meilisearch_types::error::{Code, ResponseError}; | ||||||
| use meilisearch_types::heed::RoTxn; | use meilisearch_types::heed::RoTxn; | ||||||
| use meilisearch_types::index_uid::IndexUid; | use meilisearch_types::index_uid::IndexUid; | ||||||
|  | use meilisearch_types::locales::Locale; | ||||||
| use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; | use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
| use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; | use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; | ||||||
| use meilisearch_types::milli::vector::Embedder; | use meilisearch_types::milli::vector::Embedder; | ||||||
| use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; | use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; | ||||||
| use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | ||||||
| use meilisearch_types::{milli, Document}; | use meilisearch_types::{milli, Document}; | ||||||
| use milli::tokenizer::TokenizerBuilder; | use milli::tokenizer::{Language, TokenizerBuilder}; | ||||||
| use milli::{ | use milli::{ | ||||||
|     AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder, |     AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, LocalizedAttributesRule, | ||||||
|     SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, |     MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, | ||||||
| }; | }; | ||||||
| use regex::Regex; | use regex::Regex; | ||||||
| use serde::Serialize; | use serde::Serialize; | ||||||
| @@ -100,6 +101,8 @@ pub struct SearchQuery { | |||||||
|     pub attributes_to_search_on: Option<Vec<String>>, |     pub attributes_to_search_on: Option<Vec<String>>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] | ||||||
|     pub ranking_score_threshold: Option<RankingScoreThreshold>, |     pub ranking_score_threshold: Option<RankingScoreThreshold>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)] | ||||||
|  |     pub locales: Option<Vec<Locale>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr)] | #[derive(Debug, Clone, Copy, PartialEq, Deserr)] | ||||||
| @@ -169,6 +172,7 @@ impl fmt::Debug for SearchQuery { | |||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         let mut debug = f.debug_struct("SearchQuery"); |         let mut debug = f.debug_struct("SearchQuery"); | ||||||
| @@ -250,6 +254,10 @@ impl fmt::Debug for SearchQuery { | |||||||
|             debug.field("ranking_score_threshold", &ranking_score_threshold); |             debug.field("ranking_score_threshold", &ranking_score_threshold); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         if let Some(locales) = locales { | ||||||
|  |             debug.field("locales", &locales); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         debug.finish() |         debug.finish() | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -425,6 +433,8 @@ pub struct SearchQueryWithIndex { | |||||||
|     pub attributes_to_search_on: Option<Vec<String>>, |     pub attributes_to_search_on: Option<Vec<String>>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] | ||||||
|     pub ranking_score_threshold: Option<RankingScoreThreshold>, |     pub ranking_score_threshold: Option<RankingScoreThreshold>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)] | ||||||
|  |     pub locales: Option<Vec<Locale>>, | ||||||
|  |  | ||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
|     pub federation_options: Option<FederationOptions>, |     pub federation_options: Option<FederationOptions>, | ||||||
| @@ -477,6 +487,7 @@ impl SearchQueryWithIndex { | |||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|         } = self; |         } = self; | ||||||
|         ( |         ( | ||||||
|             index_uid, |             index_uid, | ||||||
| @@ -506,6 +517,7 @@ impl SearchQueryWithIndex { | |||||||
|                 attributes_to_search_on, |                 attributes_to_search_on, | ||||||
|                 hybrid, |                 hybrid, | ||||||
|                 ranking_score_threshold, |                 ranking_score_threshold, | ||||||
|  |                 locales, | ||||||
|                 // do not use ..Default::default() here, |                 // do not use ..Default::default() here, | ||||||
|                 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` |                 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` | ||||||
|             }, |             }, | ||||||
| @@ -866,6 +878,10 @@ fn prepare_search<'t>( | |||||||
|         search.sort_criteria(sort); |         search.sort_criteria(sort); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if let Some(ref locales) = query.locales { | ||||||
|  |         search.locales(locales.iter().copied().map(Into::into).collect()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     Ok((search, is_finite_pagination, max_total_hits, offset)) |     Ok((search, is_finite_pagination, max_total_hits, offset)) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -917,6 +933,7 @@ pub fn perform_search( | |||||||
|         highlight_pre_tag, |         highlight_pre_tag, | ||||||
|         highlight_post_tag, |         highlight_post_tag, | ||||||
|         crop_marker, |         crop_marker, | ||||||
|  |         locales, | ||||||
|         // already used in prepare_search |         // already used in prepare_search | ||||||
|         vector: _, |         vector: _, | ||||||
|         hybrid: _, |         hybrid: _, | ||||||
| @@ -941,6 +958,7 @@ pub fn perform_search( | |||||||
|         sort, |         sort, | ||||||
|         show_ranking_score, |         show_ranking_score, | ||||||
|         show_ranking_score_details, |         show_ranking_score_details, | ||||||
|  |         locales: locales.map(|l| l.iter().copied().map(Into::into).collect()), | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let documents = make_hits( |     let documents = make_hits( | ||||||
| @@ -1046,6 +1064,7 @@ struct AttributesFormat { | |||||||
|     sort: Option<Vec<String>>, |     sort: Option<Vec<String>>, | ||||||
|     show_ranking_score: bool, |     show_ranking_score: bool, | ||||||
|     show_ranking_score_details: bool, |     show_ranking_score_details: bool, | ||||||
|  |     locales: Option<Vec<Language>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||||
| @@ -1093,19 +1112,16 @@ struct HitMaker<'a> { | |||||||
|     show_ranking_score_details: bool, |     show_ranking_score_details: bool, | ||||||
|     sort: Option<Vec<String>>, |     sort: Option<Vec<String>>, | ||||||
|     show_matches_position: bool, |     show_matches_position: bool, | ||||||
|  |     locales: Option<Vec<Language>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> HitMaker<'a> { | impl<'a> HitMaker<'a> { | ||||||
|     pub fn tokenizer<'b>( |     pub fn tokenizer<'b>( | ||||||
|         script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>, |  | ||||||
|         dictionary: Option<&'b [&'b str]>, |         dictionary: Option<&'b [&'b str]>, | ||||||
|         separators: Option<&'b [&'b str]>, |         separators: Option<&'b [&'b str]>, | ||||||
|     ) -> milli::tokenizer::Tokenizer<'b> { |     ) -> milli::tokenizer::Tokenizer<'b> { | ||||||
|         let mut tokenizer_builder = TokenizerBuilder::default(); |         let mut tokenizer_builder = TokenizerBuilder::default(); | ||||||
|         tokenizer_builder.create_char_map(true); |         tokenizer_builder.create_char_map(true); | ||||||
|         if !script_lang_map.is_empty() { |  | ||||||
|             tokenizer_builder.allow_list(script_lang_map); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(separators) = separators { |         if let Some(separators) = separators { | ||||||
|             tokenizer_builder.separators(separators); |             tokenizer_builder.separators(separators); | ||||||
| @@ -1218,6 +1234,7 @@ impl<'a> HitMaker<'a> { | |||||||
|             show_ranking_score_details: format.show_ranking_score_details, |             show_ranking_score_details: format.show_ranking_score_details, | ||||||
|             show_matches_position: format.show_matches_position, |             show_matches_position: format.show_matches_position, | ||||||
|             sort: format.sort, |             sort: format.sort, | ||||||
|  |             locales: format.locales, | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -1280,6 +1297,7 @@ impl<'a> HitMaker<'a> { | |||||||
|             &self.formatted_options, |             &self.formatted_options, | ||||||
|             self.show_matches_position, |             self.show_matches_position, | ||||||
|             &self.displayed_ids, |             &self.displayed_ids, | ||||||
|  |             self.locales.as_deref(), | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|         if let Some(sort) = self.sort.as_ref() { |         if let Some(sort) = self.sort.as_ref() { | ||||||
| @@ -1312,8 +1330,6 @@ fn make_hits<'a>( | |||||||
| ) -> Result<Vec<SearchHit>, MeilisearchHttpError> { | ) -> Result<Vec<SearchHit>, MeilisearchHttpError> { | ||||||
|     let mut documents = Vec::new(); |     let mut documents = Vec::new(); | ||||||
|  |  | ||||||
|     let script_lang_map = index.script_language(rtxn)?; |  | ||||||
|  |  | ||||||
|     let dictionary = index.dictionary(rtxn)?; |     let dictionary = index.dictionary(rtxn)?; | ||||||
|     let dictionary: Option<Vec<_>> = |     let dictionary: Option<Vec<_>> = | ||||||
|         dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); |         dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||||
| @@ -1321,8 +1337,7 @@ fn make_hits<'a>( | |||||||
|     let separators: Option<Vec<_>> = |     let separators: Option<Vec<_>> = | ||||||
|         separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); |         separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||||
|  |  | ||||||
|     let tokenizer = |     let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); | ||||||
|         HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref()); |  | ||||||
|  |  | ||||||
|     let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); |     let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); | ||||||
|  |  | ||||||
| @@ -1341,6 +1356,7 @@ pub fn perform_facet_search( | |||||||
|     facet_name: String, |     facet_name: String, | ||||||
|     search_kind: SearchKind, |     search_kind: SearchKind, | ||||||
|     features: RoFeatures, |     features: RoFeatures, | ||||||
|  |     locales: Option<Vec<Language>>, | ||||||
| ) -> Result<FacetSearchResult, ResponseError> { | ) -> Result<FacetSearchResult, ResponseError> { | ||||||
|     let before_search = Instant::now(); |     let before_search = Instant::now(); | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
| @@ -1363,6 +1379,10 @@ pub fn perform_facet_search( | |||||||
|         facet_search.max_values(max_facets as usize); |         facet_search.max_values(max_facets as usize); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if let Some(locales) = locales { | ||||||
|  |         facet_search.locales(locales); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     Ok(FacetSearchResult { |     Ok(FacetSearchResult { | ||||||
|         facet_hits: facet_search.execute()?, |         facet_hits: facet_search.execute()?, | ||||||
|         facet_query, |         facet_query, | ||||||
| @@ -1443,6 +1463,7 @@ pub fn perform_similar( | |||||||
|         sort: None, |         sort: None, | ||||||
|         show_ranking_score, |         show_ranking_score, | ||||||
|         show_ranking_score_details, |         show_ranking_score_details, | ||||||
|  |         locales: None, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let hits = make_hits( |     let hits = make_hits( | ||||||
| @@ -1631,6 +1652,7 @@ fn format_fields( | |||||||
|     formatted_options: &BTreeMap<FieldId, FormatOptions>, |     formatted_options: &BTreeMap<FieldId, FormatOptions>, | ||||||
|     compute_matches: bool, |     compute_matches: bool, | ||||||
|     displayable_ids: &BTreeSet<FieldId>, |     displayable_ids: &BTreeSet<FieldId>, | ||||||
|  |     locales: Option<&[Language]>, | ||||||
| ) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> { | ) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> { | ||||||
|     let mut matches_position = compute_matches.then(BTreeMap::new); |     let mut matches_position = compute_matches.then(BTreeMap::new); | ||||||
|     let mut document = document.clone(); |     let mut document = document.clone(); | ||||||
| @@ -1664,6 +1686,14 @@ fn format_fields( | |||||||
|         let mut infos = Vec::new(); |         let mut infos = Vec::new(); | ||||||
|  |  | ||||||
|         *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches); |         *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches); | ||||||
|  |         *value = format_value( | ||||||
|  |             std::mem::take(value), | ||||||
|  |             builder, | ||||||
|  |             format, | ||||||
|  |             &mut infos, | ||||||
|  |             compute_matches, | ||||||
|  |             locales, | ||||||
|  |         ); | ||||||
|  |  | ||||||
|         if let Some(matches) = matches_position.as_mut() { |         if let Some(matches) = matches_position.as_mut() { | ||||||
|             if !infos.is_empty() { |             if !infos.is_empty() { | ||||||
| @@ -1688,10 +1718,11 @@ fn format_value( | |||||||
|     format_options: Option<FormatOptions>, |     format_options: Option<FormatOptions>, | ||||||
|     infos: &mut Vec<MatchBounds>, |     infos: &mut Vec<MatchBounds>, | ||||||
|     compute_matches: bool, |     compute_matches: bool, | ||||||
|  |     locales: Option<&[Language]>, | ||||||
| ) -> Value { | ) -> Value { | ||||||
|     match value { |     match value { | ||||||
|         Value::String(old_string) => { |         Value::String(old_string) => { | ||||||
|             let mut matcher = builder.build(&old_string); |             let mut matcher = builder.build(&old_string, locales); | ||||||
|             if compute_matches { |             if compute_matches { | ||||||
|                 let matches = matcher.matches(); |                 let matches = matcher.matches(); | ||||||
|                 infos.extend_from_slice(&matches[..]); |                 infos.extend_from_slice(&matches[..]); | ||||||
| @@ -1718,6 +1749,7 @@ fn format_value( | |||||||
|                         }), |                         }), | ||||||
|                         infos, |                         infos, | ||||||
|                         compute_matches, |                         compute_matches, | ||||||
|  |                         locales, | ||||||
|                     ) |                     ) | ||||||
|                 }) |                 }) | ||||||
|                 .collect(), |                 .collect(), | ||||||
| @@ -1737,6 +1769,7 @@ fn format_value( | |||||||
|                             }), |                             }), | ||||||
|                             infos, |                             infos, | ||||||
|                             compute_matches, |                             compute_matches, | ||||||
|  |                             locales, | ||||||
|                         ), |                         ), | ||||||
|                     ) |                     ) | ||||||
|                 }) |                 }) | ||||||
| @@ -1745,7 +1778,7 @@ fn format_value( | |||||||
|         Value::Number(number) => { |         Value::Number(number) => { | ||||||
|             let s = number.to_string(); |             let s = number.to_string(); | ||||||
|  |  | ||||||
|             let mut matcher = builder.build(&s); |             let mut matcher = builder.build(&s, locales); | ||||||
|             if compute_matches { |             if compute_matches { | ||||||
|                 let matches = matcher.matches(); |                 let matches = matcher.matches(); | ||||||
|                 infos.extend_from_slice(&matches[..]); |                 infos.extend_from_slice(&matches[..]); | ||||||
|   | |||||||
| @@ -68,6 +68,7 @@ fn main() -> Result<(), Box<dyn Error>> { | |||||||
|                 logger, |                 logger, | ||||||
|                 TimeBudget::max(), |                 TimeBudget::max(), | ||||||
|                 None, |                 None, | ||||||
|  |                 None, | ||||||
|             )?; |             )?; | ||||||
|             if let Some((logger, dir)) = detailed_logger { |             if let Some((logger, dir)) = detailed_logger { | ||||||
|                 logger.finish(&mut ctx, Path::new(dir))?; |                 logger.finish(&mut ctx, Path::new(dir))?; | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ use std::collections::BinaryHeap; | |||||||
| use std::ops::ControlFlow; | use std::ops::ControlFlow; | ||||||
|  |  | ||||||
| use charabia::normalizer::NormalizerOption; | use charabia::normalizer::NormalizerOption; | ||||||
| use charabia::Normalize; | use charabia::{Language, Normalize, StrDetection, Token}; | ||||||
| use fst::automaton::{Automaton, Str}; | use fst::automaton::{Automaton, Str}; | ||||||
| use fst::{IntoStreamer, Streamer}; | use fst::{IntoStreamer, Streamer}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| @@ -23,6 +23,7 @@ pub struct SearchForFacetValues<'a> { | |||||||
|     search_query: Search<'a>, |     search_query: Search<'a>, | ||||||
|     max_values: usize, |     max_values: usize, | ||||||
|     is_hybrid: bool, |     is_hybrid: bool, | ||||||
|  |     locales: Option<Vec<Language>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> SearchForFacetValues<'a> { | impl<'a> SearchForFacetValues<'a> { | ||||||
| @@ -37,6 +38,7 @@ impl<'a> SearchForFacetValues<'a> { | |||||||
|             search_query, |             search_query, | ||||||
|             max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, |             max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, | ||||||
|             is_hybrid, |             is_hybrid, | ||||||
|  |             locales: None, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -50,6 +52,11 @@ impl<'a> SearchForFacetValues<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn locales(&mut self, locales: Vec<Language>) -> &mut Self { | ||||||
|  |         self.locales = Some(locales); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn one_original_value_of( |     fn one_original_value_of( | ||||||
|         &self, |         &self, | ||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
| @@ -109,8 +116,7 @@ impl<'a> SearchForFacetValues<'a> { | |||||||
|  |  | ||||||
|         match self.query.as_ref() { |         match self.query.as_ref() { | ||||||
|             Some(query) => { |             Some(query) => { | ||||||
|                 let options = NormalizerOption { lossy: true, ..Default::default() }; |                 let query = normalize_facet_string(query, self.locales.as_deref()); | ||||||
|                 let query = query.normalize(&options); |  | ||||||
|                 let query = query.as_ref(); |                 let query = query.as_ref(); | ||||||
|  |  | ||||||
|                 let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; |                 let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; | ||||||
| @@ -330,3 +336,15 @@ impl ValuesCollection { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String { | ||||||
|  |     let options = NormalizerOption { lossy: true, ..Default::default() }; | ||||||
|  |     let mut detection = StrDetection::new(facet_string, locales); | ||||||
|  |     let token = Token { | ||||||
|  |         lemma: std::borrow::Cow::Borrowed(facet_string), | ||||||
|  |         script: detection.script(), | ||||||
|  |         language: detection.language(), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     token.normalize(&options).lemma.to_string() | ||||||
|  | } | ||||||
|   | |||||||
| @@ -174,6 +174,7 @@ impl<'a> Search<'a> { | |||||||
|             semantic: self.semantic.clone(), |             semantic: self.semantic.clone(), | ||||||
|             time_budget: self.time_budget.clone(), |             time_budget: self.time_budget.clone(), | ||||||
|             ranking_score_threshold: self.ranking_score_threshold, |             ranking_score_threshold: self.ranking_score_threshold, | ||||||
|  |             locales: self.locales.clone(), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let semantic = search.semantic.take(); |         let semantic = search.semantic.take(); | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| use std::fmt; | use std::fmt; | ||||||
| use std::sync::Arc; | use std::sync::Arc; | ||||||
|  |  | ||||||
|  | use charabia::Language; | ||||||
| use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; | use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; | ||||||
| use once_cell::sync::Lazy; | use once_cell::sync::Lazy; | ||||||
| use roaring::bitmap::RoaringBitmap; | use roaring::bitmap::RoaringBitmap; | ||||||
| @@ -52,6 +53,7 @@ pub struct Search<'a> { | |||||||
|     semantic: Option<SemanticSearch>, |     semantic: Option<SemanticSearch>, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|     ranking_score_threshold: Option<f64>, |     ranking_score_threshold: Option<f64>, | ||||||
|  |     locales: Option<Vec<Language>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> Search<'a> { | impl<'a> Search<'a> { | ||||||
| @@ -72,6 +74,7 @@ impl<'a> Search<'a> { | |||||||
|             rtxn, |             rtxn, | ||||||
|             index, |             index, | ||||||
|             semantic: None, |             semantic: None, | ||||||
|  |             locales: None, | ||||||
|             time_budget: TimeBudget::max(), |             time_budget: TimeBudget::max(), | ||||||
|             ranking_score_threshold: None, |             ranking_score_threshold: None, | ||||||
|         } |         } | ||||||
| @@ -160,6 +163,11 @@ impl<'a> Search<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn locales(&mut self, locales: Vec<Language>) -> &mut Search<'a> { | ||||||
|  |         self.locales = Some(locales); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { |     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { | ||||||
|         if has_vector_search { |         if has_vector_search { | ||||||
|             let ctx = SearchContext::new(self.index, self.rtxn)?; |             let ctx = SearchContext::new(self.index, self.rtxn)?; | ||||||
| @@ -232,6 +240,7 @@ impl<'a> Search<'a> { | |||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|                 self.time_budget.clone(), |                 self.time_budget.clone(), | ||||||
|                 self.ranking_score_threshold, |                 self.ranking_score_threshold, | ||||||
|  |                 self.locales.as_ref(), | ||||||
|             )?, |             )?, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -272,6 +281,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             semantic, |             semantic, | ||||||
|             time_budget, |             time_budget, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|         } = self; |         } = self; | ||||||
|         f.debug_struct("Search") |         f.debug_struct("Search") | ||||||
|             .field("query", query) |             .field("query", query) | ||||||
| @@ -292,6 +302,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             ) |             ) | ||||||
|             .field("time_budget", time_budget) |             .field("time_budget", time_budget) | ||||||
|             .field("ranking_score_threshold", ranking_score_threshold) |             .field("ranking_score_threshold", ranking_score_threshold) | ||||||
|  |             .field("locales", locales) | ||||||
|             .finish() |             .finish() | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
|  |  | ||||||
| use charabia::{SeparatorKind, Token, Tokenizer}; | use charabia::{Language, SeparatorKind, Token, Tokenizer}; | ||||||
| pub use matching_words::MatchingWords; | pub use matching_words::MatchingWords; | ||||||
| use matching_words::{MatchType, PartialMatch, WordId}; | use matching_words::{MatchType, PartialMatch, WordId}; | ||||||
| use serde::Serialize; | use serde::Serialize; | ||||||
| @@ -46,7 +46,11 @@ impl<'m> MatcherBuilder<'m> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> { |     pub fn build<'t, 'lang>( | ||||||
|  |         &self, | ||||||
|  |         text: &'t str, | ||||||
|  |         locales: Option<&'lang [Language]>, | ||||||
|  |     ) -> Matcher<'t, 'm, '_, 'lang> { | ||||||
|         let crop_marker = match &self.crop_marker { |         let crop_marker = match &self.crop_marker { | ||||||
|             Some(marker) => marker.as_str(), |             Some(marker) => marker.as_str(), | ||||||
|             None => DEFAULT_CROP_MARKER, |             None => DEFAULT_CROP_MARKER, | ||||||
| @@ -68,6 +72,7 @@ impl<'m> MatcherBuilder<'m> { | |||||||
|             highlight_prefix, |             highlight_prefix, | ||||||
|             highlight_suffix, |             highlight_suffix, | ||||||
|             matches: None, |             matches: None, | ||||||
|  |             locales, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -107,17 +112,18 @@ pub struct MatchBounds { | |||||||
|  |  | ||||||
| /// Structure used to analyze a string, compute words that match, | /// Structure used to analyze a string, compute words that match, | ||||||
| /// and format the source string, returning a highlighted and cropped sub-string. | /// and format the source string, returning a highlighted and cropped sub-string. | ||||||
| pub struct Matcher<'t, 'tokenizer, 'b> { | pub struct Matcher<'t, 'tokenizer, 'b, 'lang> { | ||||||
|     text: &'t str, |     text: &'t str, | ||||||
|     matching_words: &'b MatchingWords, |     matching_words: &'b MatchingWords, | ||||||
|     tokenizer: &'b Tokenizer<'tokenizer>, |     tokenizer: &'b Tokenizer<'tokenizer>, | ||||||
|  |     locales: Option<&'lang [Language]>, | ||||||
|     crop_marker: &'b str, |     crop_marker: &'b str, | ||||||
|     highlight_prefix: &'b str, |     highlight_prefix: &'b str, | ||||||
|     highlight_suffix: &'b str, |     highlight_suffix: &'b str, | ||||||
|     matches: Option<(Vec<Token<'t>>, Vec<Match>)>, |     matches: Option<(Vec<Token<'t>>, Vec<Match>)>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> { | impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { | ||||||
|     /// Iterates over tokens and save any of them that matches the query. |     /// Iterates over tokens and save any of them that matches the query. | ||||||
|     fn compute_matches(&mut self) -> &mut Self { |     fn compute_matches(&mut self) -> &mut Self { | ||||||
|         /// some words are counted as matches only if they are close together and in the good order, |         /// some words are counted as matches only if they are close together and in the good order, | ||||||
| @@ -173,7 +179,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> { | |||||||
|             false |             false | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let tokens: Vec<_> = self.tokenizer.tokenize(self.text).collect(); |         let tokens: Vec<_> = | ||||||
|  |             self.tokenizer.tokenize_with_allow_list(self.text, self.locales).collect(); | ||||||
|         let mut matches = Vec::new(); |         let mut matches = Vec::new(); | ||||||
|  |  | ||||||
|         let mut words_positions = tokens |         let mut words_positions = tokens | ||||||
| @@ -530,6 +537,7 @@ mod tests { | |||||||
|                 &mut crate::DefaultSearchLogger, |                 &mut crate::DefaultSearchLogger, | ||||||
|                 TimeBudget::max(), |                 TimeBudget::max(), | ||||||
|                 None, |                 None, | ||||||
|  |                 None, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|  |  | ||||||
| @@ -553,19 +561,19 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text without any match. |         // Text without any match. | ||||||
|         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; |         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop and no highlight should return complete text. |         // no crop and no highlight should return complete text. | ||||||
|         assert_eq!(&matcher.format(format_options), &text); |         assert_eq!(&matcher.format(format_options), &text); | ||||||
|  |  | ||||||
|         // Text containing all matches. |         // Text containing all matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; |         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop and no highlight should return complete text. |         // no crop and no highlight should return complete text. | ||||||
|         assert_eq!(&matcher.format(format_options), &text); |         assert_eq!(&matcher.format(format_options), &text); | ||||||
|  |  | ||||||
|         // Text containing some matches. |         // Text containing some matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves."; |         let text = "Natalie risk her future to build a world with the boy she loves."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop and no highlight should return complete text. |         // no crop and no highlight should return complete text. | ||||||
|         assert_eq!(&matcher.format(format_options), &text); |         assert_eq!(&matcher.format(format_options), &text); | ||||||
|     } |     } | ||||||
| @@ -580,23 +588,23 @@ mod tests { | |||||||
|  |  | ||||||
|         // empty text. |         // empty text. | ||||||
|         let text = ""; |         let text = ""; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         assert_eq!(&matcher.format(format_options), ""); |         assert_eq!(&matcher.format(format_options), ""); | ||||||
|  |  | ||||||
|         // text containing only separators. |         // text containing only separators. | ||||||
|         let text = ":-)"; |         let text = ":-)"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         assert_eq!(&matcher.format(format_options), ":-)"); |         assert_eq!(&matcher.format(format_options), ":-)"); | ||||||
|  |  | ||||||
|         // Text without any match. |         // Text without any match. | ||||||
|         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; |         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop should return complete text, because there is no matches. |         // no crop should return complete text, because there is no matches. | ||||||
|         assert_eq!(&matcher.format(format_options), &text); |         assert_eq!(&matcher.format(format_options), &text); | ||||||
|  |  | ||||||
|         // Text containing all matches. |         // Text containing all matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; |         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop should return complete text with highlighted matches. |         // no crop should return complete text with highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -605,7 +613,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing some matches. |         // Text containing some matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves."; |         let text = "Natalie risk her future to build a world with the boy she loves."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop should return complete text with highlighted matches. |         // no crop should return complete text with highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -622,7 +630,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing prefix match. |         // Text containing prefix match. | ||||||
|         let text = "Ŵôřlḑôle"; |         let text = "Ŵôřlḑôle"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop should return complete text with highlighted matches. |         // no crop should return complete text with highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -631,7 +639,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing unicode match. |         // Text containing unicode match. | ||||||
|         let text = "Ŵôřlḑ"; |         let text = "Ŵôřlḑ"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop should return complete text with highlighted matches. |         // no crop should return complete text with highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -643,7 +651,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing unicode match. |         // Text containing unicode match. | ||||||
|         let text = "Westfália"; |         let text = "Westfália"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no crop should return complete text with highlighted matches. |         // no crop should return complete text with highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -661,7 +669,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // empty text. |         // empty text. | ||||||
|         let text = ""; |         let text = ""; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
|             @"" |             @"" | ||||||
| @@ -669,7 +677,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // text containing only separators. |         // text containing only separators. | ||||||
|         let text = ":-)"; |         let text = ":-)"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
|             @":-)" |             @":-)" | ||||||
| @@ -677,7 +685,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text without any match. |         // Text without any match. | ||||||
|         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; |         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no highlight should return 10 first words with a marker at the end. |         // no highlight should return 10 first words with a marker at the end. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -686,7 +694,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text without any match starting by a separator. |         // Text without any match starting by a separator. | ||||||
|         let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; |         let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no highlight should return 10 first words with a marker at the end. |         // no highlight should return 10 first words with a marker at the end. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -695,7 +703,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Test phrase propagation |         // Test phrase propagation | ||||||
|         let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it."; |         let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // should crop the phrase instead of croping around the match. |         // should crop the phrase instead of croping around the match. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -704,7 +712,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing some matches. |         // Text containing some matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves."; |         let text = "Natalie risk her future to build a world with the boy she loves."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no highlight should return 10 last words with a marker at the start. |         // no highlight should return 10 last words with a marker at the start. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -713,7 +721,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing all matches. |         // Text containing all matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; |         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // no highlight should return 10 last words with a marker at the start. |         // no highlight should return 10 last words with a marker at the start. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -722,7 +730,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing a match unordered and a match ordered. |         // Text containing a match unordered and a match ordered. | ||||||
|         let text = "The world split void void void void void void void void void split the world void void"; |         let text = "The world split void void void void void void void void void split the world void void"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // crop should return 10 last words with a marker at the start. |         // crop should return 10 last words with a marker at the start. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -731,7 +739,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing matches with different density. |         // Text containing matches with different density. | ||||||
|         let text = "split void the void void world void void void void void void void void void void split the world void void"; |         let text = "split void the void void world void void void void void void void void void void split the world void void"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // crop should return 10 last words with a marker at the start. |         // crop should return 10 last words with a marker at the start. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -740,7 +748,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing matches with same word. |         // Text containing matches with same word. | ||||||
|         let text = "split split split split split split void void void void void void void void void void split the world void void"; |         let text = "split split split split split split void void void void void void void void void void split the world void void"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // crop should return 10 last words with a marker at the start. |         // crop should return 10 last words with a marker at the start. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -758,7 +766,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // empty text. |         // empty text. | ||||||
|         let text = ""; |         let text = ""; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
|             @"" |             @"" | ||||||
| @@ -766,7 +774,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // text containing only separators. |         // text containing only separators. | ||||||
|         let text = ":-)"; |         let text = ":-)"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
|             @":-)" |             @":-)" | ||||||
| @@ -774,7 +782,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text without any match. |         // Text without any match. | ||||||
|         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; |         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // both should return 10 first words with a marker at the end. |         // both should return 10 first words with a marker at the end. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -783,7 +791,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing some matches. |         // Text containing some matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves."; |         let text = "Natalie risk her future to build a world with the boy she loves."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // both should return 10 last words with a marker at the start and highlighted matches. |         // both should return 10 last words with a marker at the start and highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -792,7 +800,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing all matches. |         // Text containing all matches. | ||||||
|         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; |         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // both should return 10 last words with a marker at the start and highlighted matches. |         // both should return 10 last words with a marker at the start and highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -801,7 +809,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // Text containing a match unordered and a match ordered. |         // Text containing a match unordered and a match ordered. | ||||||
|         let text = "The world split void void void void void void void void void split the world void void"; |         let text = "The world split void void void void void void void void void split the world void void"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // crop should return 10 last words with a marker at the start. |         // crop should return 10 last words with a marker at the start. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -824,7 +832,7 @@ mod tests { | |||||||
|         let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; |         let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; | ||||||
|  |  | ||||||
|         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); |         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // should return 10 words with a marker at the start as well the end, and the highlighted matches. |         // should return 10 words with a marker at the start as well the end, and the highlighted matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -832,7 +840,7 @@ mod tests { | |||||||
|         ); |         ); | ||||||
|  |  | ||||||
|         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); |         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // should highlight "those" and the phrase "and those". |         // should highlight "those" and the phrase "and those". | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -851,7 +859,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // set a smaller crop size |         // set a smaller crop size | ||||||
|         let format_options = FormatOptions { highlight: false, crop: Some(2) }; |         let format_options = FormatOptions { highlight: false, crop: Some(2) }; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // because crop size < query size, partially format matches. |         // because crop size < query size, partially format matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -860,7 +868,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // set a smaller crop size |         // set a smaller crop size | ||||||
|         let format_options = FormatOptions { highlight: false, crop: Some(1) }; |         let format_options = FormatOptions { highlight: false, crop: Some(1) }; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // because crop size < query size, partially format matches. |         // because crop size < query size, partially format matches. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -869,7 +877,7 @@ mod tests { | |||||||
|  |  | ||||||
|         // set  crop size to 0 |         // set  crop size to 0 | ||||||
|         let format_options = FormatOptions { highlight: false, crop: Some(0) }; |         let format_options = FormatOptions { highlight: false, crop: Some(0) }; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         // because crop size is 0, crop is ignored. |         // because crop size is 0, crop is ignored. | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
| @@ -889,7 +897,7 @@ mod tests { | |||||||
|         let format_options = FormatOptions { highlight: true, crop: None }; |         let format_options = FormatOptions { highlight: true, crop: None }; | ||||||
|  |  | ||||||
|         let text = "the do or die can't be he do and or isn't he"; |         let text = "the do or die can't be he do and or isn't he"; | ||||||
|         let mut matcher = builder.build(text); |         let mut matcher = builder.build(text, None); | ||||||
|         insta::assert_snapshot!( |         insta::assert_snapshot!( | ||||||
|             matcher.format(format_options), |             matcher.format(format_options), | ||||||
|             @"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_" |             @"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_" | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ mod tests; | |||||||
| use std::collections::HashSet; | use std::collections::HashSet; | ||||||
|  |  | ||||||
| use bucket_sort::{bucket_sort, BucketSortOutput}; | use bucket_sort::{bucket_sort, BucketSortOutput}; | ||||||
| use charabia::TokenizerBuilder; | use charabia::{Language, TokenizerBuilder}; | ||||||
| use db_cache::DatabaseCache; | use db_cache::DatabaseCache; | ||||||
| use exact_attribute::ExactAttribute; | use exact_attribute::ExactAttribute; | ||||||
| use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo}; | use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo}; | ||||||
| @@ -639,6 +639,7 @@ pub fn execute_search( | |||||||
|     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, |     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|     ranking_score_threshold: Option<f64>, |     ranking_score_threshold: Option<f64>, | ||||||
|  |     locales: Option<&Vec<Language>>, | ||||||
| ) -> Result<PartialSearchResult> { | ) -> Result<PartialSearchResult> { | ||||||
|     check_sort_criteria(ctx, sort_criteria.as_ref())?; |     check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
| @@ -670,9 +671,8 @@ pub fn execute_search( | |||||||
|             tokbuilder.words_dict(dictionary); |             tokbuilder.words_dict(dictionary); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let languages = ctx.index.languages(ctx.txn)?; |         if let Some(locales) = locales { | ||||||
|         if !languages.is_empty() { |             tokbuilder.allow_list(locales); | ||||||
|             tokbuilder.allow_list(&languages); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let tokenizer = tokbuilder.build(); |         let tokenizer = tokbuilder.build(); | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ pub struct ExtractedTokens { | |||||||
| #[tracing::instrument(level = "trace", skip_all, target = "search::query")] | #[tracing::instrument(level = "trace", skip_all, target = "search::query")] | ||||||
| pub fn located_query_terms_from_tokens( | pub fn located_query_terms_from_tokens( | ||||||
|     ctx: &mut SearchContext<'_>, |     ctx: &mut SearchContext<'_>, | ||||||
|     query: NormalizedTokenIter<'_, '_>, |     query: NormalizedTokenIter<'_, '_, '_, '_>, | ||||||
|     words_limit: Option<usize>, |     words_limit: Option<usize>, | ||||||
| ) -> Result<ExtractedTokens> { | ) -> Result<ExtractedTokens> { | ||||||
|     let nbr_typos = number_of_typos_allowed(ctx)?; |     let nbr_typos = number_of_typos_allowed(ctx)?; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user