Mirror of https://github.com/meilisearch/meilisearch.git — synced 2025-10-31 16:06:31 +00:00.
		
		
		
	Commit: "Don't compute split_words for phrases"
This commit is contained in:
		| @@ -462,7 +462,7 @@ fill: \"#B6E2D3\" | ||||
|                 shape: class | ||||
|                 max_nbr_typo: {}", | ||||
|                     term_subset.description(ctx), | ||||
|                     term_subset.max_nbr_typos(ctx) | ||||
|                     term_subset.max_typo_cost(ctx) | ||||
|                 )?; | ||||
|  | ||||
|                 for w in term_subset.all_single_words_except_prefix_db(ctx)? { | ||||
|   | ||||
| @@ -28,14 +28,14 @@ pub enum ZeroOrOneTypo { | ||||
| impl Interned<QueryTerm> { | ||||
|     pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> { | ||||
|         let s = ctx.term_interner.get_mut(self); | ||||
|         if s.max_nbr_typos <= 1 && s.one_typo.is_uninit() { | ||||
|         if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() { | ||||
|             assert!(s.two_typo.is_uninit()); | ||||
|             // Initialize one_typo subterm even if max_nbr_typo is 0 because of split words | ||||
|             self.initialize_one_typo_subterm(ctx)?; | ||||
|             let s = ctx.term_interner.get_mut(self); | ||||
|             assert!(s.one_typo.is_init()); | ||||
|             s.two_typo = Lazy::Init(TwoTypoTerm::default()); | ||||
|         } else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() { | ||||
|         } else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() { | ||||
|             assert!(s.two_typo.is_uninit()); | ||||
|             self.initialize_one_and_two_typo_subterm(ctx)?; | ||||
|             let s = ctx.term_interner.get_mut(self); | ||||
| @@ -185,7 +185,7 @@ pub fn partially_initialized_term_from_word( | ||||
|                 original: ctx.word_interner.insert(word.to_owned()), | ||||
|                 ngram_words: None, | ||||
|                 is_prefix: false, | ||||
|                 max_nbr_typos: 0, | ||||
|                 max_levenshtein_distance: 0, | ||||
|                 zero_typo: <_>::default(), | ||||
|                 one_typo: Lazy::Init(<_>::default()), | ||||
|                 two_typo: Lazy::Init(<_>::default()), | ||||
| @@ -256,7 +256,7 @@ pub fn partially_initialized_term_from_word( | ||||
|     Ok(QueryTerm { | ||||
|         original: word_interned, | ||||
|         ngram_words: None, | ||||
|         max_nbr_typos: max_typo, | ||||
|         max_levenshtein_distance: max_typo, | ||||
|         is_prefix, | ||||
|         zero_typo, | ||||
|         one_typo: Lazy::Uninit, | ||||
| @@ -275,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern | ||||
| impl Interned<QueryTerm> { | ||||
|     fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { | ||||
|         let self_mut = ctx.term_interner.get_mut(self); | ||||
|         let QueryTerm { original, is_prefix, one_typo, max_nbr_typos, .. } = self_mut; | ||||
|  | ||||
|         let allows_split_words = self_mut.allows_split_words(); | ||||
|         let QueryTerm { | ||||
|             original, | ||||
|             is_prefix, | ||||
|             one_typo, | ||||
|             max_levenshtein_distance: max_nbr_typos, | ||||
|             .. | ||||
|         } = self_mut; | ||||
|  | ||||
|         let original = *original; | ||||
|         let is_prefix = *is_prefix; | ||||
|         // let original_str = ctx.word_interner.get(*original).to_owned(); | ||||
| @@ -300,13 +309,17 @@ impl Interned<QueryTerm> { | ||||
|             })?; | ||||
|         } | ||||
|  | ||||
|         let split_words = if allows_split_words { | ||||
|             let original_str = ctx.word_interner.get(original).to_owned(); | ||||
|         let split_words = find_split_words(ctx, original_str.as_str())?; | ||||
|             find_split_words(ctx, original_str.as_str())? | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|  | ||||
|         let self_mut = ctx.term_interner.get_mut(self); | ||||
|  | ||||
|         // Only add the split words to the derivations if: | ||||
|         // 1. the term is not an ngram; OR | ||||
|         // 1. the term is neither an ngram nor a phrase; OR | ||||
|         // 2. the term is an ngram, but the split words are different from the ngram's component words | ||||
|         let split_words = if let Some((ngram_words, split_words)) = | ||||
|             self_mut.ngram_words.as_ref().zip(split_words.as_ref()) | ||||
| @@ -328,7 +341,13 @@ impl Interned<QueryTerm> { | ||||
|     } | ||||
|     fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { | ||||
|         let self_mut = ctx.term_interner.get_mut(self); | ||||
|         let QueryTerm { original, is_prefix, two_typo, max_nbr_typos, .. } = self_mut; | ||||
|         let QueryTerm { | ||||
|             original, | ||||
|             is_prefix, | ||||
|             two_typo, | ||||
|             max_levenshtein_distance: max_nbr_typos, | ||||
|             .. | ||||
|         } = self_mut; | ||||
|         let original_str = ctx.word_interner.get(*original).to_owned(); | ||||
|         if two_typo.is_init() { | ||||
|             return Ok(()); | ||||
|   | ||||
| @@ -43,7 +43,7 @@ pub struct QueryTermSubset { | ||||
| pub struct QueryTerm { | ||||
|     original: Interned<String>, | ||||
|     ngram_words: Option<Vec<Interned<String>>>, | ||||
|     max_nbr_typos: u8, | ||||
|     max_levenshtein_distance: u8, | ||||
|     is_prefix: bool, | ||||
|     zero_typo: ZeroTypoTerm, | ||||
|     // May not be computed yet | ||||
| @@ -342,10 +342,16 @@ impl QueryTermSubset { | ||||
|         } | ||||
|         None | ||||
|     } | ||||
|     pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 { | ||||
|     pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 { | ||||
|         let t = ctx.term_interner.get(self.original); | ||||
|         match t.max_nbr_typos { | ||||
|             0 => 0, | ||||
|         match t.max_levenshtein_distance { | ||||
|             0 => { | ||||
|                 if t.allows_split_words() { | ||||
|                     1 | ||||
|                 } else { | ||||
|                     0 | ||||
|                 } | ||||
|             } | ||||
|             1 => { | ||||
|                 if self.one_typo_subset.is_empty() { | ||||
|                     0 | ||||
| @@ -438,6 +444,9 @@ impl QueryTerm { | ||||
|  | ||||
|         self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty() | ||||
|     } | ||||
|     fn allows_split_words(&self) -> bool { | ||||
|         self.zero_typo.phrase.is_none() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Interned<QueryTerm> { | ||||
|   | ||||
| @@ -217,7 +217,7 @@ pub fn make_ngram( | ||||
|         original: ngram_str_interned, | ||||
|         ngram_words: Some(words_interned), | ||||
|         is_prefix, | ||||
|         max_nbr_typos, | ||||
|         max_levenshtein_distance: max_nbr_typos, | ||||
|         zero_typo: term.zero_typo, | ||||
|         one_typo: Lazy::Uninit, | ||||
|         two_typo: Lazy::Uninit, | ||||
| @@ -271,7 +271,7 @@ impl PhraseBuilder { | ||||
|                 QueryTerm { | ||||
|                     original: ctx.word_interner.insert(phrase_desc), | ||||
|                     ngram_words: None, | ||||
|                     max_nbr_typos: 0, | ||||
|                     max_levenshtein_distance: 0, | ||||
|                     is_prefix: false, | ||||
|                     zero_typo: ZeroTypoTerm { | ||||
|                         phrase: Some(phrase), | ||||
|   | ||||
| @@ -50,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph { | ||||
|         // 3-gram -> equivalent to 2 typos | ||||
|         let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 }; | ||||
|  | ||||
|         for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) { | ||||
|         for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) { | ||||
|             let mut term = term.clone(); | ||||
|             match nbr_typos { | ||||
|                 0 => { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user