mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	Don't compute split_words for phrases
This commit is contained in:
		| @@ -462,7 +462,7 @@ fill: \"#B6E2D3\" | |||||||
|                 shape: class |                 shape: class | ||||||
|                 max_nbr_typo: {}", |                 max_nbr_typo: {}", | ||||||
|                     term_subset.description(ctx), |                     term_subset.description(ctx), | ||||||
|                     term_subset.max_nbr_typos(ctx) |                     term_subset.max_typo_cost(ctx) | ||||||
|                 )?; |                 )?; | ||||||
|  |  | ||||||
|                 for w in term_subset.all_single_words_except_prefix_db(ctx)? { |                 for w in term_subset.all_single_words_except_prefix_db(ctx)? { | ||||||
|   | |||||||
| @@ -28,14 +28,14 @@ pub enum ZeroOrOneTypo { | |||||||
| impl Interned<QueryTerm> { | impl Interned<QueryTerm> { | ||||||
|     pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> { |     pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> { | ||||||
|         let s = ctx.term_interner.get_mut(self); |         let s = ctx.term_interner.get_mut(self); | ||||||
|         if s.max_nbr_typos <= 1 && s.one_typo.is_uninit() { |         if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() { | ||||||
|             assert!(s.two_typo.is_uninit()); |             assert!(s.two_typo.is_uninit()); | ||||||
|             // Initialize one_typo subterm even if max_nbr_typo is 0 because of split words |             // Initialize one_typo subterm even if max_nbr_typo is 0 because of split words | ||||||
|             self.initialize_one_typo_subterm(ctx)?; |             self.initialize_one_typo_subterm(ctx)?; | ||||||
|             let s = ctx.term_interner.get_mut(self); |             let s = ctx.term_interner.get_mut(self); | ||||||
|             assert!(s.one_typo.is_init()); |             assert!(s.one_typo.is_init()); | ||||||
|             s.two_typo = Lazy::Init(TwoTypoTerm::default()); |             s.two_typo = Lazy::Init(TwoTypoTerm::default()); | ||||||
|         } else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() { |         } else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() { | ||||||
|             assert!(s.two_typo.is_uninit()); |             assert!(s.two_typo.is_uninit()); | ||||||
|             self.initialize_one_and_two_typo_subterm(ctx)?; |             self.initialize_one_and_two_typo_subterm(ctx)?; | ||||||
|             let s = ctx.term_interner.get_mut(self); |             let s = ctx.term_interner.get_mut(self); | ||||||
| @@ -185,7 +185,7 @@ pub fn partially_initialized_term_from_word( | |||||||
|                 original: ctx.word_interner.insert(word.to_owned()), |                 original: ctx.word_interner.insert(word.to_owned()), | ||||||
|                 ngram_words: None, |                 ngram_words: None, | ||||||
|                 is_prefix: false, |                 is_prefix: false, | ||||||
|                 max_nbr_typos: 0, |                 max_levenshtein_distance: 0, | ||||||
|                 zero_typo: <_>::default(), |                 zero_typo: <_>::default(), | ||||||
|                 one_typo: Lazy::Init(<_>::default()), |                 one_typo: Lazy::Init(<_>::default()), | ||||||
|                 two_typo: Lazy::Init(<_>::default()), |                 two_typo: Lazy::Init(<_>::default()), | ||||||
| @@ -256,7 +256,7 @@ pub fn partially_initialized_term_from_word( | |||||||
|     Ok(QueryTerm { |     Ok(QueryTerm { | ||||||
|         original: word_interned, |         original: word_interned, | ||||||
|         ngram_words: None, |         ngram_words: None, | ||||||
|         max_nbr_typos: max_typo, |         max_levenshtein_distance: max_typo, | ||||||
|         is_prefix, |         is_prefix, | ||||||
|         zero_typo, |         zero_typo, | ||||||
|         one_typo: Lazy::Uninit, |         one_typo: Lazy::Uninit, | ||||||
| @@ -275,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern | |||||||
| impl Interned<QueryTerm> { | impl Interned<QueryTerm> { | ||||||
|     fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { |     fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { | ||||||
|         let self_mut = ctx.term_interner.get_mut(self); |         let self_mut = ctx.term_interner.get_mut(self); | ||||||
|         let QueryTerm { original, is_prefix, one_typo, max_nbr_typos, .. } = self_mut; |  | ||||||
|  |         let allows_split_words = self_mut.allows_split_words(); | ||||||
|  |         let QueryTerm { | ||||||
|  |             original, | ||||||
|  |             is_prefix, | ||||||
|  |             one_typo, | ||||||
|  |             max_levenshtein_distance: max_nbr_typos, | ||||||
|  |             .. | ||||||
|  |         } = self_mut; | ||||||
|  |  | ||||||
|         let original = *original; |         let original = *original; | ||||||
|         let is_prefix = *is_prefix; |         let is_prefix = *is_prefix; | ||||||
|         // let original_str = ctx.word_interner.get(*original).to_owned(); |         // let original_str = ctx.word_interner.get(*original).to_owned(); | ||||||
| @@ -300,13 +309,17 @@ impl Interned<QueryTerm> { | |||||||
|             })?; |             })?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let original_str = ctx.word_interner.get(original).to_owned(); |         let split_words = if allows_split_words { | ||||||
|         let split_words = find_split_words(ctx, original_str.as_str())?; |             let original_str = ctx.word_interner.get(original).to_owned(); | ||||||
|  |             find_split_words(ctx, original_str.as_str())? | ||||||
|  |         } else { | ||||||
|  |             None | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         let self_mut = ctx.term_interner.get_mut(self); |         let self_mut = ctx.term_interner.get_mut(self); | ||||||
|  |  | ||||||
|         // Only add the split words to the derivations if: |         // Only add the split words to the derivations if: | ||||||
|         // 1. the term is not an ngram; OR |         // 1. the term is neither an ngram nor a phrase; OR | ||||||
|         // 2. the term is an ngram, but the split words are different from the ngram's component words |         // 2. the term is an ngram, but the split words are different from the ngram's component words | ||||||
|         let split_words = if let Some((ngram_words, split_words)) = |         let split_words = if let Some((ngram_words, split_words)) = | ||||||
|             self_mut.ngram_words.as_ref().zip(split_words.as_ref()) |             self_mut.ngram_words.as_ref().zip(split_words.as_ref()) | ||||||
| @@ -328,7 +341,13 @@ impl Interned<QueryTerm> { | |||||||
|     } |     } | ||||||
|     fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { |     fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { | ||||||
|         let self_mut = ctx.term_interner.get_mut(self); |         let self_mut = ctx.term_interner.get_mut(self); | ||||||
|         let QueryTerm { original, is_prefix, two_typo, max_nbr_typos, .. } = self_mut; |         let QueryTerm { | ||||||
|  |             original, | ||||||
|  |             is_prefix, | ||||||
|  |             two_typo, | ||||||
|  |             max_levenshtein_distance: max_nbr_typos, | ||||||
|  |             .. | ||||||
|  |         } = self_mut; | ||||||
|         let original_str = ctx.word_interner.get(*original).to_owned(); |         let original_str = ctx.word_interner.get(*original).to_owned(); | ||||||
|         if two_typo.is_init() { |         if two_typo.is_init() { | ||||||
|             return Ok(()); |             return Ok(()); | ||||||
|   | |||||||
| @@ -43,7 +43,7 @@ pub struct QueryTermSubset { | |||||||
| pub struct QueryTerm { | pub struct QueryTerm { | ||||||
|     original: Interned<String>, |     original: Interned<String>, | ||||||
|     ngram_words: Option<Vec<Interned<String>>>, |     ngram_words: Option<Vec<Interned<String>>>, | ||||||
|     max_nbr_typos: u8, |     max_levenshtein_distance: u8, | ||||||
|     is_prefix: bool, |     is_prefix: bool, | ||||||
|     zero_typo: ZeroTypoTerm, |     zero_typo: ZeroTypoTerm, | ||||||
|     // May not be computed yet |     // May not be computed yet | ||||||
| @@ -342,10 +342,16 @@ impl QueryTermSubset { | |||||||
|         } |         } | ||||||
|         None |         None | ||||||
|     } |     } | ||||||
|     pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 { |     pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 { | ||||||
|         let t = ctx.term_interner.get(self.original); |         let t = ctx.term_interner.get(self.original); | ||||||
|         match t.max_nbr_typos { |         match t.max_levenshtein_distance { | ||||||
|             0 => 0, |             0 => { | ||||||
|  |                 if t.allows_split_words() { | ||||||
|  |                     1 | ||||||
|  |                 } else { | ||||||
|  |                     0 | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|             1 => { |             1 => { | ||||||
|                 if self.one_typo_subset.is_empty() { |                 if self.one_typo_subset.is_empty() { | ||||||
|                     0 |                     0 | ||||||
| @@ -438,6 +444,9 @@ impl QueryTerm { | |||||||
|  |  | ||||||
|         self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty() |         self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty() | ||||||
|     } |     } | ||||||
|  |     fn allows_split_words(&self) -> bool { | ||||||
|  |         self.zero_typo.phrase.is_none() | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Interned<QueryTerm> { | impl Interned<QueryTerm> { | ||||||
|   | |||||||
| @@ -217,7 +217,7 @@ pub fn make_ngram( | |||||||
|         original: ngram_str_interned, |         original: ngram_str_interned, | ||||||
|         ngram_words: Some(words_interned), |         ngram_words: Some(words_interned), | ||||||
|         is_prefix, |         is_prefix, | ||||||
|         max_nbr_typos, |         max_levenshtein_distance: max_nbr_typos, | ||||||
|         zero_typo: term.zero_typo, |         zero_typo: term.zero_typo, | ||||||
|         one_typo: Lazy::Uninit, |         one_typo: Lazy::Uninit, | ||||||
|         two_typo: Lazy::Uninit, |         two_typo: Lazy::Uninit, | ||||||
| @@ -271,7 +271,7 @@ impl PhraseBuilder { | |||||||
|                 QueryTerm { |                 QueryTerm { | ||||||
|                     original: ctx.word_interner.insert(phrase_desc), |                     original: ctx.word_interner.insert(phrase_desc), | ||||||
|                     ngram_words: None, |                     ngram_words: None, | ||||||
|                     max_nbr_typos: 0, |                     max_levenshtein_distance: 0, | ||||||
|                     is_prefix: false, |                     is_prefix: false, | ||||||
|                     zero_typo: ZeroTypoTerm { |                     zero_typo: ZeroTypoTerm { | ||||||
|                         phrase: Some(phrase), |                         phrase: Some(phrase), | ||||||
|   | |||||||
| @@ -50,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph { | |||||||
|         // 3-gram -> equivalent to 2 typos |         // 3-gram -> equivalent to 2 typos | ||||||
|         let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 }; |         let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 }; | ||||||
|  |  | ||||||
|         for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) { |         for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) { | ||||||
|             let mut term = term.clone(); |             let mut term = term.clone(); | ||||||
|             match nbr_typos { |             match nbr_typos { | ||||||
|                 0 => { |                 0 => { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user