mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	dynamic minimum word len for typos in query tree builder
This commit is contained in:
		| @@ -155,6 +155,8 @@ trait Context { | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|     /// Returns the minimum word len for 1 and 2 typos. | ||||
|     fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; | ||||
| } | ||||
|  | ||||
| /// The query tree builder is the interface to build a query tree. | ||||
| @@ -178,6 +180,12 @@ impl<'a> Context for QueryTreeBuilder<'a> { | ||||
|     fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> { | ||||
|         self.index.word_documents_count(self.rtxn, word) | ||||
|     } | ||||
|  | ||||
|     fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { | ||||
|         let one = self.index.min_word_len_1_typo(&self.rtxn)?; | ||||
|         let two = self.index.min_word_len_2_typo(&self.rtxn)?; | ||||
|         Ok((one, two)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> QueryTreeBuilder<'a> { | ||||
| @@ -256,14 +264,23 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O | ||||
|     Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()]))) | ||||
| } | ||||
|  | ||||
| pub struct TypoConfig { | ||||
|     pub max_typos: u8, | ||||
|     pub word_len_1_typo: u8, | ||||
|     pub word_len_2_typo: u8, | ||||
| } | ||||
|  | ||||
| /// Return the `QueryKind` of a word depending on `authorize_typos` | ||||
| /// and the provided word length. | ||||
| fn typos(word: String, authorize_typos: bool, max_typos: u8) -> QueryKind { | ||||
| fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind { | ||||
|     if authorize_typos { | ||||
|         match word.chars().count() { | ||||
|             0..=4 => QueryKind::exact(word), | ||||
|             5..=8 => QueryKind::tolerant(1.min(max_typos), word), | ||||
|             _ => QueryKind::tolerant(2.min(max_typos), word), | ||||
|         let count = word.chars().count().min(u8::MAX as usize) as u8; | ||||
|         if (0..config.word_len_1_typo).contains(&count) { | ||||
|             QueryKind::exact(word) | ||||
|         } else if (config.word_len_1_typo..config.word_len_2_typo).contains(&count) { | ||||
|             QueryKind::tolerant(1.min(config.max_typos), word) | ||||
|         } else { | ||||
|             QueryKind::tolerant(2.min(config.max_typos), word) | ||||
|         } | ||||
|     } else { | ||||
|         QueryKind::exact(word) | ||||
| @@ -314,9 +331,11 @@ fn create_query_tree( | ||||
|                 if let Some(child) = split_best_frequency(ctx, &word)? { | ||||
|                     children.push(child); | ||||
|                 } | ||||
|                 let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?; | ||||
|                 let config = TypoConfig { max_typos: 2, word_len_1_typo, word_len_2_typo }; | ||||
|                 children.push(Operation::Query(Query { | ||||
|                     prefix, | ||||
|                     kind: typos(word, authorize_typos, 2), | ||||
|                     kind: typos(word, authorize_typos, config), | ||||
|                 })); | ||||
|                 Ok(Operation::or(false, children)) | ||||
|             } | ||||
| @@ -363,9 +382,12 @@ fn create_query_tree( | ||||
|                                 .collect(); | ||||
|                             let mut operations = synonyms(ctx, &words)?.unwrap_or_default(); | ||||
|                             let concat = words.concat(); | ||||
|                             let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?; | ||||
|                             let config = | ||||
|                                 TypoConfig { max_typos: 1, word_len_1_typo, word_len_2_typo }; | ||||
|                             let query = Query { | ||||
|                                 prefix: is_prefix, | ||||
|                                 kind: typos(concat, authorize_typos, 1), | ||||
|                                 kind: typos(concat, authorize_typos, config), | ||||
|                             }; | ||||
|                             operations.push(Operation::Query(query)); | ||||
|                             and_op_children.push(Operation::or(false, operations)); | ||||
| @@ -576,6 +598,10 @@ mod test { | ||||
|             let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); | ||||
|             Ok(self.synonyms.get(&words).cloned()) | ||||
|         } | ||||
|  | ||||
|         fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { | ||||
|             Ok((5, 9)) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     impl Default for TestContext { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user