mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Merge #372
372: Fix Meilisearch 1714 r=Kerollmops a=ManyTheFish The bug came from the typo tolerance: to decide how many typos are accepted, we were counting bytes instead of characters in a word. With Chinese script characters (3 bytes each in UTF-8, so a 3-character word is 9 bytes long), we were allowing 2 typos on 3-character words. We now count the number of characters instead of the number of bytes to assign the typo tolerance. Related to [Meilisearch#1714](https://github.com/meilisearch/MeiliSearch/issues/1714) Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
		| @@ -262,7 +262,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O | ||||
| /// and the provided word length. | ||||
| fn typos(word: String, authorize_typos: bool) -> QueryKind { | ||||
|     if authorize_typos { | ||||
|         match word.len() { | ||||
|         match word.chars().count() { | ||||
|             0..=4 => QueryKind::exact(word), | ||||
|             5..=8 => QueryKind::tolerant(1, word), | ||||
|             _ => QueryKind::tolerant(2, word), | ||||
|   | ||||
| @@ -981,4 +981,41 @@ mod tests { | ||||
|         let count = index.number_of_documents(&rtxn).unwrap(); | ||||
|         assert_eq!(count, 4); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_meilisearch_1714() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(10 * 1024 * 1024); // 10 MB | ||||
|         let index = Index::new(options, &path).unwrap(); | ||||
|  | ||||
|         let content = documents!([ | ||||
|           {"id": "123", "title": "小化妆包" }, | ||||
|           {"id": "456", "title": "Ipad 包" } | ||||
|         ]); | ||||
|  | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|         let builder = IndexDocuments::new(&mut wtxn, &index, 0); | ||||
|         builder.execute(content, |_, _| ()).unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         // Exactly one document (the first one) should be indexed under the word "化妆包". | ||||
|         let count = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap().len(); | ||||
|         assert_eq!(count, 1); | ||||
|  | ||||
|         // Exactly one document (the second one) should be indexed under the word "包". | ||||
|         let count = index.word_docids.get(&rtxn, "包").unwrap().unwrap().len(); | ||||
|         assert_eq!(count, 1); | ||||
|  | ||||
|         let mut search = crate::Search::new(&rtxn, &index); | ||||
|         search.query("化妆包"); | ||||
|         search.authorize_typos(true); | ||||
|         search.optional_words(true); | ||||
|  | ||||
|         // Regression check for Meilisearch#1714: even with typos authorized, the 3-character query must return only 1 document. | ||||
|         let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|         assert_eq!(documents_ids.len(), 1); | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user