mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge pull request #3703 from meilisearch/search-refactor-test-typo-tolerance
Search refactor test typo tolerance + some bugfixes
This commit is contained in:
		| @@ -330,6 +330,8 @@ pub fn execute_search( | |||||||
|         ctx.index.documents_ids(ctx.txn)? |         ctx.index.documents_ids(ctx.txn)? | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|  |     check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
|     let mut located_query_terms = None; |     let mut located_query_terms = None; | ||||||
|     let bucket_sort_output = if let Some(query) = query { |     let bucket_sort_output = if let Some(query) = query { | ||||||
|         // We make sure that the analyzer is aware of the stop words |         // We make sure that the analyzer is aware of the stop words | ||||||
| @@ -352,8 +354,6 @@ pub fn execute_search( | |||||||
|         let graph = QueryGraph::from_query(ctx, &query_terms)?; |         let graph = QueryGraph::from_query(ctx, &query_terms)?; | ||||||
|         located_query_terms = Some(query_terms); |         located_query_terms = Some(query_terms); | ||||||
|  |  | ||||||
|         check_sort_criteria(ctx, sort_criteria.as_ref())?; |  | ||||||
|  |  | ||||||
|         let ranking_rules = |         let ranking_rules = | ||||||
|             get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?; |             get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -20,10 +20,9 @@ if `words` doesn't exist before it. | |||||||
|  |  | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
|  |  | ||||||
| use crate::{ | use crate::index::tests::TempIndex; | ||||||
|     index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, | use crate::search::new::tests::collect_field_values; | ||||||
|     SearchResult, TermsMatchingStrategy, | use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy}; | ||||||
| }; |  | ||||||
|  |  | ||||||
| fn create_index() -> TempIndex { | fn create_index() -> TempIndex { | ||||||
|     let index = TempIndex::new(); |     let index = TempIndex::new(); | ||||||
| @@ -134,6 +133,14 @@ fn create_index() -> TempIndex { | |||||||
|                 "id": 23, |                 "id": 23, | ||||||
|                 "text": "the quivk brown fox jumps over the lazy dog" |                 "text": "the quivk brown fox jumps over the lazy dog" | ||||||
|             }, |             }, | ||||||
|  |             { | ||||||
|  |                 "id": 24, | ||||||
|  |                 "tolerant_text": "the quick brown fox jumps over the lazy dog", | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "id": 25, | ||||||
|  |                 "tolerant_text": "the quivk brown fox jumps over the lazy dog", | ||||||
|  |             }, | ||||||
|         ])) |         ])) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
|     index |     index | ||||||
| @@ -212,79 +219,6 @@ fn test_default_typo() { | |||||||
|         "\"the quickest brownest fox jumps over the laziest dog\"", |         "\"the quickest brownest fox jumps over the laziest dog\"", | ||||||
|     ] |     ] | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
|     // 1 typo on one word, swapped letters |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the quikc borwn fox jupms over the lazy dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); |  | ||||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); |  | ||||||
|     insta::assert_debug_snapshot!(texts, @r###" |  | ||||||
|     [ |  | ||||||
|         "\"the quick brown fox jumps over the lazy dog\"", |  | ||||||
|     ] |  | ||||||
|     "###); |  | ||||||
|  |  | ||||||
|     // 1 first letter typo on a word <5 bytes, replaced letter |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the nuick brown fox jumps over the lazy dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); |  | ||||||
|  |  | ||||||
|     // 1 first letter typo on a word <5 bytes, missing letter |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the uick brown fox jumps over the lazy dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); |  | ||||||
|  |  | ||||||
|     // 1 typo on all words >=5 bytes, replaced letters |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the quack brawn fox junps over the lazy dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); |  | ||||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); |  | ||||||
|     insta::assert_debug_snapshot!(texts, @r###" |  | ||||||
|     [ |  | ||||||
|         "\"the quick brown fox jumps over the lazy dog\"", |  | ||||||
|     ] |  | ||||||
|     "###); |  | ||||||
|  |  | ||||||
|     // 2 typos on words < 9 bytes |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the quckest brawnert fox jumps over the aziest dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); |  | ||||||
|  |  | ||||||
|     // 2 typos on words >= 9 bytes: missing letters, missing first letter, replaced letters |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the extravant fox kyrocketed over the lamguorout dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); |  | ||||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); |  | ||||||
|     insta::assert_debug_snapshot!(texts, @r###" |  | ||||||
|     [ |  | ||||||
|         "\"the extravagant fox skyrocketed over the languorous dog\"", |  | ||||||
|     ] |  | ||||||
|     "###); |  | ||||||
|  |  | ||||||
|     // 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters |  | ||||||
|     let mut s = Search::new(&txn, &index); |  | ||||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); |  | ||||||
|     s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog"); |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |  | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); |  | ||||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); |  | ||||||
|     insta::assert_debug_snapshot!(texts, @r###" |  | ||||||
|     [ |  | ||||||
|         "\"the extravagant fox skyrocketed over the languorous dog\"", |  | ||||||
|     ] |  | ||||||
|     "###); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #[test] | #[test] | ||||||
| @@ -301,6 +235,160 @@ fn test_phrase_no_typo_allowed() { | |||||||
|     insta::assert_debug_snapshot!(texts, @"[]"); |     insta::assert_debug_snapshot!(texts, @"[]"); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn test_typo_exact_word() { | ||||||
|  |     let index = create_index(); | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .update_settings(|s| { | ||||||
|  |             s.set_exact_words( | ||||||
|  |                 ["quick", "quack", "sunflower"].iter().map(ToString::to_string).collect(), | ||||||
|  |             ) | ||||||
|  |         }) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |     let txn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     let ot = index.min_word_len_one_typo(&txn).unwrap(); | ||||||
|  |     let tt = index.min_word_len_two_typos(&txn).unwrap(); | ||||||
|  |     insta::assert_debug_snapshot!(ot, @"5"); | ||||||
|  |     insta::assert_debug_snapshot!(tt, @"9"); | ||||||
|  |  | ||||||
|  |     // "quick" is an exact word: don't match quivk | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quick brown fox jumps over the lazy dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"the quick brown fox jumps over the lazy dog\"", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // "quack" is an exact word: don't match quick | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quack brown fox jumps over the lazy dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); | ||||||
|  |  | ||||||
|  |     // words not in exact_words (quicest, jummps) have normal typo handling | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quicest brownest fox jummps over the laziest dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"the quickest brownest fox jumps over the laziest dog\"", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // exact words do not disable prefix (sunflowering OK, but no sunflowar or sun flower) | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("network interconnection sunflower"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18]"); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"network interconnection sunflower\"", | ||||||
|  |         "\"network interconnection sunflowering\"", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn test_typo_exact_attribute() { | ||||||
|  |     let index = create_index(); | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .update_settings(|s| { | ||||||
|  |             s.set_exact_attributes(["text"].iter().map(ToString::to_string).collect()); | ||||||
|  |             s.set_searchable_fields( | ||||||
|  |                 ["text", "tolerant_text"].iter().map(ToString::to_string).collect(), | ||||||
|  |             ); | ||||||
|  |             s.set_exact_words(["quivk"].iter().map(ToString::to_string).collect()) | ||||||
|  |         }) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |     let txn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     let ot = index.min_word_len_one_typo(&txn).unwrap(); | ||||||
|  |     let tt = index.min_word_len_two_typos(&txn).unwrap(); | ||||||
|  |     insta::assert_debug_snapshot!(ot, @"5"); | ||||||
|  |     insta::assert_debug_snapshot!(tt, @"9"); | ||||||
|  |  | ||||||
|  |     // Exact match returns both exact attributes and tolerant ones. | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quick brown fox jumps over the lazy dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 24, 25]"); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"the quick brown fox jumps over the lazy dog\"", | ||||||
|  |         "__does_not_exist__", | ||||||
|  |         "__does_not_exist__", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "__does_not_exist__", | ||||||
|  |         "\"the quick brown fox jumps over the lazy dog\"", | ||||||
|  |         "\"the quivk brown fox jumps over the lazy dog\"", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // 1 typo only returns the tolerant attribute | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quidk brown fox jumps over the lazy dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[24, 25]"); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"the quick brown fox jumps over the lazy dog\"", | ||||||
|  |         "\"the quivk brown fox jumps over the lazy dog\"", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Combined with exact words: "quivk" is an exact word, so it does not match "quick" in either attribute | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quivk brown fox jumps over the lazy dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 25]"); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"the quivk brown fox jumps over the lazy dog\"", | ||||||
|  |         "__does_not_exist__", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |     let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(texts, @r###" | ||||||
|  |     [ | ||||||
|  |         "__does_not_exist__", | ||||||
|  |         "\"the quivk brown fox jumps over the lazy dog\"", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // No result: typos are not tolerated in the exact attribute, and the tolerant attribute has no matching document | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||||
|  |     s.query("the quicest brownest fox jummps over the laziest dog"); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[test] | #[test] | ||||||
| fn test_ngram_typos() { | fn test_ngram_typos() { | ||||||
|     let index = create_index(); |     let index = create_index(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user