mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Add more search tests
This commit is contained in:
		| @@ -41,7 +41,7 @@ pub fn apply_distinct_rule( | ||||
| } | ||||
|  | ||||
| /// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id. | ||||
| fn distinct_single_docid( | ||||
| pub fn distinct_single_docid( | ||||
|     index: &Index, | ||||
|     txn: &RoTxn, | ||||
|     field_id: u16, | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| mod bucket_sort; | ||||
| mod db_cache; | ||||
| mod distinct; | ||||
| mod graph_based_ranking_rule; | ||||
| @@ -31,7 +32,8 @@ pub use logger::detailed::DetailedSearchLogger; | ||||
| pub use logger::{DefaultSearchLogger, SearchLogger}; | ||||
| use query_graph::{QueryGraph, QueryNode}; | ||||
| use query_term::{located_query_terms_from_string, Phrase, QueryTerm}; | ||||
| use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; | ||||
| use ranking_rules::{PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; | ||||
| use bucket_sort::bucket_sort; | ||||
| use resolve_query_graph::PhraseDocIdsCache; | ||||
| use roaring::RoaringBitmap; | ||||
| use words::Words; | ||||
|   | ||||
							
								
								
									
										590
									
								
								milli/src/search/new/tests/distinct.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										590
									
								
								milli/src/search/new/tests/distinct.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,590 @@ | ||||
| /*! | ||||
| This module tests the "distinct attribute" feature, and its | ||||
| interaction with other ranking rules. | ||||
|  | ||||
| 1. no duplicate distinct attributes are ever returned | ||||
| 2. only the best document (according to the search rules) for each distinct value appears in the result | ||||
| 3. if a document does not have a distinct attribute, then the distinct rule does not apply to it | ||||
|  | ||||
| It does not properly test: | ||||
| - combination of distinct + exhaustive_nbr_hits (because we know it's incorrect) | ||||
| - distinct attributes with arrays (because we know it's incorrect as well) | ||||
| */ | ||||
|  | ||||
| use std::collections::HashSet; | ||||
|  | ||||
| use big_s::S; | ||||
| use heed::RoTxn; | ||||
| use maplit::hashset; | ||||
|  | ||||
| use crate::{ | ||||
|     index::tests::TempIndex, AscDesc, Criterion, Index, Member, Search, SearchResult, | ||||
|     TermsMatchingStrategy, | ||||
| }; | ||||
|  | ||||
| use super::collect_field_values; | ||||
|  | ||||
| /// Builds the index shared by every test in this module. | ||||
| /// | ||||
| /// Settings: primary key `id`, searchable field `text`, sortable fields | ||||
| /// `rank1` and `letter`, distinct field `letter`, and `words` as the only | ||||
| /// ranking rule. | ||||
| /// | ||||
| /// Documents 0 to 23 carry a `letter` (one or more documents per letter); the | ||||
| /// remaining ones have none. | ||||
| /// | ||||
| /// NOTE(review): the last two documents share the `id` 26, and the snapshots in | ||||
| /// this module only ever show the second one (`rank1` = 3). Presumably it | ||||
| /// replaces the first; confirm this is intended before renumbering. | ||||
| fn create_index() -> TempIndex { | ||||
|     let index = TempIndex::new(); | ||||
|  | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_primary_key(S("id")); | ||||
|             settings.set_searchable_fields(vec![S("text")]); | ||||
|             settings.set_sortable_fields(hashset! { S("rank1"), S("letter") }); | ||||
|             settings.set_distinct_field(S("letter")); | ||||
|             settings.set_criteria(vec![Criterion::Words]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     index | ||||
|         .add_documents(documents!([ | ||||
|             { "id": 0, "letter": "A", "rank1": 0, "text": "the quick brown fox jamps over the lazy dog" }, | ||||
|             { "id": 1, "letter": "A", "rank1": 1, "text": "the quick brown fox jumpes over the lazy dog" }, | ||||
|             { "id": 2, "letter": "B", "rank1": 0, "text": "the quick brown foxjumps over the lazy dog" }, | ||||
|             { "id": 3, "letter": "B", "rank1": 1, "text": "the quick brown fox jumps over the lazy dog" }, | ||||
|             { "id": 4, "letter": "B", "rank1": 2, "text": "the quick brown fox jumps over the lazy" }, | ||||
|             { "id": 5, "letter": "C", "rank1": 0, "text": "the quickbrownfox jumps over the lazy" }, | ||||
|             { "id": 6, "letter": "C", "rank1": 1, "text": "the quick brown fox jumpss over the lazy" }, | ||||
|             { "id": 7, "letter": "C", "rank1": 2, "text": "the quick brown fox jumps over the lazy" }, | ||||
|             { "id": 8, "letter": "D", "rank1": 0, "text": "the quick brown fox jumps over the lazy" }, | ||||
|             { "id": 9, "letter": "E", "rank1": 0, "text": "the quick brown fox jumps over the lazy" }, | ||||
|             { "id": 10, "letter": "E", "rank1": 1, "text": "the quackbrown foxjunps over" }, | ||||
|             { "id": 11, "letter": "E", "rank1": 2, "text": "the quicko browno fox junps over" }, | ||||
|             { "id": 12, "letter": "E", "rank1": 3, "text": "the quicko browno fox jumps over" }, | ||||
|             { "id": 13, "letter": "E", "rank1": 4, "text": "the quick brewn fox jumps over" }, | ||||
|             { "id": 14, "letter": "E", "rank1": 5, "text": "the quick brown fox jumps over" }, | ||||
|             { "id": 15, "letter": "F", "rank1": 0, "text": "the quick brownf fox jumps over" }, | ||||
|             { "id": 16, "letter": "F", "rank1": 1, "text": "the quic brown fox jamps over" }, | ||||
|             { "id": 17, "letter": "F", "rank1": 2, "text": "thequick browns fox jimps" }, | ||||
|             { "id": 18, "letter": "G", "rank1": 0, "text": "the qick brown fox jumps" }, | ||||
|             { "id": 19, "letter": "G", "rank1": 1, "text": "the quick brownfoxjumps" }, | ||||
|             { "id": 20, "letter": "H", "rank1": 0, "text": "the quick brow fox jumps" }, | ||||
|             { "id": 21, "letter": "I", "rank1": 0, "text": "the quick brown fox jpmps" }, | ||||
|             { "id": 22, "letter": "I", "rank1": 1, "text": "the quick brown fox jumps" }, | ||||
|             { "id": 23, "letter": "I", "rank1": 2, "text": "the quick" }, | ||||
|             { "id": 24, "rank1": 0, "text": "the quick" }, | ||||
|             { "id": 25, "rank1": 1, "text": "the quick brown" }, | ||||
|             { "id": 26, "rank1": 2, "text": "the quick brown fox" }, | ||||
|             { "id": 26, "rank1": 3, "text": "the quick brown fox jumps over the lazy dog" }, | ||||
|         ])) | ||||
|         .unwrap(); | ||||
|  | ||||
|     index | ||||
| } | ||||
|  | ||||
| /// Collects the value of the index's distinct field for each of `docids` and | ||||
| /// asserts that no value appears twice. | ||||
| /// | ||||
| /// Documents without the field (reported as `"__does_not_exist__"` by | ||||
| /// `collect_field_values`) are exempt from the uniqueness check. | ||||
| /// | ||||
| /// Returns the collected values, in the order of `docids`. Panics if the index | ||||
| /// has no distinct field configured or if a duplicate value is found. | ||||
| fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> { | ||||
|     let distinct_field = index.distinct_field(txn).unwrap().unwrap(); | ||||
|     let values = collect_field_values(index, txn, distinct_field, docids); | ||||
|  | ||||
|     let mut seen = HashSet::new(); | ||||
|     for value in values.iter().filter(|value| value.as_str() != "__does_not_exist__") { | ||||
|         // `insert` returns `false` when the value was already present. | ||||
|         assert!(seen.insert(value.clone())); | ||||
|     } | ||||
|  | ||||
|     values | ||||
| } | ||||
|  | ||||
| /// Placeholder search (no query) using the settings from `create_index` as-is: | ||||
| /// each `letter` value is returned at most once, and the documents without a | ||||
| /// `letter` are all returned. | ||||
| #[test] | ||||
| fn test_distinct_placeholder_no_ranking_rules() { | ||||
|     let index = create_index(); | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let search = Search::new(&txn, &index); | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); | ||||
|  | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"A\"", | ||||
|         "\"B\"", | ||||
|         "\"C\"", | ||||
|         "\"D\"", | ||||
|         "\"E\"", | ||||
|         "\"F\"", | ||||
|         "\"G\"", | ||||
|         "\"H\"", | ||||
|         "\"I\"", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| /// Placeholder search with `sort` as the only ranking rule, run three times: | ||||
| /// sorted by `rank1` desc, by `letter` desc, and by `letter` desc then `rank1` desc. | ||||
| /// Each run snapshots the returned ids, their `letter` values (checked for | ||||
| /// uniqueness by `verify_distinct`) and their `rank1` values. | ||||
| #[test] | ||||
| fn test_distinct_placeholder_sort() { | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_criteria(vec![Criterion::Sort]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     // 1. Sort by `rank1`, descending. | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]); | ||||
|  | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"E\"", | ||||
|         "__does_not_exist__", | ||||
|         "\"B\"", | ||||
|         "\"C\"", | ||||
|         "\"F\"", | ||||
|         "\"I\"", | ||||
|         "\"A\"", | ||||
|         "\"G\"", | ||||
|         "__does_not_exist__", | ||||
|         "\"D\"", | ||||
|         "\"H\"", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|     let ranks = collect_field_values(&index, &txn, "rank1", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(ranks, @r###" | ||||
|     [ | ||||
|         "5", | ||||
|         "3", | ||||
|         "2", | ||||
|         "2", | ||||
|         "2", | ||||
|         "2", | ||||
|         "1", | ||||
|         "1", | ||||
|         "1", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 2. Sort by `letter`, descending. | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]); | ||||
|  | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]"); | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"I\"", | ||||
|         "\"H\"", | ||||
|         "\"G\"", | ||||
|         "\"F\"", | ||||
|         "\"E\"", | ||||
|         "\"D\"", | ||||
|         "\"C\"", | ||||
|         "\"B\"", | ||||
|         "\"A\"", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|     let ranks = collect_field_values(&index, &txn, "rank1", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(ranks, @r###" | ||||
|     [ | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "1", | ||||
|         "3", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 3. Sort by `letter` descending, then `rank1` descending. | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.sort_criteria(vec![ | ||||
|         AscDesc::Desc(Member::Field(S("letter"))), | ||||
|         AscDesc::Desc(Member::Field(S("rank1"))), | ||||
|     ]); | ||||
|  | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]"); | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"I\"", | ||||
|         "\"H\"", | ||||
|         "\"G\"", | ||||
|         "\"F\"", | ||||
|         "\"E\"", | ||||
|         "\"D\"", | ||||
|         "\"C\"", | ||||
|         "\"B\"", | ||||
|         "\"A\"", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|     let ranks = collect_field_values(&index, &txn, "rank1", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(ranks, @r###" | ||||
|     [ | ||||
|         "2", | ||||
|         "0", | ||||
|         "1", | ||||
|         "2", | ||||
|         "5", | ||||
|         "0", | ||||
|         "2", | ||||
|         "2", | ||||
|         "1", | ||||
|         "3", | ||||
|         "1", | ||||
|         "0", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| /// Query search with `words` as the only ranking rule and the `Last` terms | ||||
| /// matching strategy. Snapshots the returned ids, their `letter` values | ||||
| /// (checked for uniqueness by `verify_distinct`) and their `text` values. | ||||
| #[test] | ||||
| fn test_distinct_words() { | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_criteria(vec![Criterion::Words]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     search.query("the quick brown fox jumps over the lazy dog"); | ||||
|  | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]"); | ||||
|  | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"A\"", | ||||
|         "\"B\"", | ||||
|         "__does_not_exist__", | ||||
|         "\"C\"", | ||||
|         "\"D\"", | ||||
|         "\"E\"", | ||||
|         "\"F\"", | ||||
|         "\"G\"", | ||||
|         "\"H\"", | ||||
|         "\"I\"", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jamps over the lazy dog\"", | ||||
|         "\"the quick brown foxjumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quickbrownfox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brownf fox jumps over\"", | ||||
|         "\"the qick brown fox jumps\"", | ||||
|         "\"the quick brow fox jumps\"", | ||||
|         "\"the quick brown fox jpmps\"", | ||||
|         "\"the quick brown\"", | ||||
|         "\"the quick\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| /// Query search with the ranking rules `sort`, `words`, `rank1:desc`, sorted by | ||||
| /// `letter` descending at search time, using the `Last` terms matching strategy. | ||||
| /// Snapshots the returned ids and their `letter`, `rank1` and `text` values. | ||||
| #[test] | ||||
| fn test_distinct_sort_words() { | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_criteria(vec![Criterion::Sort, Criterion::Words, Criterion::Desc(S("rank1"))]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     search.query("the quick brown fox jumps over the lazy dog"); | ||||
|     search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]); | ||||
|  | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]"); | ||||
|  | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"I\"", | ||||
|         "\"H\"", | ||||
|         "\"G\"", | ||||
|         "\"F\"", | ||||
|         "\"E\"", | ||||
|         "\"D\"", | ||||
|         "\"C\"", | ||||
|         "\"B\"", | ||||
|         "\"A\"", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let ranks = collect_field_values(&index, &txn, "rank1", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(ranks, @r###" | ||||
|     [ | ||||
|         "1", | ||||
|         "0", | ||||
|         "1", | ||||
|         "1", | ||||
|         "0", | ||||
|         "0", | ||||
|         "2", | ||||
|         "1", | ||||
|         "1", | ||||
|         "3", | ||||
|         "1", | ||||
|         "0", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brow fox jumps\"", | ||||
|         "\"the quick brownfoxjumps\"", | ||||
|         "\"the quic brown fox jamps over\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumpes over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown\"", | ||||
|         "\"the quick\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| /// Placeholder search sorted by `rank1` descending with exhaustive number of | ||||
| /// hits enabled. Snapshots both the returned ids and the `candidates` bitmap; | ||||
| /// the latter is pinned to its current value, which is known to be incorrect. | ||||
| #[test] | ||||
| fn test_distinct_all_candidates() { | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_criteria(vec![Criterion::Sort]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]); | ||||
|     search.exhaustive_number_hits(true); | ||||
|  | ||||
|     let result = search.execute().unwrap(); | ||||
|     let candidates = result.candidates.iter().collect::<Vec<_>>(); | ||||
|     let documents_ids = result.documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); | ||||
|     // TODO: this is incorrect! | ||||
|     insta::assert_snapshot!(format!("{candidates:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); | ||||
| } | ||||
|  | ||||
| /// Query search with the ranking rules `words` then `typo` and the `Last` terms | ||||
| /// matching strategy. Snapshots the returned ids, their `letter` values | ||||
| /// (checked for uniqueness by `verify_distinct`) and their `text` values. | ||||
| #[test] | ||||
| fn test_distinct_typo() { | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_criteria(vec![Criterion::Words, Criterion::Typo]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.query("the quick brown fox jumps over the lazy dog"); | ||||
|     search.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|  | ||||
|     let documents_ids = search.execute().unwrap().documents_ids; | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]"); | ||||
|  | ||||
|     let letters = verify_distinct(&index, &txn, &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letters, @r###" | ||||
|     [ | ||||
|         "\"B\"", | ||||
|         "__does_not_exist__", | ||||
|         "\"A\"", | ||||
|         "\"C\"", | ||||
|         "\"D\"", | ||||
|         "\"E\"", | ||||
|         "\"F\"", | ||||
|         "\"I\"", | ||||
|         "\"G\"", | ||||
|         "\"H\"", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jamps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brownf fox jumps over\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the qick brown fox jumps\"", | ||||
|         "\"the quick brow fox jumps\"", | ||||
|         "\"the quick brown\"", | ||||
|         "\"the quick\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
							
								
								
									
										22
									
								
								milli/src/search/new/tests/language.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								milli/src/search/new/tests/language.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| use crate::{index::tests::TempIndex, Search, SearchResult}; | ||||
|  | ||||
| /// Indexes three titles (one in Latin script, one in Japanese, one in Hebrew) | ||||
| /// and checks that searching for `東京` returns exactly `[1]`, the Japanese one. | ||||
| #[test] | ||||
| fn test_kanji_language_detection() { | ||||
|     let index = TempIndex::new(); | ||||
|  | ||||
|     index | ||||
|         .add_documents(documents!([ | ||||
|             { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" }, | ||||
|             { "id": 1, "title": "東京のお寿司。" }, | ||||
|             { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" } | ||||
|         ])) | ||||
|         .unwrap(); | ||||
|  | ||||
|     // The search only reads from the index, so a read transaction is enough. | ||||
|     // This matches the other search tests and avoids taking the writer lock. | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|  | ||||
|     search.query("東京"); | ||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|  | ||||
|     assert_eq!(documents_ids, vec![1]); | ||||
| } | ||||
| @@ -1,3 +1,28 @@ | ||||
| pub mod distinct; | ||||
| #[cfg(feature = "default")] | ||||
| pub mod language; | ||||
| pub mod ngram_split_words; | ||||
| pub mod proximity; | ||||
| pub mod sort; | ||||
| pub mod typo; | ||||
| pub mod words_tms; | ||||
|  | ||||
| /// Returns, for each document in `docids`, the value of the field named `fid` | ||||
| /// serialized as a JSON string, or `"__does_not_exist__"` when the document | ||||
| /// has no such field. | ||||
| /// | ||||
| /// Panics if the field name is unknown to the index, if the documents cannot be | ||||
| /// fetched, or if a stored value is not valid JSON. | ||||
| fn collect_field_values( | ||||
|     index: &crate::Index, | ||||
|     txn: &heed::RoTxn, | ||||
|     fid: &str, | ||||
|     docids: &[u32], | ||||
| ) -> Vec<String> { | ||||
|     let field_id = index.fields_ids_map(txn).unwrap().id(fid).unwrap(); | ||||
|     let documents = index.documents(txn, docids.iter().copied()).unwrap(); | ||||
|  | ||||
|     documents | ||||
|         .into_iter() | ||||
|         .map(|document| match document.1.get(field_id) { | ||||
|             Some(bytes) => { | ||||
|                 let value: serde_json::Value = serde_json::from_slice(bytes).unwrap(); | ||||
|                 value.to_string() | ||||
|             } | ||||
|             None => "__does_not_exist__".to_owned(), | ||||
|         }) | ||||
|         .collect() | ||||
| } | ||||
|   | ||||
							
								
								
									
										0
									
								
								milli/src/search/new/tests/proximity.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								milli/src/search/new/tests/proximity.rs
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										316
									
								
								milli/src/search/new/tests/sort.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										316
									
								
								milli/src/search/new/tests/sort.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,316 @@ | ||||
| /*! | ||||
| This module tests the `sort` ranking rule: | ||||
|  | ||||
| 1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time | ||||
| 2. an error is returned if the fields-to-sort are not sortable | ||||
| 3. it is possible to add multiple fields-to-sort at search time | ||||
| 4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected | ||||
| 5. numbers appear before strings | ||||
| 6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket | ||||
| 7. boolean values are translated to strings | ||||
| 8. if a field contains an array, it is sorted by the best value in the array according to the sort rule | ||||
| */ | ||||
|  | ||||
| use big_s::S; | ||||
| use maplit::hashset; | ||||
|  | ||||
| use crate::{ | ||||
|     index::tests::TempIndex, search::new::tests::collect_field_values, AscDesc, Criterion, Member, | ||||
|     Search, SearchResult, TermsMatchingStrategy, | ||||
| }; | ||||
|  | ||||
| /// Builds the index used by the sort tests in this module. | ||||
| /// | ||||
| /// Settings: primary key `id`, searchable field `text`, sortable fields `rank`, | ||||
| /// `vague` and `letter`, and `sort` as the only ranking rule. | ||||
| /// | ||||
| /// Every document has a `letter` and a numeric `rank`. The `vague` field is | ||||
| /// deliberately heterogeneous: numbers, strings, arrays, `null`, an object, | ||||
| /// booleans, or missing altogether. | ||||
| fn create_index() -> TempIndex { | ||||
|     let index = TempIndex::new(); | ||||
|  | ||||
|     index | ||||
|         .update_settings(|settings| { | ||||
|             settings.set_primary_key(S("id")); | ||||
|             settings.set_searchable_fields(vec![S("text")]); | ||||
|             settings.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") }); | ||||
|             settings.set_criteria(vec![Criterion::Sort]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     index | ||||
|         .add_documents(documents!([ | ||||
|             { "id": 0, "letter": "A", "rank": 0, "vague": 0 }, | ||||
|             { "id": 1, "letter": "A", "rank": 1, "vague": "0" }, | ||||
|             { "id": 2, "letter": "B", "rank": 0, "vague": 1 }, | ||||
|             { "id": 3, "letter": "B", "rank": 1, "vague": "1" }, | ||||
|             { "id": 4, "letter": "B", "rank": 2, "vague": [1, 2] }, | ||||
|             { "id": 5, "letter": "C", "rank": 0, "vague": [1, "2"] }, | ||||
|             { "id": 6, "letter": "C", "rank": 1 }, | ||||
|             { "id": 7, "letter": "C", "rank": 2, "vague": null }, | ||||
|             { "id": 8, "letter": "D", "rank": 0, "vague": [null, null, ""] }, | ||||
|             { "id": 9, "letter": "E", "rank": 0, "vague": "" }, | ||||
|             { "id": 10, "letter": "E", "rank": 1, "vague": { "sub": 0 } }, | ||||
|             { "id": 11, "letter": "E", "rank": 2, "vague": true }, | ||||
|             { "id": 12, "letter": "E", "rank": 3, "vague": false }, | ||||
|             { "id": 13, "letter": "E", "rank": 4, "vague": 1.5673 }, | ||||
|             { "id": 14, "letter": "E", "rank": 5 }, | ||||
|             { "id": 15, "letter": "F", "rank": 0 }, | ||||
|             { "id": 16, "letter": "F", "rank": 1 }, | ||||
|             { "id": 17, "letter": "F", "rank": 2 }, | ||||
|             { "id": 18, "letter": "G", "rank": 0 }, | ||||
|             { "id": 19, "letter": "G", "rank": 1 }, | ||||
|             { "id": 20, "letter": "H", "rank": 0, "vague": true }, | ||||
|             { "id": 21, "letter": "I", "rank": 0, "vague": false }, | ||||
|             { "id": 22, "letter": "I", "rank": 1, "vague": [1.1367, "help", null] }, | ||||
|             { "id": 23, "letter": "I", "rank": 2, "vague": [1.2367, "hello"] }, | ||||
|         ])) | ||||
|         .unwrap(); | ||||
|  | ||||
|     index | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_sort() { | ||||
      // Snapshot test for the sort criteria. Four searches are run against the
      // fixture built by `create_index()`: `letter` descending, `rank` descending,
      // `vague` ascending and `vague` descending. No query string is set on any of
      // them. For each search, the returned document ids and the value of the
      // sorted field for each returned document are pinned with inline snapshots.
      // NOTE(review): every snapshot holds exactly 20 ids, presumably the default
      // result limit of `Search` — confirm.
|     let index = create_index(); | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
      // 1. Sort on `letter`, descending.
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]); | ||||
|  | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 22, 23, 20, 18, 19, 15, 16, 17, 9, 10, 11, 12, 13, 14, 8, 5, 6, 7, 2]"); | ||||
|  | ||||
      // In the snapshot below, documents sharing a letter appear in ascending id order.
|     let letter_values = collect_field_values(&index, &txn, "letter", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(letter_values, @r###" | ||||
|     [ | ||||
|         "\"I\"", | ||||
|         "\"I\"", | ||||
|         "\"I\"", | ||||
|         "\"H\"", | ||||
|         "\"G\"", | ||||
|         "\"G\"", | ||||
|         "\"F\"", | ||||
|         "\"F\"", | ||||
|         "\"F\"", | ||||
|         "\"E\"", | ||||
|         "\"E\"", | ||||
|         "\"E\"", | ||||
|         "\"E\"", | ||||
|         "\"E\"", | ||||
|         "\"E\"", | ||||
|         "\"D\"", | ||||
|         "\"C\"", | ||||
|         "\"C\"", | ||||
|         "\"C\"", | ||||
|         "\"B\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
      // 2. Sort on `rank`, descending.
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]); | ||||
|  | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 13, 12, 4, 7, 11, 17, 23, 1, 3, 6, 10, 16, 19, 22, 0, 2, 5, 8, 9]"); | ||||
|  | ||||
      // In the snapshot below, documents with equal rank appear in ascending id order.
|     let rank_values = collect_field_values(&index, &txn, "rank", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(rank_values, @r###" | ||||
|     [ | ||||
|         "5", | ||||
|         "4", | ||||
|         "3", | ||||
|         "2", | ||||
|         "2", | ||||
|         "2", | ||||
|         "2", | ||||
|         "2", | ||||
|         "1", | ||||
|         "1", | ||||
|         "1", | ||||
|         "1", | ||||
|         "1", | ||||
|         "1", | ||||
|         "1", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|         "0", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
      // 3. Sort on `vague`, ascending. `vague` holds heterogeneous values in the
      // fixture (numbers, strings, booleans, arrays, objects, null) and is absent
      // from some documents.
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]); | ||||
|  | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]"); | ||||
|  | ||||
      // Recorded order: documents containing a number first, then the strings
      // "0" and "1", then false/true, then the documents whose `vague` is missing,
      // null, empty-string-only, or an object.
      // NOTE(review): "__does_not_exist___" looks like the placeholder that
      // `collect_field_values` emits when a document lacks the field (documents 14
      // and 15 have no `vague` in the fixture) — confirm against the helper.
|     let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(vague_values, @r###" | ||||
|     [ | ||||
|         "0", | ||||
|         "1", | ||||
|         "[1,2]", | ||||
|         "[1,\"2\"]", | ||||
|         "[1.1367,\"help\",null]", | ||||
|         "[1.2367,\"hello\"]", | ||||
|         "1.5673", | ||||
|         "\"0\"", | ||||
|         "\"1\"", | ||||
|         "false", | ||||
|         "false", | ||||
|         "true", | ||||
|         "true", | ||||
|         "__does_not_exist___", | ||||
|         "null", | ||||
|         "[null,null,\"\"]", | ||||
|         "\"\"", | ||||
|         "{\"sub\":0}", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist___", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
      // 4. Sort on `vague`, descending.
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]); | ||||
|  | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 13, 23, 22, 2, 5, 0, 11, 20, 12, 21, 3, 1, 6, 7, 8, 9, 10, 14, 15]"); | ||||
|  | ||||
      // Recorded order: documents containing a number first, then true/false,
      // then the strings "1" and "0". The last seven documents (6, 7, 8, 9, 10,
      // 14, 15) are the same, in the same order, as in the ascending snapshot.
|     let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(vague_values, @r###" | ||||
|     [ | ||||
|         "[1,2]", | ||||
|         "1.5673", | ||||
|         "[1.2367,\"hello\"]", | ||||
|         "[1.1367,\"help\",null]", | ||||
|         "1", | ||||
|         "[1,\"2\"]", | ||||
|         "0", | ||||
|         "true", | ||||
|         "true", | ||||
|         "false", | ||||
|         "false", | ||||
|         "\"1\"", | ||||
|         "\"0\"", | ||||
|         "__does_not_exist___", | ||||
|         "null", | ||||
|         "[null,null,\"\"]", | ||||
|         "\"\"", | ||||
|         "{\"sub\":0}", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist___", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
		Reference in New Issue
	
	Block a user