mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Add more search tests
This commit is contained in:
		| @@ -18,5 +18,5 @@ fn test_kanji_language_detection() { | ||||
|     search.query("東京"); | ||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|  | ||||
|     assert_eq!(documents_ids, vec![1]); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]"); | ||||
| } | ||||
|   | ||||
| @@ -16,7 +16,10 @@ This module tests the following properties: | ||||
| 13. Ngrams cannot be formed by combining a phrase and a word or two phrases | ||||
| */ | ||||
|  | ||||
| use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy}; | ||||
| use crate::{ | ||||
|     index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, | ||||
|     SearchResult, TermsMatchingStrategy, | ||||
| }; | ||||
|  | ||||
| fn create_index() -> TempIndex { | ||||
|     let index = TempIndex::new(); | ||||
| @@ -46,6 +49,14 @@ fn create_index() -> TempIndex { | ||||
|             { | ||||
|                 "id": 3, | ||||
|                 "text": "the sunflower is tall" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 4, | ||||
|                 "text": "the sunflawer is tall" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 5, | ||||
|                 "text": "sunflowering is not a verb" | ||||
|             } | ||||
|         ])) | ||||
|         .unwrap(); | ||||
| @@ -67,8 +78,18 @@ fn test_2gram_simple() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("sun flower"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     // will also match documents with "sun flower" | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]"); | ||||
|     // will also match documents with "sunflower" + prefix tolerance | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flowers are pretty\"", | ||||
|         "\"the sun flower is tall\"", | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"sunflowering is not a verb\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
| #[test] | ||||
| fn test_3gram_simple() { | ||||
| @@ -87,6 +108,13 @@ fn test_3gram_simple() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flowers are pretty\"", | ||||
|         "\"the sunflowers are pretty\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -99,7 +127,18 @@ fn test_2gram_typo() { | ||||
|     s.query("sun flawer"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flowers are pretty\"", | ||||
|         "\"the sun flower is tall\"", | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"the sunflawer is tall\"", | ||||
|         "\"sunflowering is not a verb\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -119,6 +158,13 @@ fn test_no_disable_ngrams() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     // documents containing `sunflower` | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flower is tall\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -137,7 +183,17 @@ fn test_2gram_prefix() { | ||||
|     s.query("sun flow"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     // documents containing words beginning with `sunflow` | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flowers are pretty\"", | ||||
|         "\"the sun flower is tall\"", | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"sunflowering is not a verb\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -157,7 +213,16 @@ fn test_3gram_prefix() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     // documents containing a word beginning with sunfl | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"the sunflawer is tall\"", | ||||
|         "\"sunflowering is not a verb\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -170,8 +235,17 @@ fn test_split_words() { | ||||
|     s.query("sunflower "); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     // all the documents with either `sunflower` or `sun flower` | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]"); | ||||
|     // all the documents with either `sunflower` or `sun flower` + a possible typo | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flower is tall\"", | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"the sunflawer is tall\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -191,6 +265,12 @@ fn test_disable_split_words() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     // no document containing `sun flower` | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sunflower is tall\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -203,8 +283,18 @@ fn test_2gram_split_words() { | ||||
|     s.query("sunf lower"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     // all the documents with "sunflower", "sun flower", or (sunflower + 1 typo) | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]"); | ||||
|     // all the documents with "sunflower", "sun flower", (sunflower + 1 typo), or (sunflower as prefix) | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flower is tall\"", | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"the sunflawer is tall\"", | ||||
|         "\"sunflowering is not a verb\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -218,7 +308,15 @@ fn test_3gram_no_split_words() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     // no document with `sun flower` | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sunflowers are pretty\"", | ||||
|         "\"the sunflower is tall\"", | ||||
|         "\"sunflowering is not a verb\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -231,7 +329,13 @@ fn test_3gram_no_typos() { | ||||
|     s.query("sunf la wer"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sunflawer is tall\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -245,6 +349,13 @@ fn test_no_ngram_phrases() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flowers are pretty\"", | ||||
|         "\"the sun flower is tall\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
| @@ -252,4 +363,10 @@ fn test_no_ngram_phrases() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the sun flower is tall\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|   | ||||
| @@ -0,0 +1,317 @@ | ||||
| /*! | ||||
| This module tests the Proximity ranking rule: | ||||
|  | ||||
| 1. A proximity of >7 always has the same cost. | ||||
|  | ||||
| 2. Phrase terms can be in proximity to other terms via their start and end words, | ||||
| but we need to make sure that the phrase exists in the document that meets this | ||||
| proximity condition. This is especially relevant with split words and synonyms. | ||||
|  | ||||
| 3. An ngram has the same proximity cost as its component words being consecutive. | ||||
| e.g. `sunflower` is equivalent to `sun flower`. | ||||
|  | ||||
| 4. The prefix databases can be used to find the proximity between two words, but | ||||
| they store fewer proximities than the regular word proximity DB. | ||||
|  | ||||
| */ | ||||
|  | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use crate::{ | ||||
|     index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, | ||||
|     SearchResult, TermsMatchingStrategy, | ||||
| }; | ||||
|  | ||||
| fn create_simple_index() -> TempIndex { | ||||
|     let index = TempIndex::new(); | ||||
|  | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
|             s.set_primary_key("id".to_owned()); | ||||
|             s.set_searchable_fields(vec!["text".to_owned()]); | ||||
|             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     index | ||||
|         .add_documents(documents!([ | ||||
|             { | ||||
|                 "id": 0, | ||||
|                 "text": "the very quick dark brown and smart fox did jump over the terribly lazy and small dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 1, | ||||
|                 "text": "the. quick brown fox jumps over the lazy. dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 2, | ||||
|                 "text": "the quick brown fox jumps over the lazy. dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 3, | ||||
|                 "text": "dog the quick brown fox jumps over the lazy" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 4, | ||||
|                 "text": "the quickbrown fox jumps over the lazy dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 5, | ||||
|                 "text": "brown quick fox jumps over the lazy dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 6, | ||||
|                 "text": "the really quick brown fox jumps over the very lazy dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 7, | ||||
|                 "text": "the really quick brown fox jumps over the lazy dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 8, | ||||
|                 "text": "the quick brown fox jumps over the lazy" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 9, | ||||
|                 "text": "the quack brown fox jumps over the lazy" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 9, | ||||
|                 "text": "the quack brown fox jumps over the lazy dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 10, | ||||
|                 "text": "the quick brown fox jumps over the lazy dog" | ||||
|             } | ||||
|         ])) | ||||
|         .unwrap(); | ||||
|     index | ||||
| } | ||||
|  | ||||
| fn create_edge_cases_index() -> TempIndex { | ||||
|     let index = TempIndex::new(); | ||||
|  | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
|             s.set_primary_key("id".to_owned()); | ||||
|             s.set_searchable_fields(vec!["text".to_owned()]); | ||||
|             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|  | ||||
|     index.add_documents(documents!([ | ||||
|         { | ||||
|             // This document will insert "s" in the prefix database | ||||
|             "id": 0, | ||||
|             "text": " | ||||
|             saa sab sac sae saf sag sah sai saj sak sal sam san sao sap saq sar sasa sat sau sav saw sax say saz | ||||
|             sba sbb sbc sbe sbf sbg sbh sbi sbj sbk sbl sbm sbn sbo sbp sbq sbr sbsb sbt sbu sbv sbw sbx sby sbz | ||||
|             sca scb scc sce scf scg sch sci scj sck scl scm scn sco scp scq scr scsc sct scu scv scw scx scy scz | ||||
|             sda sdb sdc sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sdsd sdt sdu sdv sdw sdx sdy sdz | ||||
|             sea seb sec see sef seg seh sei sej sek sel sem sen seo sep seq ser sese set seu sev sew sex sey sez | ||||
|             sfa sfb sfc sfe sff sfg sfh sfi sfj sfk sfl sfm sfn sfo sfp sfq sfr sfsf sft sfu sfv sfw sfx sfy sfz | ||||
|             sga sgb sgc sge sgf sgg sgh sgi sgj sgk sgl sgm sgn sgo sgp sgq sgr sgsg sgt sgu sgv sgw sgx sgy sgz | ||||
|             ska skb skc ske skf skg skh ski skj skk skl skm skn sko skp skq skr sksk skt sku skv skw skx sky skz | ||||
|             sla slb slc sle slf slg slh sli slj slk sll slm sln slo slp slq slr slsl slt slu slv slw slx sly slz | ||||
|             sma smb smc sme smf smg smh smi smj smk sml smm smn smo smp smq smr smsm smt smu smv smw smx smy smz | ||||
|             sna snb snc sne snf sng snh sni snj snk snl snm snn sno snp snq snr snsn snt snu snv snw snx sny snz | ||||
|             soa sob soc soe sof sog soh soi soj sok sol som son soo sop soq sor soso sot sou sov sow sox soy soz | ||||
|             spa spb spc spe spf spg sph spi spj spk spl spm spn spo spp spq spr spsp spt spu spv spw spx spy spz | ||||
|             sqa sqb sqc sqe sqf sqg sqh sqi sqj sqk sql sqm sqn sqo sqp sqq sqr sqsq sqt squ sqv sqw sqx sqy sqz | ||||
|             sra srb src sre srf srg srh sri srj srk srl srm srn sro srp srq srr srsr srt sru srv srw srx sry srz | ||||
|             ssa ssb ssc sse ssf ssg ssh ssi ssj ssk ssl ssm ssn sso ssp ssq ssr ssss sst ssu ssv ssw ssx ssy ssz | ||||
|             sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz | ||||
|             " | ||||
|         }, | ||||
|         // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. | ||||
|         // If the search query is "sunflower", the split word "Sun Flower" will match some documents. | ||||
|         // If the query is `sunflower wilting`, then we should make sure that | ||||
|         // the proximity condition `flower wilting: prox N` also comes with the condition | ||||
|         // `sun wilting: prox N+1`. TODO: this is not the exact condition we currently use. | ||||
|         // For now, we only check that the phrase `sun flower` exists and that `flower wilting: prox N` holds, | ||||
|         // which is better than nothing but not ideal. | ||||
|         { | ||||
|             "id": 1, | ||||
|             "text": "Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat." | ||||
|         }, | ||||
|         { | ||||
|             "id": 2, | ||||
|             "text": "Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat." | ||||
|         }, | ||||
|         { | ||||
|             "id": 3, | ||||
|             // This document matches the query `sunflower wilting`, but the proximity condition | ||||
|             // between `sunflower` and `wilting` must not be satisfied through the split-word `Sun Flower`, | ||||
|             // which would reduce it to only `flower` and `wilting` being in proximity. | ||||
|             "text": "A flower wilting under the sun, unlike a sunflower" | ||||
|         }, | ||||
|         { | ||||
|             // This should be the best document for `sunflower wilting` | ||||
|             "id": 4, | ||||
|             "text": "sun flower wilting under the heat" | ||||
|         }, | ||||
|         { | ||||
|             // This is also the best document for `sunflower wilting` | ||||
|             "id": 5, | ||||
|             "text": "sunflower wilting under the heat" | ||||
|         }, | ||||
|         { | ||||
|             // Prox MAX between `best` and `s` prefix | ||||
|             "id": 6, | ||||
|             "text": "this is the best meal I have ever had in such a beautiful summer day" | ||||
|         }, | ||||
|         { | ||||
|             // Prox 5 between `best` and `s` prefix | ||||
|             "id": 7, | ||||
|             "text": "this is the best cooked meal of the summer" | ||||
|         }, | ||||
|         { | ||||
|             // Prox 4 between `best` and `s` prefix | ||||
|             "id": 8, | ||||
|             "text": "this is the best meal of the summer" | ||||
|         }, | ||||
|         { | ||||
|             // Prox 3 between `best` and `s` prefix | ||||
|             "id": 9, | ||||
|             "text": "this is the best meal of summer" | ||||
|         }, | ||||
|         { | ||||
|             // Prox 1 between `best` and `s` prefix | ||||
|             "id": 10, | ||||
|             "text": "this is the best summer meal" | ||||
|         }, | ||||
|         { | ||||
|             // Reverse Prox 3 between `best` and `s` prefix | ||||
|             "id": 11, | ||||
|             "text": "summer x y best" | ||||
|         }, | ||||
|         { | ||||
|             // Reverse Prox 2 between `best` and `s` prefix | ||||
|             "id": 12, | ||||
|             "text": "summer x best" | ||||
|         }, | ||||
|         { | ||||
|             // Reverse Prox 1 between `best` and `s` prefix | ||||
|             "id": 13, | ||||
|             "text": "summer best" | ||||
|         }, | ||||
|     ])).unwrap(); | ||||
|     index | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_proximity_simple() { | ||||
|     let index = create_simple_index(); | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("the quick brown fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 9, 10, 7, 6, 5, 2, 3, 0, 1]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quickbrown fox jumps over the lazy dog\"", | ||||
|         "\"the quack brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the really quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the really quick brown fox jumps over the very lazy dog\"", | ||||
|         "\"brown quick fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy. dog\"", | ||||
|         "\"dog the quick brown fox jumps over the lazy\"", | ||||
|         "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"", | ||||
|         "\"the. quick brown fox jumps over the lazy. dog\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_proximity_split_word() { | ||||
|     let index = create_edge_cases_index(); | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("sunflower wilting"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     // TODO: "2" and "4" should be swapped ideally | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", | ||||
|         "\"sun flower wilting under the heat\"", | ||||
|         "\"sunflower wilting under the heat\"", | ||||
|         "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"", | ||||
|         "\"A flower wilting under the sun, unlike a sunflower\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("\"sun flower\" wilting"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     // TODO: "2" and "4" should be swapped ideally | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", | ||||
|         "\"sun flower wilting under the heat\"", | ||||
|         "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"", | ||||
|     ] | ||||
|     "###); | ||||
|     drop(txn); | ||||
|  | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
|             let mut syns = HashMap::new(); | ||||
|             syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]); | ||||
|             s.set_synonyms(syns); | ||||
|         }) | ||||
|         .unwrap(); | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("xyz wilting"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     // TODO: "2" and "4" should be swapped ideally | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", | ||||
|         "\"sun flower wilting under the heat\"", | ||||
|         "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_proximity_prefix_db() { | ||||
|     let index = create_edge_cases_index(); | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("best s"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|  | ||||
|     // This test illustrates the loss of precision from using the prefix DB | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"this is the best summer meal\"", | ||||
|         "\"summer best\"", | ||||
|         "\"this is the best meal of summer\"", | ||||
|         "\"summer x best\"", | ||||
|         "\"this is the best meal of the summer\"", | ||||
|         "\"this is the best meal I have ever had in such a beautiful summer day\"", | ||||
|         "\"this is the best cooked meal of the summer\"", | ||||
|         "\"summer x y best\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|   | ||||
| @@ -21,8 +21,8 @@ if `words` doesn't exist before it. | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use crate::{ | ||||
|     index::tests::TempIndex, Criterion,  | ||||
|     Search, SearchResult, TermsMatchingStrategy, | ||||
|     index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, | ||||
|     SearchResult, TermsMatchingStrategy, | ||||
| }; | ||||
|  | ||||
| fn create_index() -> TempIndex { | ||||
| @@ -130,6 +130,10 @@ fn create_index() -> TempIndex { | ||||
|                 "id": 22, | ||||
|                 "text": "the quick brown fox jumps over the lackadaisical dog" | ||||
|             }, | ||||
|             { | ||||
|                 "id": 23, | ||||
|                 "text": "the quivk brown fox jumps over the lazy dog" | ||||
|             }, | ||||
|         ])) | ||||
|         .unwrap(); | ||||
|     index | ||||
| @@ -151,6 +155,12 @@ fn test_no_typo() { | ||||
|     s.query("the quick brown fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -168,7 +178,14 @@ fn test_default_typo() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("the quick brown fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 23]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quivk brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 1 typo on one word, replaced letter | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -176,6 +193,12 @@ fn test_default_typo() { | ||||
|     s.query("the quack brown fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 1 typo on one word, missing letter, extra letter | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -183,6 +206,12 @@ fn test_default_typo() { | ||||
|     s.query("the quicest brownest fox jummps over the laziest dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quickest brownest fox jumps over the laziest dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 1 typo on one word, swapped letters | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -190,6 +219,12 @@ fn test_default_typo() { | ||||
|     s.query("the quikc borwn fox jupms over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 1 first letter typo on a word <5 bytes, replaced letter | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -211,6 +246,12 @@ fn test_default_typo() { | ||||
|     s.query("the quack brawn fox junps over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 2 typos on words < 9 bytes | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -225,6 +266,12 @@ fn test_default_typo() { | ||||
|     s.query("the extravant fox kyrocketed over the lamguorout dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the extravagant fox skyrocketed over the languorous dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -232,6 +279,12 @@ fn test_default_typo() { | ||||
|     s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the extravagant fox skyrocketed over the languorous dog\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -244,6 +297,8 @@ fn test_phrase_no_typo_allowed() { | ||||
|     s.query("the \"quick brewn\" fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @"[]"); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -256,12 +311,20 @@ fn test_ngram_typos() { | ||||
|     s.query("the extra lagant fox skyrocketed over the languorous dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the extravagant fox skyrocketed over the languorous dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("the ex tra lagant fox skyrocketed over the languorous dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @"[]"); | ||||
| } | ||||
| #[test] | ||||
| fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { | ||||
| @@ -278,7 +341,29 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     s.query("the quick brown fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); | ||||
|     insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &ids_1); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quivk brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps over the lackadaisical dog\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brown fox\"", | ||||
|         "\"the quick brown foxes jump over the lazy dog\"", | ||||
|         "\"the quick brown fax sends a letter to the dog\"", | ||||
|         "\"the quick brown\"", | ||||
|         "\"the quick\"", | ||||
|         "\"a fox doesn't quack, that crown goes to the duck.\"", | ||||
|         "\"the quickest brownest fox jumps over the laziest dog\"", | ||||
|         "\"the quicker browner fox jumped over the lazier dog\"", | ||||
|         "\"the extravagant fox skyrocketed over the languorous dog\"", | ||||
|         "\"the fast brownish fox jumps over the lackadaisical dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
| @@ -290,7 +375,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|     s.query("the quick brown fox jumps over the lazy dog"); | ||||
|     let SearchResult { documents_ids: ids_2, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); | ||||
|     insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); | ||||
|  | ||||
|     assert_eq!(ids_1, ids_2); | ||||
| } | ||||
| @@ -307,6 +392,17 @@ fn test_typo_bucketing() { | ||||
|     s.query("network interconnection sunflower"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 15, 16, 17, 18, 20]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"netwolk interconections sunflawar\"", | ||||
|         "\"network interconnections sunflawer\"", | ||||
|         "\"network interconnection sunflower\"", | ||||
|         "\"network interconnection sun flower\"", | ||||
|         "\"network interconnection sunflowering\"", | ||||
|         "\"network interconnection sunflowar\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     // Then with the typo ranking rule | ||||
|     drop(txn); | ||||
| @@ -322,12 +418,34 @@ fn test_typo_bucketing() { | ||||
|     s.query("network interconnection sunflower"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18, 17, 20, 15, 14]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"network interconnection sunflower\"", | ||||
|         "\"network interconnection sunflowering\"", | ||||
|         "\"network interconnection sun flower\"", | ||||
|         "\"network interconnection sunflowar\"", | ||||
|         "\"network interconnections sunflawer\"", | ||||
|         "\"netwolk interconections sunflawar\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("network interconnection sun flower"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[17, 19, 16, 18, 20, 15]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"network interconnection sun flower\"", | ||||
|         "\"network interconnection sun flowering\"", | ||||
|         "\"network interconnection sunflower\"", | ||||
|         "\"network interconnection sunflowering\"", | ||||
|         "\"network interconnection sunflowar\"", | ||||
|         "\"network interconnections sunflawer\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| @@ -350,7 +468,15 @@ fn test_typo_synonyms() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("the quick brown fox jumps over the lackadaisical dog"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 22, 23]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lackadaisical dog\"", | ||||
|         "\"the quivk brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
| @@ -359,5 +485,13 @@ fn test_typo_synonyms() { | ||||
|     // TODO: is this correct? interaction of ngrams + synonyms means that the | ||||
|     // multi-word synonyms end up having a typo cost. This is probably not what we want. | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the fast brownish fox jumps over the lackadaisical dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lackadaisical dog\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|   | ||||
| @@ -12,9 +12,12 @@ account by the proximity ranking rule. | ||||
| 7. The search is capable of returning no results if no documents match the query | ||||
| */ | ||||
|  | ||||
| use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy}; | ||||
| use crate::{ | ||||
|     index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, | ||||
|     SearchResult, TermsMatchingStrategy, | ||||
| }; | ||||
|  | ||||
| fn create_quick_brown_fox_trivial_index() -> TempIndex { | ||||
| fn create_index() -> TempIndex { | ||||
|     let index = TempIndex::new(); | ||||
|  | ||||
|     index | ||||
| @@ -126,7 +129,7 @@ fn create_quick_brown_fox_trivial_index() -> TempIndex { | ||||
|  | ||||
| #[test] | ||||
| fn test_words_tms_last_simple() { | ||||
|     let index = create_quick_brown_fox_trivial_index(); | ||||
|     let index = create_index(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -136,6 +139,31 @@ fn test_words_tms_last_simple() { | ||||
|  | ||||
|     // 6 and 7 have the same score because "the" appears twice | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the lazy dog\"", | ||||
|         "\"the mighty and quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the great quick brown fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and very scary fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and scary fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy blue dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brown fox\"", | ||||
|         "\"the quick brown fox talks to the lazy and slow dog\"", | ||||
|         "\"the quick brown fox talks to the lazy dog\"", | ||||
|         "\"the quick brown\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("extravagant the quick brown fox jumps over the lazy dog"); | ||||
| @@ -146,7 +174,7 @@ fn test_words_tms_last_simple() { | ||||
|  | ||||
| #[test] | ||||
| fn test_words_tms_last_phrase() { | ||||
|     let index = create_quick_brown_fox_trivial_index(); | ||||
|     let index = create_index(); | ||||
|  | ||||
|     let txn = index.read_txn().unwrap(); | ||||
|     let mut s = Search::new(&txn, &index); | ||||
| @@ -156,6 +184,21 @@ fn test_words_tms_last_phrase() { | ||||
|  | ||||
|     // "The quick brown fox" is a phrase, not deleted by this term matching strategy | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brown fox\"", | ||||
|         "\"the quick brown fox talks to the lazy and slow dog\"", | ||||
|         "\"the quick brown fox talks to the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("\"the quick brown fox\" jumps over the \"lazy\" dog"); | ||||
| @@ -165,6 +208,17 @@ fn test_words_tms_last_phrase() { | ||||
|     // "lazy" is a phrase, not deleted by this term matching strategy | ||||
|     // but words before it can be deleted | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 11, 12]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox talks to the lazy and slow dog\"", | ||||
|         "\"the quick brown fox talks to the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("\"the quick brown fox jumps over the lazy dog\""); | ||||
| @@ -173,6 +227,12 @@ fn test_words_tms_last_phrase() { | ||||
|  | ||||
|     // The whole query is a phrase, no terms are removed | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("\"the quick brown fox jumps over the lazy dog"); | ||||
| @@ -181,11 +241,17 @@ fn test_words_tms_last_phrase() { | ||||
|  | ||||
|     // The whole query is still a phrase, even without closing quotes, so no terms are removed | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_words_proximity_tms_last_simple() { | ||||
|     let index = create_quick_brown_fox_trivial_index(); | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
|             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); | ||||
| @@ -200,6 +266,31 @@ fn test_words_proximity_tms_last_simple() { | ||||
|  | ||||
|     // 7 is better than 6 because of the proximity between "the" and its surrounding terms | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the great quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the mighty and quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy blue dog\"", | ||||
|         "\"this quick brown and scary fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and very scary fox jumps over the lazy dog\"", | ||||
|         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brown fox\"", | ||||
|         "\"the quick brown fox talks to the lazy and slow dog\"", | ||||
|         "\"the quick brown fox talks to the lazy dog\"", | ||||
|         "\"the quick brown\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("the brown quick fox jumps over the lazy dog"); | ||||
| @@ -208,11 +299,36 @@ fn test_words_proximity_tms_last_simple() { | ||||
|  | ||||
|     // 10 is better than 9 because of the proximity between "quick" and "brown" | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the brown quick fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy blue dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the great quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the mighty and quick brown fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and scary fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and very scary fox jumps over the lazy dog\"", | ||||
|         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brown fox\"", | ||||
|         "\"the quick brown fox talks to the lazy and slow dog\"", | ||||
|         "\"the quick brown fox talks to the lazy dog\"", | ||||
|         "\"the quick brown\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_words_proximity_tms_last_phrase() { | ||||
|     let index = create_quick_brown_fox_trivial_index(); | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
|             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); | ||||
| @@ -228,6 +344,26 @@ fn test_words_proximity_tms_last_phrase() { | ||||
|     // "quick brown" is a phrase. The proximity of its first and last words | ||||
|     // to their adjacent query words should be taken into account | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the great quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the mighty and quick brown fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and scary fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and very scary fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|         "\"the quick brown fox\"", | ||||
|         "\"the quick brown fox talks to the lazy and slow dog\"", | ||||
|         "\"the quick brown fox talks to the lazy dog\"", | ||||
|         "\"the quick brown\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("the \"quick brown\" \"fox jumps\" over the lazy dog"); | ||||
| @@ -238,11 +374,27 @@ fn test_words_proximity_tms_last_phrase() { | ||||
|     // to their adjacent query words should be taken into account. | ||||
|     // The same applies to `fox jumps`. | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the great quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the mighty and quick brown fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and scary fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and very scary fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the lazy\"", | ||||
|         "\"the quick brown fox jumps over the\"", | ||||
|         "\"the quick brown fox jumps over\"", | ||||
|         "\"the quick brown fox jumps\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
| fn test_words_tms_all() { | ||||
|     let index = create_quick_brown_fox_trivial_index(); | ||||
|     let index = create_index(); | ||||
|     index | ||||
|         .update_settings(|s| { | ||||
|             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); | ||||
| @@ -256,6 +408,23 @@ fn test_words_tms_all() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"the quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", | ||||
|         "\"the great quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the quick brown fox jumps over the really lazy dog\"", | ||||
|         "\"the mighty and quick brown fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the lazy dog\"", | ||||
|         "\"the brown quick fox jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy dog\"", | ||||
|         "\"the brown quick fox immediately jumps over the really lazy blue dog\"", | ||||
|         "\"this quick brown and scary fox jumps over the lazy dog\"", | ||||
|         "\"this quick brown and very scary fox jumps over the lazy dog\"", | ||||
|         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
|     let mut s = Search::new(&txn, &index); | ||||
|     s.query("extravagant"); | ||||
| @@ -263,4 +432,6 @@ fn test_words_tms_all() { | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|  | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     insta::assert_debug_snapshot!(texts, @"[]"); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user