mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	
							
								
								
									
										2
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										2
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -1206,7 +1206,7 @@ checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" | |||||||
| [[package]] | [[package]] | ||||||
| name = "meilisearch-tokenizer" | name = "meilisearch-tokenizer" | ||||||
| version = "0.1.1" | version = "0.1.1" | ||||||
| source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.4#31ba3ff4a15501f12b7d37ac64ddce7c35a9757c" | source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0#833c48b2ee39071f8b4f51abd15122afdb3c8c06" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "character_converter", |  "character_converter", | ||||||
|  "cow-utils", |  "cow-utils", | ||||||
|   | |||||||
| @@ -10,7 +10,7 @@ anyhow = "1.0.38" | |||||||
| byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } | byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } | ||||||
| grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } | grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } | ||||||
| heed = "0.10.6" | heed = "0.10.6" | ||||||
| meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.4" } | meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.0" } | ||||||
| memmap = "0.7.0" | memmap = "0.7.0" | ||||||
| milli = { path = "../milli" } | milli = { path = "../milli" } | ||||||
| once_cell = "1.5.2" | once_cell = "1.5.2" | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| use std::collections::{BTreeMap, HashMap, HashSet}; | use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | ||||||
| use std::fmt::Display; | use std::fmt::Display; | ||||||
| use std::fs::{File, create_dir_all}; | use std::fs::{File, create_dir_all}; | ||||||
| use std::net::SocketAddr; | use std::net::SocketAddr; | ||||||
| @@ -128,7 +128,10 @@ struct Highlighter<'a, A> { | |||||||
|  |  | ||||||
| impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { | impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { | ||||||
|     fn new(stop_words: &'a fst::Set<A>) -> Self { |     fn new(stop_words: &'a fst::Set<A>) -> Self { | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |         let mut config = AnalyzerConfig::default(); | ||||||
|  |         config.stop_words(stop_words); | ||||||
|  |         let analyzer = Analyzer::new(config); | ||||||
|  |  | ||||||
|         Self { analyzer } |         Self { analyzer } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -266,6 +269,13 @@ struct Settings { | |||||||
|         skip_serializing_if = "Option::is_none", |         skip_serializing_if = "Option::is_none", | ||||||
|     )] |     )] | ||||||
|     criteria: Option<Option<Vec<String>>>, |     criteria: Option<Option<Vec<String>>>, | ||||||
|  |  | ||||||
|  |     #[serde( | ||||||
|  |         default, | ||||||
|  |         deserialize_with = "deserialize_some", | ||||||
|  |         skip_serializing_if = "Option::is_none", | ||||||
|  |     )] | ||||||
|  |     stop_words: Option<Option<BTreeSet<String>>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | #[derive(Debug, Clone, Serialize, Deserialize)] | ||||||
| @@ -439,6 +449,14 @@ async fn main() -> anyhow::Result<()> { | |||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|  |                     // We transpose the settings JSON struct into a real setting update. | ||||||
|  |                     if let Some(stop_words) = settings.stop_words { | ||||||
|  |                         match stop_words { | ||||||
|  |                             Some(stop_words) => builder.set_stop_words(stop_words), | ||||||
|  |                             None => builder.reset_stop_words(), | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |  | ||||||
|                     let result = builder.execute(|indexing_step, update_id| { |                     let result = builder.execute(|indexing_step, update_id| { | ||||||
|                         let (current, total) = match indexing_step { |                         let (current, total) = match indexing_step { | ||||||
|                             TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None), |                             TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None), | ||||||
|   | |||||||
| @@ -20,7 +20,7 @@ heed = { version = "0.10.6", default-features = false, features = ["lmdb", "sync | |||||||
| human_format = "1.0.3" | human_format = "1.0.3" | ||||||
| levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } | levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } | ||||||
| linked-hash-map = "0.5.4" | linked-hash-map = "0.5.4" | ||||||
| meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.4" } | meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.0" } | ||||||
| memmap = "0.7.0" | memmap = "0.7.0" | ||||||
| num-traits = "0.2.14" | num-traits = "0.2.14" | ||||||
| obkv = "0.1.1" | obkv = "0.1.1" | ||||||
|   | |||||||
| @@ -28,6 +28,7 @@ pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; | |||||||
| pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; | pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; | ||||||
| pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; | pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; | ||||||
| pub const WORDS_FST_KEY: &str = "words-fst"; | pub const WORDS_FST_KEY: &str = "words-fst"; | ||||||
|  | pub const STOP_WORDS_KEY: &str = "stop-words"; | ||||||
| pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; | pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; | ||||||
| const CREATED_AT_KEY: &str = "created-at"; | const CREATED_AT_KEY: &str = "created-at"; | ||||||
| const UPDATED_AT_KEY: &str = "updated-at"; | const UPDATED_AT_KEY: &str = "updated-at"; | ||||||
| @@ -377,6 +378,22 @@ impl Index { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /* stop words */ | ||||||
|  |  | ||||||
|  |     pub fn put_stop_words<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> { | ||||||
|  |         self.main.put::<_, Str, ByteSlice>(wtxn, STOP_WORDS_KEY, fst.as_fst().as_bytes()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||||
|  |         self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY) | ||||||
|  |     } | ||||||
|  |     pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<Option<fst::Set<&'t [u8]>>> { | ||||||
|  |         match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? { | ||||||
|  |             Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), | ||||||
|  |             None => Ok(None), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /* words prefixes fst */ |     /* words prefixes fst */ | ||||||
|  |  | ||||||
|     /// Writes the FST which is the words prefixes dictionnary of the engine. |     /// Writes the FST which is the words prefixes dictionnary of the engine. | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use std::fmt; | |||||||
| use std::str::Utf8Error; | use std::str::Utf8Error; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| use fst::{IntoStreamer, Streamer, Set}; | use fst::{IntoStreamer, Streamer}; | ||||||
| use levenshtein_automata::{DFA, LevenshteinAutomatonBuilder as LevBuilder}; | use levenshtein_automata::{DFA, LevenshteinAutomatonBuilder as LevBuilder}; | ||||||
| use log::debug; | use log::debug; | ||||||
| use meilisearch_tokenizer::{AnalyzerConfig, Analyzer}; | use meilisearch_tokenizer::{AnalyzerConfig, Analyzer}; | ||||||
| @@ -91,8 +91,7 @@ impl<'a> Search<'a> { | |||||||
|                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); |                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); | ||||||
|                 builder.optional_words(self.optional_words); |                 builder.optional_words(self.optional_words); | ||||||
|                 builder.authorize_typos(self.authorize_typos); |                 builder.authorize_typos(self.authorize_typos); | ||||||
|                 let stop_words = &Set::default(); |                 let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default()); | ||||||
|                 let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|                 let result = analyzer.analyze(query); |                 let result = analyzer.analyze(query); | ||||||
|                 let tokens = result.tokens(); |                 let tokens = result.tokens(); | ||||||
|                 builder.build(tokens)? |                 builder.build(tokens)? | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| use std::collections::HashSet; | use std::collections::HashSet; | ||||||
| use std::{fmt, cmp, mem}; | use std::{fmt, cmp, mem}; | ||||||
|  |  | ||||||
|  | use fst::Set; | ||||||
| use levenshtein_automata::{DFA, Distance}; | use levenshtein_automata::{DFA, Distance}; | ||||||
| use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream}; | use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| @@ -154,6 +155,10 @@ impl fmt::Debug for Query { | |||||||
|  |  | ||||||
| trait Context { | trait Context { | ||||||
|     fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>; |     fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>; | ||||||
|  |     fn stop_words(&self) -> anyhow::Result<Option<Set<&[u8]>>>; | ||||||
|  |     fn is_stop_word(&self, word: &str) -> anyhow::Result<bool> { | ||||||
|  |         Ok(self.stop_words()?.map_or(false, |s| s.contains(word))) | ||||||
|  |     } | ||||||
|     fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>>; |     fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>>; | ||||||
|     fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> { |     fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> { | ||||||
|         match self.word_docids(word)? { |         match self.word_docids(word)? { | ||||||
| @@ -183,6 +188,10 @@ impl<'a> Context for QueryTreeBuilder<'a> { | |||||||
|     fn synonyms<S: AsRef<str>>(&self, _words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> { |     fn synonyms<S: AsRef<str>>(&self, _words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> { | ||||||
|         Ok(None) |         Ok(None) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn stop_words(&self) -> anyhow::Result<Option<Set<&[u8]>>> { | ||||||
|  |         self.index.stop_words(self.rtxn) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> QueryTreeBuilder<'a> { | impl<'a> QueryTreeBuilder<'a> { | ||||||
| @@ -331,8 +340,7 @@ fn create_query_tree( | |||||||
|     optional_words: bool, |     optional_words: bool, | ||||||
|     authorize_typos: bool, |     authorize_typos: bool, | ||||||
|     query: PrimitiveQuery, |     query: PrimitiveQuery, | ||||||
| ) -> anyhow::Result<Operation> | ) -> anyhow::Result<Operation> { | ||||||
| { |  | ||||||
|     /// Matches on the `PrimitiveQueryPart` and create an operation from it. |     /// Matches on the `PrimitiveQueryPart` and create an operation from it. | ||||||
|     fn resolve_primitive_part( |     fn resolve_primitive_part( | ||||||
|         ctx: &impl Context, |         ctx: &impl Context, | ||||||
| @@ -350,7 +358,12 @@ fn create_query_tree( | |||||||
|                 if let Some(child) = split_best_frequency(ctx, &word)? { |                 if let Some(child) = split_best_frequency(ctx, &word)? { | ||||||
|                     children.push(child); |                     children.push(child); | ||||||
|                 } |                 } | ||||||
|                 children.push(Operation::Query(Query { prefix, kind: typos(word, authorize_typos) })); |  | ||||||
|  |                 let is_stop_word = ctx.is_stop_word(&word)?; | ||||||
|  |                 let query = Query { prefix, kind: typos(word, authorize_typos) }; | ||||||
|  |                 if query.prefix || query.kind.is_tolerant() || !is_stop_word { | ||||||
|  |                     children.push(Operation::Query(query)); | ||||||
|  |                 } | ||||||
|                 Ok(Operation::or(false, children)) |                 Ok(Operation::or(false, children)) | ||||||
|             }, |             }, | ||||||
|             // create a CONSECUTIVE operation wrapping all word in the phrase |             // create a CONSECUTIVE operation wrapping all word in the phrase | ||||||
| @@ -365,12 +378,11 @@ fn create_query_tree( | |||||||
|         ctx: &impl Context, |         ctx: &impl Context, | ||||||
|         authorize_typos: bool, |         authorize_typos: bool, | ||||||
|         query: &[PrimitiveQueryPart], |         query: &[PrimitiveQueryPart], | ||||||
|     ) -> anyhow::Result<Operation> |     ) -> anyhow::Result<Operation> { | ||||||
|     { |  | ||||||
|         const MAX_NGRAM: usize = 3; |         const MAX_NGRAM: usize = 3; | ||||||
|         let mut op_children = Vec::new(); |         let mut op_children = Vec::new(); | ||||||
|  |  | ||||||
|         for sub_query in query.linear_group_by(|a, b| !(a.is_phrase() || b.is_phrase()) ) { |         for sub_query in query.linear_group_by(|a, b| !(a.is_phrase() || b.is_phrase())) { | ||||||
|             let mut or_op_children = Vec::new(); |             let mut or_op_children = Vec::new(); | ||||||
|  |  | ||||||
|             for ngram in 1..=MAX_NGRAM.min(sub_query.len()) { |             for ngram in 1..=MAX_NGRAM.min(sub_query.len()) { | ||||||
| @@ -381,23 +393,31 @@ fn create_query_tree( | |||||||
|  |  | ||||||
|                     match group { |                     match group { | ||||||
|                         [part] => { |                         [part] => { | ||||||
|                             let operation = resolve_primitive_part(ctx, authorize_typos, part.clone())?; |                             let operation = | ||||||
|  |                                 resolve_primitive_part(ctx, authorize_typos, part.clone())?; | ||||||
|                             and_op_children.push(operation); |                             and_op_children.push(operation); | ||||||
|                         }, |                         } | ||||||
|                         words => { |                         words => { | ||||||
|                             let is_prefix = words.last().map(|part| part.is_prefix()).unwrap_or(false); |                             let is_prefix = words.last().map_or(false, |part| part.is_prefix()); | ||||||
|                             let words: Vec<_> = words.iter().filter_map(| part| { |                             let words: Vec<_> = words | ||||||
|                                 if let PrimitiveQueryPart::Word(word, _) = part { |                                 .iter() | ||||||
|                                     Some(word.as_str()) |                                 .filter_map(|part| { | ||||||
|                                 } else { |                                     if let PrimitiveQueryPart::Word(word, _) = part { | ||||||
|                                     None |                                         Some(word.as_str()) | ||||||
|                                 } |                                     } else { | ||||||
|                             }).collect(); |                                         None | ||||||
|  |                                     } | ||||||
|  |                                 }) | ||||||
|  |                                 .collect(); | ||||||
|                             let mut operations = synonyms(ctx, &words)?.unwrap_or_default(); |                             let mut operations = synonyms(ctx, &words)?.unwrap_or_default(); | ||||||
|                             let concat = words.concat(); |                             let concat = words.concat(); | ||||||
|  |  | ||||||
|  |                             let is_stop_word = ctx.is_stop_word(&concat)?; | ||||||
|                             let query = Query { prefix: is_prefix, kind: typos(concat, authorize_typos) }; |                             let query = Query { prefix: is_prefix, kind: typos(concat, authorize_typos) }; | ||||||
|                             operations.push(Operation::Query(query)); |                             if query.prefix || query.kind.is_tolerant() || !is_stop_word { | ||||||
|                             and_op_children.push(Operation::or(false, operations)); |                                 operations.push(Operation::Query(query)); | ||||||
|  |                                 and_op_children.push(Operation::or(false, operations)); | ||||||
|  |                             } | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
| @@ -543,7 +563,6 @@ pub fn maximum_proximity(operation: &Operation) -> usize { | |||||||
| mod test { | mod test { | ||||||
|     use std::collections::HashMap; |     use std::collections::HashMap; | ||||||
|  |  | ||||||
|     use fst::Set; |  | ||||||
|     use maplit::{hashmap, hashset}; |     use maplit::{hashmap, hashset}; | ||||||
|     use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; |     use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; | ||||||
|     use rand::{Rng, SeedableRng, rngs::StdRng}; |     use rand::{Rng, SeedableRng, rngs::StdRng}; | ||||||
| @@ -582,6 +601,10 @@ mod test { | |||||||
|             let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); |             let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); | ||||||
|             Ok(self.synonyms.get(&words).cloned()) |             Ok(self.synonyms.get(&words).cloned()) | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         fn stop_words(&self) -> anyhow::Result<Option<Set<&[u8]>>> { | ||||||
|  |             Ok(None) | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     impl Default for TestContext { |     impl Default for TestContext { | ||||||
| @@ -646,8 +669,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn prefix() { |     fn prefix() { | ||||||
|         let query = "hey friends"; |         let query = "hey friends"; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -667,8 +689,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn no_prefix() { |     fn no_prefix() { | ||||||
|         let query = "hey friends "; |         let query = "hey friends "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -688,8 +709,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn synonyms() { |     fn synonyms() { | ||||||
|         let query = "hello world "; |         let query = "hello world "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -720,8 +740,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn complex_synonyms() { |     fn complex_synonyms() { | ||||||
|         let query = "new york city "; |         let query = "new york city "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -766,8 +785,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn ngrams() { |     fn ngrams() { | ||||||
|         let query = "n grams "; |         let query = "n grams "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -787,8 +805,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn word_split() { |     fn word_split() { | ||||||
|         let query = "wordsplit fish "; |         let query = "wordsplit fish "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -814,8 +831,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn phrase() { |     fn phrase() { | ||||||
|         let query = "\"hey friends\" \" \" \"wooop"; |         let query = "\"hey friends\" \" \" \"wooop"; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -835,8 +851,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn optional_word() { |     fn optional_word() { | ||||||
|         let query = "hey my friend "; |         let query = "hey my friend "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -875,8 +890,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn optional_word_phrase() { |     fn optional_word_phrase() { | ||||||
|         let query = "\"hey my\""; |         let query = "\"hey my\""; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -892,8 +906,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn optional_word_multiple_phrases() { |     fn optional_word_multiple_phrases() { | ||||||
|         let query = r#""hey" my good "friend""#; |         let query = r#""hey" my good "friend""#; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -927,8 +940,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn no_typo() { |     fn no_typo() { | ||||||
|         let query = "hey friends "; |         let query = "hey friends "; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
| @@ -947,8 +959,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn fetching_words() { |     fn fetching_words() { | ||||||
|         let query = "wordsplit nyc world"; |         let query = "wordsplit nyc world"; | ||||||
|         let stop_words = &Set::default(); |         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |  | ||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -410,6 +410,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | |||||||
|             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), |             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|  |         let stop_words = self.index.stop_words(self.wtxn)?; | ||||||
|  |         let stop_words = stop_words.as_ref(); | ||||||
|         let linked_hash_map_size = self.linked_hash_map_size; |         let linked_hash_map_size = self.linked_hash_map_size; | ||||||
|         let max_nb_chunks = self.max_nb_chunks; |         let max_nb_chunks = self.max_nb_chunks; | ||||||
|         let max_memory = self.max_memory; |         let max_memory = self.max_memory; | ||||||
| @@ -436,7 +438,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | |||||||
|             let readers = rayon::iter::repeatn(documents, num_threads) |             let readers = rayon::iter::repeatn(documents, num_threads) | ||||||
|                 .enumerate() |                 .enumerate() | ||||||
|                 .map(|(i, documents)| { |                 .map(|(i, documents)| { | ||||||
|                     let stop_words = fst::Set::default(); |  | ||||||
|                     let store = Store::new( |                     let store = Store::new( | ||||||
|                         searchable_fields.clone(), |                         searchable_fields.clone(), | ||||||
|                         faceted_fields.clone(), |                         faceted_fields.clone(), | ||||||
| @@ -446,7 +447,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | |||||||
|                         chunk_compression_type, |                         chunk_compression_type, | ||||||
|                         chunk_compression_level, |                         chunk_compression_level, | ||||||
|                         chunk_fusing_shrink_size, |                         chunk_fusing_shrink_size, | ||||||
|                         &stop_words, |                         stop_words, | ||||||
|                     )?; |                     )?; | ||||||
|                     store.index( |                     store.index( | ||||||
|                         documents, |                         documents, | ||||||
|   | |||||||
| @@ -86,7 +86,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { | |||||||
|         chunk_compression_type: CompressionType, |         chunk_compression_type: CompressionType, | ||||||
|         chunk_compression_level: Option<u32>, |         chunk_compression_level: Option<u32>, | ||||||
|         chunk_fusing_shrink_size: Option<u64>, |         chunk_fusing_shrink_size: Option<u64>, | ||||||
|         stop_words: &'s Set<A>, |         stop_words: Option<&'s Set<A>>, | ||||||
|     ) -> anyhow::Result<Self> |     ) -> anyhow::Result<Self> | ||||||
|     { |     { | ||||||
|         // We divide the max memory by the number of sorter the Store have. |         // We divide the max memory by the number of sorter the Store have. | ||||||
| @@ -141,7 +141,11 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { | |||||||
|             create_writer(chunk_compression_type, chunk_compression_level, f) |             create_writer(chunk_compression_type, chunk_compression_level, f) | ||||||
|         })?; |         })?; | ||||||
|  |  | ||||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); |         let mut config = AnalyzerConfig::default(); | ||||||
|  |         if let Some(stop_words) = stop_words { | ||||||
|  |             config.stop_words(stop_words); | ||||||
|  |         } | ||||||
|  |         let analyzer = Analyzer::new(config); | ||||||
|  |  | ||||||
|         Ok(Store { |         Ok(Store { | ||||||
|             // Indexing parameters. |             // Indexing parameters. | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| use std::collections::HashMap; | use std::collections::{BTreeSet, HashMap}; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
|  |  | ||||||
| use anyhow::Context; | use anyhow::Context; | ||||||
| @@ -32,6 +32,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | |||||||
|     displayed_fields: Option<Option<Vec<String>>>, |     displayed_fields: Option<Option<Vec<String>>>, | ||||||
|     faceted_fields: Option<Option<HashMap<String, String>>>, |     faceted_fields: Option<Option<HashMap<String, String>>>, | ||||||
|     criteria: Option<Option<Vec<String>>>, |     criteria: Option<Option<Vec<String>>>, | ||||||
|  |     stop_words: Option<Option<BTreeSet<String>>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||||
| @@ -55,6 +56,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|             displayed_fields: None, |             displayed_fields: None, | ||||||
|             faceted_fields: None, |             faceted_fields: None, | ||||||
|             criteria: None, |             criteria: None, | ||||||
|  |             stop_words: None, | ||||||
|             update_id, |             update_id, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -91,6 +93,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         self.criteria = Some(Some(criteria)); |         self.criteria = Some(Some(criteria)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn reset_stop_words(&mut self) { | ||||||
|  |         self.stop_words = Some(None); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn set_stop_words(&mut self, stop_words: BTreeSet<String>) { | ||||||
|  |         self.stop_words = if stop_words.is_empty() { | ||||||
|  |             Some(None) | ||||||
|  |         } else { | ||||||
|  |             Some(Some(stop_words)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> |     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep, u64) + Sync |         F: Fn(UpdateIndexingStep, u64) + Sync | ||||||
| @@ -210,6 +224,28 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         Ok(true) |         Ok(true) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn update_stop_words(&mut self) -> anyhow::Result<bool> { | ||||||
|  |         match self.stop_words { | ||||||
|  |             Some(Some(ref stop_words)) => { | ||||||
|  |                 let current = self.index.stop_words(self.wtxn)?; | ||||||
|  |                 // Since we can't compare a BTreeSet with an FST directly, we convert the | ||||||
|  |                 // BTreeSet to an FST and then compare the two FSTs byte by byte. | ||||||
|  |                 let fst = fst::Set::from_iter(&*stop_words)?; | ||||||
|  |  | ||||||
|  |                 // Does the new FST differ from the previous one? | ||||||
|  |                 if current.map_or(true, |current| current.as_fst().as_bytes() != fst.as_fst().as_bytes()) { | ||||||
|  |                     // we want to re-create our FST. | ||||||
|  |                     self.index.put_stop_words(self.wtxn, &fst)?; | ||||||
|  |                     Ok(true) | ||||||
|  |                 } else { | ||||||
|  |                     Ok(false) | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             Some(None) => Ok(self.index.delete_stop_words(self.wtxn)?), | ||||||
|  |             None => Ok(false), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn update_facets(&mut self) -> anyhow::Result<bool> { |     fn update_facets(&mut self) -> anyhow::Result<bool> { | ||||||
|         match self.faceted_fields { |         match self.faceted_fields { | ||||||
|             Some(Some(ref fields)) => { |             Some(Some(ref fields)) => { | ||||||
| @@ -248,22 +284,23 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|  |  | ||||||
|     pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()> |     pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep, u64) + Sync |     F: Fn(UpdateIndexingStep, u64) + Sync | ||||||
|         { |     { | ||||||
|             self.index.set_updated_at(self.wtxn, &Utc::now())?; |         self.index.set_updated_at(self.wtxn, &Utc::now())?; | ||||||
|             let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; |         let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; | ||||||
|             self.update_displayed()?; |         self.update_displayed()?; | ||||||
|             let facets_updated = self.update_facets()?; |         let stop_words_updated = self.update_stop_words()?; | ||||||
|             // update_criteria MUST be called after update_facets, since criterion fields must be set |         let facets_updated = self.update_facets()?; | ||||||
|             // as facets. |         // update_criteria MUST be called after update_facets, since criterion fields must be set | ||||||
|             self.update_criteria()?; |         // as facets. | ||||||
|             let searchable_updated = self.update_searchable()?; |         self.update_criteria()?; | ||||||
|  |         let searchable_updated = self.update_searchable()?; | ||||||
|  |  | ||||||
|             if facets_updated || searchable_updated { |         if facets_updated || searchable_updated || stop_words_updated { | ||||||
|                 self.reindex(&progress_callback, old_fields_ids_map)?; |             self.reindex(&progress_callback, old_fields_ids_map)?; | ||||||
|             } |  | ||||||
|             Ok(()) |  | ||||||
|         } |         } | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| @@ -271,7 +308,7 @@ mod tests { | |||||||
|     use super::*; |     use super::*; | ||||||
|  |  | ||||||
|     use heed::EnvOpenOptions; |     use heed::EnvOpenOptions; | ||||||
|     use maplit::hashmap; |     use maplit::{hashmap, btreeset}; | ||||||
|  |  | ||||||
|     use crate::facet::FacetType; |     use crate::facet::FacetType; | ||||||
|     use crate::update::{IndexDocuments, UpdateFormat}; |     use crate::update::{IndexDocuments, UpdateFormat}; | ||||||
| @@ -328,7 +365,6 @@ mod tests { | |||||||
|         assert_eq!(result.documents_ids.len(), 1); |         assert_eq!(result.documents_ids.len(), 1); | ||||||
|         let documents = index.documents(&rtxn, result.documents_ids).unwrap(); |         let documents = index.documents(&rtxn, result.documents_ids).unwrap(); | ||||||
|         assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..])); |         assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..])); | ||||||
|         drop(rtxn); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -372,7 +408,6 @@ mod tests { | |||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); |         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||||
|         assert_eq!(fields_ids.unwrap(), &["age"][..]); |         assert_eq!(fields_ids.unwrap(), &["age"][..]); | ||||||
|         drop(rtxn); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -394,7 +429,6 @@ mod tests { | |||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); |         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||||
|         assert_eq!(fields_ids, None); |         assert_eq!(fields_ids, None); | ||||||
|         drop(rtxn); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -434,7 +468,6 @@ mod tests { | |||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); |         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||||
|         assert_eq!(fields_ids, None); |         assert_eq!(fields_ids, None); | ||||||
|         drop(rtxn); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -478,7 +511,96 @@ mod tests { | |||||||
|         // Only count the field_id 0 and level 0 facet values. |         // Only count the field_id 0 and level 0 facet values. | ||||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); |         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); | ||||||
|         assert_eq!(count, 4); |         assert_eq!(count, 4); | ||||||
|         drop(rtxn); |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn default_stop_words() { | ||||||
|  |         let path = tempfile::tempdir().unwrap(); | ||||||
|  |         let mut options = EnvOpenOptions::new(); | ||||||
|  |         options.map_size(10 * 1024 * 1024); // 10 MB | ||||||
|  |         let index = Index::new(options, &path).unwrap(); | ||||||
|  |  | ||||||
|  |         // First we send 3 documents with ids from 1 to 3. | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let content = &b"name,age\nkevin,23\nkevina,21\nbenoit,34\n"[..]; | ||||||
|  |         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); | ||||||
|  |         builder.update_format(UpdateFormat::Csv); | ||||||
|  |         builder.execute(content, |_, _| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         // Ensure there are no stop_words by default | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let stop_words = index.stop_words(&rtxn).unwrap(); | ||||||
|  |         assert!(stop_words.is_none()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn set_and_reset_stop_words() { | ||||||
|  |         let path = tempfile::tempdir().unwrap(); | ||||||
|  |         let mut options = EnvOpenOptions::new(); | ||||||
|  |         options.map_size(10 * 1024 * 1024); // 10 MB | ||||||
|  |         let index = Index::new(options, &path).unwrap(); | ||||||
|  |  | ||||||
|  |         // First we send 3 documents with ids from 1 to 3. | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let content = &b"name,age,maxim\nkevin,23,I love dogs\nkevina,21,Doggos are the best\nbenoit,34,The crepes are really good\n"[..]; | ||||||
|  |         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); | ||||||
|  |         builder.update_format(UpdateFormat::Csv); | ||||||
|  |         builder.execute(content, |_, _| ()).unwrap(); | ||||||
|  |  | ||||||
|  |         // In the same transaction we provide some stop_words | ||||||
|  |         let mut builder = Settings::new(&mut wtxn, &index, 0); | ||||||
|  |         let set = btreeset!{ "i".to_string(), "the".to_string(), "are".to_string() }; | ||||||
|  |         builder.set_stop_words(set.clone()); | ||||||
|  |         builder.execute(|_, _| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         // Ensure stop_words are effectively stored | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let stop_words = index.stop_words(&rtxn).unwrap(); | ||||||
|  |         assert!(stop_words.is_some()); // at this point the index should return something | ||||||
|  |  | ||||||
|  |         let stop_words = stop_words.unwrap(); | ||||||
|  |         let expected = fst::Set::from_iter(&set).unwrap(); | ||||||
|  |         assert_eq!(stop_words.as_fst().as_bytes(), expected.as_fst().as_bytes()); | ||||||
|  |  | ||||||
|  |         // when we search for a stop word that is not used as a prefix, it should be ignored | ||||||
|  |         let result = index.search(&rtxn).query("the ").execute().unwrap(); | ||||||
|  |         assert!(result.documents_ids.is_empty()); | ||||||
|  |         let result = index.search(&rtxn).query("i ").execute().unwrap(); | ||||||
|  |         assert!(result.documents_ids.is_empty()); | ||||||
|  |         let result = index.search(&rtxn).query("are ").execute().unwrap(); | ||||||
|  |         assert!(result.documents_ids.is_empty()); | ||||||
|  |  | ||||||
|  |         let result = index.search(&rtxn).query("dog").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos | ||||||
|  |         let result = index.search(&rtxn).query("benoît").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data | ||||||
|  |  | ||||||
|  |         // now we'll reset the stop_words and ensure it's None | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let mut builder = Settings::new(&mut wtxn, &index, 0); | ||||||
|  |         builder.reset_stop_words(); | ||||||
|  |         builder.execute(|_, _| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let stop_words = index.stop_words(&rtxn).unwrap(); | ||||||
|  |         assert!(stop_words.is_none()); | ||||||
|  |  | ||||||
|  |         // now we can search for the stop words | ||||||
|  |         let result = index.search(&rtxn).query("the").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 2); | ||||||
|  |         let result = index.search(&rtxn).query("i").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 1); | ||||||
|  |         let result = index.search(&rtxn).query("are").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 2); | ||||||
|  |  | ||||||
|  |         // the rest of the search is still not impacted | ||||||
|  |         let result = index.search(&rtxn).query("dog").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos | ||||||
|  |         let result = index.search(&rtxn).query("benoît").execute().unwrap(); | ||||||
|  |         assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -519,6 +641,5 @@ mod tests { | |||||||
|         assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); |         assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); | ||||||
|         assert!(index.primary_key(&rtxn).unwrap().is_none()); |         assert!(index.primary_key(&rtxn).unwrap().is_none()); | ||||||
|         assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); |         assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); | ||||||
|         drop(rtxn); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user