mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-30 23:46:28 +00:00)

	Remove unused files

@@ -3,8 +3,6 @@
 mod criterion;
 mod external_documents_ids;
 mod fields_ids_map;
-mod mdfs;
-mod query_tokens;
 mod search;
 mod update_store;
 pub mod facet;

@@ -1,163 +0,0 @@
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::HashMap;
use std::mem;

use roaring::RoaringBitmap;
use crate::Index;

/// A mana depth-first search implementation: documents are explored by
/// spending a budget of "mana" on word pair proximities.
pub struct Mdfs<'a> {
    index: &'a Index,
    rtxn: &'a heed::RoTxn<'a>,
    words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
    union_cache: HashMap<(usize, u8), RoaringBitmap>,
    candidates: RoaringBitmap,
    mana: u32,
    max_mana: u32,
}

impl<'a> Mdfs<'a> {
    pub fn new(
        index: &'a Index,
        rtxn: &'a heed::RoTxn,
        words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
        candidates: RoaringBitmap,
    ) -> Mdfs<'a>
    {
        // Compute the number of pairs (windows) we have for this list of words.
        let mana = words.len().saturating_sub(1) as u32;
        let max_mana = mana * 8;
        Mdfs { index, rtxn, words, union_cache: HashMap::new(), candidates, mana, max_mana }
    }
}
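
// Illustrative note (not in the original file): for a three-word query,
// `words` has length 3, so `mana` starts at 2 (one unit per adjacent word
// pair) and `max_mana` is 16, since each pair can cost a proximity of at
// most 8.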

impl<'a> Iterator for Mdfs<'a> {
    type Item = anyhow::Result<(u32, RoaringBitmap)>;

    fn next(&mut self) -> Option<Self::Item> {
        // If there are zero words or only one word, the only possible
        // documents we can return are the candidates.
        if self.words.len() <= 1 {
            if self.candidates.is_empty() { return None }
            return Some(Ok((0, mem::take(&mut self.candidates))));
        }

        while self.mana <= self.max_mana {
            let mut answer = RoaringBitmap::new();
            let result = mdfs_step(
                &self.index,
                &self.rtxn,
                self.mana,
                self.words,
                &self.candidates,
                &self.candidates,
                &mut self.union_cache,
                &mut answer,
            );

            match result {
                Ok(()) => {
                    // We always increase the mana for the next loop.
                    let proximity = self.mana;
                    self.mana += 1;

                    // If no documents were found, we must not return yet;
                    // we continue the search with more mana.
                    if !answer.is_empty() {

                        // We remove the answered documents from the list of
                        // candidates to be sure we don't search for them again.
                        self.candidates.difference_with(&answer);

                        // We return the answer.
                        return Some(Ok((proximity, answer)));
                    }
                },
                Err(e) => return Some(Err(e)),
            }
        }

        None
    }
}
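
// Illustrative consumption sketch (not in the original file): the iterator
// yields buckets of documents by increasing total proximity cost, e.g.
//
//     for result in Mdfs::new(&index, &rtxn, &words, candidates) {
//         let (proximity, docids) = result?;
//         /* rank `docids` under the proximity criterion */
//     }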

fn mdfs_step(
    index: &Index,
    rtxn: &heed::RoTxn,
    mana: u32,
    words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
    candidates: &RoaringBitmap,
    parent_docids: &RoaringBitmap,
    union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
    answer: &mut RoaringBitmap,
) -> anyhow::Result<()>
{
    use std::cmp::{min, max};

    let (words1, words2) = (&words[0].0, &words[1].0);
    let pairs = words_pair_combinations(words1, words2);
    let tail = &words[1..];
    let nb_children = tail.len() as u32 - 1;

    // The minimum amount of mana we must consume here is at least 1, and at
    // least the amount our children cannot absorb: each child consumes at
    // most 8 mana and the last child must consume all the remaining mana,
    // so we must not leave more than `nb_children * 8` behind.
    let min_proximity = max(1, mana.saturating_sub(nb_children * 8)) as u8;

    // The maximum amount of mana we can consume is 8, or the remaining mana
    // minus one per child: we can't consume everything ourselves, as each of
    // our children must have at least 1 mana to spend.
    let max_proximity = min(8, mana - nb_children) as u8;
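    // Illustrative note (not in the original file): with `mana = 10` and one
    // child (`nb_children = 1`), these bounds come out as
    // `min_proximity = max(1, 10 - 8) = 2` and `max_proximity = min(8, 10 - 1) = 8`,
    // so this step tries proximities 2 through 8 and leaves the rest to its child.
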
    for proximity in min_proximity..=max_proximity {
        let mut docids = match union_cache.entry((words.len(), proximity)) {
            Occupied(entry) => entry.get().clone(),
            Vacant(entry) => {
                let mut docids = RoaringBitmap::new();
                if proximity == 8 {
                    docids = candidates.clone();
                } else {
                    for (w1, w2) in pairs.iter().cloned() {
                        let key = (w1, w2, proximity);
                        if let Some(di) = index.word_pair_proximity_docids.get(rtxn, &key)? {
                            docids.union_with(&di);
                        }
                    }
                }
                entry.insert(docids).clone()
            }
        };

        // We must be sure that we only return docids that are present in the candidates.
        docids.intersect_with(parent_docids);

        if !docids.is_empty() {
            let mana = mana.checked_sub(proximity as u32).unwrap();
            if tail.len() < 2 {
                // We are the last pair; we return without recursing, as we don't have any children.
                answer.union_with(&docids);
                return Ok(());
            } else {
                return mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache, answer);
            }
        }
    }

    Ok(())
}

fn words_pair_combinations<'h>(
    w1: &'h HashMap<String, (u8, RoaringBitmap)>,
    w2: &'h HashMap<String, (u8, RoaringBitmap)>,
) -> Vec<(&'h str, &'h str)>
{
    let mut pairs = Vec::new();
    for (w1, (_typos, docids1)) in w1 {
        for (w2, (_typos, docids2)) in w2 {
            if !docids1.is_disjoint(&docids2) {
                pairs.push((w1.as_str(), w2.as_str()));
            }
        }
    }
    pairs
}
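
The pair filtering done by `words_pair_combinations` can be illustrated in
isolation. Below is a minimal, hypothetical sketch that only assumes the
`roaring` crate, not the rest of this codebase: a word pair is kept only if
the two posting lists intersect, i.e. the words co-occur in at least one
document.

use std::collections::HashMap;

use roaring::RoaringBitmap;

fn main() {
    // Hypothetical posting lists: word -> (typo count, document ids).
    let mut w1 = HashMap::new();
    let mut w2 = HashMap::new();
    w1.insert("new".to_string(), (0u8, [1u32, 2, 3].into_iter().collect::<RoaringBitmap>()));
    w2.insert("york".to_string(), (0u8, [2u32, 3].into_iter().collect::<RoaringBitmap>()));
    w2.insert("jersey".to_string(), (0u8, [9u32].into_iter().collect::<RoaringBitmap>()));

    // The same rule as words_pair_combinations: keep a pair only if the two
    // words co-occur in at least one document.
    for (a, (_typos, docids1)) in &w1 {
        for (b, (_typos, docids2)) in &w2 {
            if !docids1.is_disjoint(docids2) {
                println!("kept pair: ({a}, {b})"); // prints only (new, york)
            }
        }
    }
}
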
@@ -1,217 +0,0 @@
use meilisearch_tokenizer::{Token, TokenKind};

#[derive(Debug)]
enum State {
    Free,
    Quoted,
}

impl State {
    fn swap(&mut self) {
        match self {
            State::Quoted => *self = State::Free,
            State::Free => *self = State::Quoted,
        }
    }
}

#[derive(Debug, PartialEq, Eq)]
pub enum QueryToken<'a> {
    Free(Token<'a>),
    Quoted(Token<'a>),
}

pub fn query_tokens<'a>(mut tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = QueryToken<'a>> {
    let mut state = State::Free;
    let f = move || {
        loop {
            let token = tokens.next()?;
            match token.kind() {
                _ if token.text().trim() == "\"" => state.swap(),
                TokenKind::Word => {
                    let token = match state {
                        State::Quoted => QueryToken::Quoted(token),
                        State::Free => QueryToken::Free(token),
                    };
                    return Some(token);
                },
                _ => (),
            }
        }
    };
    std::iter::from_fn(f)
}

#[cfg(test)]
mod tests {
    use super::*;
    use QueryToken::{Quoted, Free};
    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
    use fst::Set;

    macro_rules! assert_eq_query_token {
        ($test:expr, Quoted($val:literal)) => {
            match $test {
                Quoted(val) => assert_eq!(val.text(), $val),
                Free(val) => panic!("expected Quoted(\"{}\"), found Free(\"{}\")", $val, val.text()),
            }
        };

        ($test:expr, Free($val:literal)) => {
            match $test {
                Quoted(val) => panic!("expected Free(\"{}\"), found Quoted(\"{}\")", $val, val.text()),
                Free(val) => assert_eq!(val.text(), $val),
            }
        };
    }

    #[test]
    fn empty() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert!(iter.next().is_none());

        let query = " ";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert!(iter.next().is_none());
    }

    #[test]
    fn one_quoted_string() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn one_pending_quoted_string() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn one_non_quoted_string() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn quoted_directly_followed_by_free_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello\"world";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Free("world"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn free_directly_followed_by_quoted_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello\"world\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn free_followed_by_quoted_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello \"world\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn multiple_spaces_separated_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello    world   ";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Free("world"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn multi_interleaved_quoted_free_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello \"world\" coucou \"monde\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert_eq_query_token!(iter.next().unwrap(), Free("coucou"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("monde"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn multi_quoted_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello world\" coucou \"monde est beau\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert_eq_query_token!(iter.next().unwrap(), Free("coucou"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("monde"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("est"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("beau"));
        assert!(iter.next().is_none());
    }

    #[test]
    fn chinese() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "汽车男生";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("汽车"));
        assert_eq_query_token!(iter.next().unwrap(), Free("男生"));
        assert!(iter.next().is_none());
    }
}
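
For intuition, the quote-toggling state machine above can also be shown
without meilisearch_tokenizer. The following is a simplified, hypothetical
re-implementation over plain whitespace-separated words (SimpleToken and
simple_query_tokens are made-up names, not part of the original API): every
`"` flips the Free/Quoted state, exactly like State::swap, and only word
chunks are emitted, tagged with the current state.

#[derive(Debug, PartialEq)]
enum SimpleToken<'a> {
    Free(&'a str),
    Quoted(&'a str),
}

fn simple_query_tokens(query: &str) -> Vec<SimpleToken<'_>> {
    let mut quoted = false;
    let mut tokens = Vec::new();
    for part in query.split_whitespace() {
        for (i, word) in part.split('"').enumerate() {
            // A `"` sits between this chunk and the previous one, so every
            // chunk after the first toggles the state, like State::swap.
            if i > 0 { quoted = !quoted; }
            if !word.is_empty() {
                tokens.push(if quoted {
                    SimpleToken::Quoted(word)
                } else {
                    SimpleToken::Free(word)
                });
            }
        }
    }
    tokens
}

fn main() {
    let tokens = simple_query_tokens("hello \"world\" coucou \"monde\"");
    assert_eq!(tokens, vec![
        SimpleToken::Free("hello"),
        SimpleToken::Quoted("world"),
        SimpleToken::Free("coucou"),
        SimpleToken::Quoted("monde"),
    ]);
    println!("{tokens:?}");
}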