mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Fix code visibility issue + unimplemented detail in proximity rule
This commit is contained in:
		| @@ -121,18 +121,18 @@ impl<'transaction> DatabaseCache<'transaction> { | ||||
|         &mut self, | ||||
|         index: &Index, | ||||
|         txn: &'transaction RoTxn, | ||||
|         word1: &str, | ||||
|         prefix2: &str, | ||||
|         left_prefix: &str, | ||||
|         right: &str, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<&'transaction [u8]>> { | ||||
|         let key = (proximity, prefix2.to_owned(), word1.to_owned()); | ||||
|         let key = (proximity, left_prefix.to_owned(), right.to_owned()); | ||||
|         match self.prefix_word_pair_proximity_docids.entry(key) { | ||||
|             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), | ||||
|             Entry::Vacant(entry) => { | ||||
|                 let bitmap_ptr = index | ||||
|                     .prefix_word_pair_proximity_docids | ||||
|                     .remap_data_type::<ByteSlice>() | ||||
|                     .get(txn, &(proximity, prefix2, word1))?; | ||||
|                     .get(txn, &(proximity, left_prefix, right))?; | ||||
|                 entry.insert(bitmap_ptr); | ||||
|                 Ok(bitmap_ptr) | ||||
|             } | ||||
|   | ||||
| @@ -194,7 +194,7 @@ impl DetailedSearchLogger { | ||||
|         for event in self.events.iter() { | ||||
|             match event { | ||||
|                 SearchEvents::RankingRuleStartIteration { ranking_rule_idx, time, .. } => { | ||||
|                     let elapsed = time.duration_since(prev_time); | ||||
|                     let _elapsed = time.duration_since(prev_time); | ||||
|                     prev_time = *time; | ||||
|                     let parent_activated_id = activated_id(×tamp); | ||||
|                     timestamp.push(0); | ||||
| @@ -216,7 +216,7 @@ impl DetailedSearchLogger { | ||||
| }}").unwrap(); | ||||
|                 } | ||||
|                 SearchEvents::RankingRuleNextBucket { ranking_rule_idx, time, universe, candidates } => { | ||||
|                     let elapsed = time.duration_since(prev_time); | ||||
|                     let _elapsed = time.duration_since(prev_time); | ||||
|                     prev_time = *time; | ||||
|                     let old_activated_id = activated_id(×tamp); | ||||
|                     // writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); | ||||
| @@ -227,7 +227,7 @@ impl DetailedSearchLogger { | ||||
|                         .unwrap(); | ||||
|                 } | ||||
|                 SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, candidates, time } => { | ||||
|                     let elapsed = time.duration_since(prev_time); | ||||
|                     let _elapsed = time.duration_since(prev_time); | ||||
|                     prev_time = *time; | ||||
|                     let old_activated_id = activated_id(×tamp); | ||||
|                     // writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); | ||||
| @@ -239,7 +239,7 @@ impl DetailedSearchLogger { | ||||
|                         .unwrap(); | ||||
|                 } | ||||
|                 SearchEvents::RankingRuleEndIteration { ranking_rule_idx, time, .. } => { | ||||
|                     let elapsed = time.duration_since(prev_time); | ||||
|                     let _elapsed = time.duration_since(prev_time); | ||||
|                     prev_time = *time; | ||||
|                     let cur_activated_id = activated_id(×tamp); | ||||
|                     // writeln!(&mut file, "time.{cur_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); | ||||
| @@ -332,7 +332,7 @@ results.{random} {{ | ||||
|         writeln!(&mut file, "}}").unwrap(); | ||||
|     } | ||||
|      | ||||
|     fn query_node_d2_desc(node_idx: usize, node: &QueryNode, distances: &[u64], file: &mut File) { | ||||
|     fn query_node_d2_desc(node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) { | ||||
|         match &node { | ||||
|             QueryNode::Term(LocatedQueryTerm { value, .. }) => { | ||||
|                 match value { | ||||
|   | ||||
| @@ -1,18 +1,22 @@ | ||||
| pub mod db_cache; | ||||
| pub mod graph_based_ranking_rule; | ||||
| pub mod logger; | ||||
| pub mod query_graph; | ||||
| pub mod query_term; | ||||
| pub mod ranking_rule_graph; | ||||
| pub mod ranking_rules; | ||||
| pub mod resolve_query_graph; | ||||
| pub mod sort; | ||||
| pub mod words; | ||||
| mod db_cache; | ||||
| mod graph_based_ranking_rule; | ||||
| mod logger; | ||||
| mod query_graph; | ||||
| mod query_term; | ||||
| mod ranking_rule_graph; | ||||
| mod ranking_rules; | ||||
| mod resolve_query_graph; | ||||
| mod sort; | ||||
| mod words; | ||||
|  | ||||
| use charabia::Tokenize; | ||||
| use heed::RoTxn; | ||||
| pub use query_graph::*; | ||||
| pub use ranking_rules::*; | ||||
|  | ||||
| use query_graph::{QueryGraph, QueryNode}; | ||||
| pub use ranking_rules::{ | ||||
|     execute_search, RankingRule, RankingRuleOutput, RankingRuleOutputIter, | ||||
|     RankingRuleOutputIterWrapper, RankingRuleQueryTrait, | ||||
| }; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use self::db_cache::DatabaseCache; | ||||
|   | ||||
| @@ -6,6 +6,10 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::cheapest_paths::Path; | ||||
|  | ||||
| // What is PathsMap used for? | ||||
| // For the empty_prefixes field in the EmptyPathsCache only :/ | ||||
| // but it could be used for more, like efficient computing of a set of paths | ||||
|  | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct PathsMap<V> { | ||||
|   | ||||
| @@ -13,16 +13,14 @@ use crate::{Index, Result}; | ||||
|  | ||||
| pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, i8)>> { | ||||
|     Ok(Some(match from_node { | ||||
|         QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => { | ||||
|             match value1 { | ||||
|         QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 { | ||||
|             QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()), | ||||
|             QueryTerm::Phrase { phrase: phrase1 } => { | ||||
|                     // TODO: remove second unwrap | ||||
|                     let original = phrase1.words.last().unwrap().as_ref().unwrap().clone(); | ||||
|                 if let Some(original) = phrase1.words.last().unwrap().as_ref() { | ||||
|                     ( | ||||
|                         WordDerivations { | ||||
|                             original: original.clone(), | ||||
|                             zero_typo: vec![original], | ||||
|                             zero_typo: vec![original.to_owned()], | ||||
|                             one_typo: vec![], | ||||
|                             two_typos: vec![], | ||||
|                             use_prefix_db: false, | ||||
| @@ -31,9 +29,12 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, | ||||
|                         }, | ||||
|                         *pos1.end(), | ||||
|                     ) | ||||
|                 } else { | ||||
|                     // No word pairs if the phrase does not have a regular word as its last term | ||||
|                     return Ok(None); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         QueryNode::Start => ( | ||||
|             WordDerivations { | ||||
|                 original: String::new(), | ||||
| @@ -68,12 +69,11 @@ pub fn visit_to_node<'transaction, 'from_data>( | ||||
|     let (derivations2, pos2, ngram_len2) = match value2 { | ||||
|         QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()), | ||||
|         QueryTerm::Phrase { phrase: phrase2 } => { | ||||
|             // TODO: remove second unwrap | ||||
|             let original = phrase2.words.last().unwrap().as_ref().unwrap().clone(); | ||||
|             if let Some(original) = phrase2.words.first().unwrap().as_ref() { | ||||
|                 ( | ||||
|                     WordDerivations { | ||||
|                         original: original.clone(), | ||||
|                     zero_typo: vec![original], | ||||
|                         zero_typo: vec![original.to_owned()], | ||||
|                         one_typo: vec![], | ||||
|                         two_typos: vec![], | ||||
|                         use_prefix_db: false, | ||||
| @@ -83,11 +83,13 @@ pub fn visit_to_node<'transaction, 'from_data>( | ||||
|                     *pos2.start(), | ||||
|                     1, | ||||
|                 ) | ||||
|             } else { | ||||
|                 // No word pairs if the phrase does not have a regular word as its first term | ||||
|                 return Ok(vec![]); | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // TODO: here we would actually do it for each combination of word1 and word2 | ||||
|     // and take the union of them | ||||
|     if pos1 + 1 != pos2 { | ||||
|         // TODO: how should this actually be handled? | ||||
|         // We want to effectively ignore this pair of terms | ||||
| @@ -130,19 +132,37 @@ pub fn visit_to_node<'transaction, 'from_data>( | ||||
|                             right_prefix: original_word_2.to_owned(), | ||||
|                         }); | ||||
|                 } | ||||
|                 if db_cache | ||||
|                     .get_prefix_word_pair_proximity_docids( | ||||
|                         index, | ||||
|                         txn, | ||||
|                         original_word_2.as_str(), | ||||
|                         word1.as_str(), | ||||
|                         proximity as u8 - 1, | ||||
|                     )? | ||||
|                     .is_some() | ||||
|                 { | ||||
|                     cost_proximity_word_pairs | ||||
|                         .entry(cost) | ||||
|                         .or_default() | ||||
|                         .entry(proximity as u8) | ||||
|                         .or_default() | ||||
|                         .push(WordPair::WordPrefixSwapped { | ||||
|                             left_prefix: original_word_2.to_owned(), | ||||
|                             right: word1.to_owned(), | ||||
|                         }); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let derivations2 = derivations2.all_derivations_except_prefix_db(); | ||||
|     // TODO: safeguard in case the cartesian product is too large? | ||||
|     // TODO: add safeguard in case the cartesian product is too large? | ||||
|     let product_derivations = derivations1.cartesian_product(derivations2); | ||||
|  | ||||
|     for (word1, word2) in product_derivations { | ||||
|         for proximity in 1..=(8 - ngram_len2) { | ||||
|             let cost = (proximity + ngram_len2 - 1) as u8; | ||||
|             // TODO: do the opposite way with a proximity penalty as well! | ||||
|             //       search for (word2, word1, proximity-1), I guess? | ||||
|             if db_cache | ||||
|                 .get_word_pair_proximity_docids(index, txn, word1, word2, proximity as u8)? | ||||
|                 .is_some() | ||||
| @@ -154,6 +174,18 @@ pub fn visit_to_node<'transaction, 'from_data>( | ||||
|                     .or_default() | ||||
|                     .push(WordPair::Words { left: word1.to_owned(), right: word2.to_owned() }); | ||||
|             } | ||||
|             if proximity > 1 | ||||
|                 && db_cache | ||||
|                     .get_word_pair_proximity_docids(index, txn, word2, word1, proximity as u8 - 1)? | ||||
|                     .is_some() | ||||
|             { | ||||
|                 cost_proximity_word_pairs | ||||
|                     .entry(cost) | ||||
|                     .or_default() | ||||
|                     .entry(proximity as u8 - 1) | ||||
|                     .or_default() | ||||
|                     .push(WordPair::Words { left: word2.to_owned(), right: word1.to_owned() }); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     let mut new_edges = cost_proximity_word_pairs | ||||
|   | ||||
| @@ -20,11 +20,8 @@ pub fn compute_docids<'transaction>( | ||||
|             } | ||||
|             WordPair::WordPrefix { left, right_prefix } => db_cache | ||||
|                 .get_word_prefix_pair_proximity_docids(index, txn, left, right_prefix, *proximity), | ||||
|             WordPair::WordsSwapped { left, right } => { | ||||
|                 db_cache.get_word_pair_proximity_docids(index, txn, left, right, *proximity) | ||||
|             } | ||||
|             WordPair::WordPrefixSwapped { left, right_prefix } => db_cache | ||||
|                 .get_prefix_word_pair_proximity_docids(index, txn, left, right_prefix, *proximity), | ||||
|             WordPair::WordPrefixSwapped { left_prefix, right } => db_cache | ||||
|                 .get_prefix_word_pair_proximity_docids(index, txn, left_prefix, right, *proximity), | ||||
|         }?; | ||||
|         let bitmap = | ||||
|             bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default(); | ||||
|   | ||||
| @@ -18,9 +18,8 @@ use crate::{Index, Result}; | ||||
| #[derive(Debug, Clone)] | ||||
| pub enum WordPair { | ||||
|     Words { left: String, right: String }, | ||||
|     WordsSwapped { left: String, right: String }, | ||||
|     WordPrefix { left: String, right_prefix: String }, | ||||
|     WordPrefixSwapped { left: String, right_prefix: String }, | ||||
|     WordPrefixSwapped { left_prefix: String, right: String }, | ||||
| } | ||||
|  | ||||
| #[derive(Clone)] | ||||
|   | ||||
| @@ -24,9 +24,13 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         mut paths: Vec<Vec<u32>>, | ||||
|     ) -> Result<RoaringBitmap> { | ||||
|         paths.sort_unstable(); | ||||
|         // let mut needs_filtering_empty_edges = false; | ||||
|         // let mut needs_filtering_empty_prefix = false; | ||||
|         // let mut needs_filtering_empty_couple_edges = false; | ||||
|         let mut needs_filtering = false; | ||||
|         let mut path_bitmaps = vec![]; | ||||
|         'path_loop: loop { | ||||
|             // TODO: distinguish between empty_edges, empty_prefix, and empty_couple_edges filtering | ||||
|             if needs_filtering { | ||||
|                 for path in paths.iter_mut() { | ||||
|                     if empty_paths_cache.path_is_empty(path) { | ||||
| @@ -61,11 +65,13 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                             self.remove_edge(edge_index); | ||||
|                             edge_docids_cache.cache.remove(&edge_index); | ||||
|                             needs_filtering = true; | ||||
|                             // needs_filtering_empty_edges = true; | ||||
|                             // 3. continue executing this function again on the remaining paths | ||||
|                             continue 'path_loop; | ||||
|                         } else { | ||||
|                             path_bitmap &= edge_docids; | ||||
|                             if path_bitmap.is_disjoint(universe) { | ||||
|                                 // needs_filtering_empty_prefix = true; | ||||
|                                 needs_filtering = true; | ||||
|                                 empty_paths_cache.forbid_prefix(&visited_edges); | ||||
|                                 // if the intersection between this edge and any | ||||
| @@ -76,6 +82,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                                 { | ||||
|                                     let intersection = edge_docids & edge_docids2; | ||||
|                                     if intersection.is_disjoint(universe) { | ||||
|                                         // needs_filtering_empty_couple_edges = true; | ||||
|                                         empty_paths_cache | ||||
|                                             .forbid_couple_edges(*edge_index2, edge_index); | ||||
|                                     } | ||||
|   | ||||
| @@ -98,20 +98,6 @@ pub struct RankingRuleOutput<Q> { | ||||
|     pub candidates: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| #[allow(unused)] | ||||
| pub fn get_start_universe<'transaction>( | ||||
|     index: &Index, | ||||
|     txn: &'transaction RoTxn, | ||||
|     db_cache: &mut DatabaseCache<'transaction>, | ||||
|     query_graph: &QueryGraph, | ||||
|     term_matching_strategy: TermsMatchingStrategy, | ||||
|     // filters: Filters, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     // TODO: actually compute the universe from the query graph | ||||
|     let universe = index.documents_ids(txn).unwrap(); | ||||
|     Ok(universe) | ||||
| } | ||||
|  | ||||
| // TODO: can make it generic over the query type (either query graph or placeholder) fairly easily | ||||
| #[allow(clippy::too_many_arguments)] | ||||
| pub fn execute_search<'transaction>( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user