mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 15:36:28 +00:00 
			
		
		
		
	Prune the query graph after executing a ranking rule
This commit is contained in:
		| @@ -36,6 +36,7 @@ That is we find the documents where either: | |||||||
| - OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by` | - OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by` | ||||||
| */ | */ | ||||||
|  |  | ||||||
|  | use std::collections::HashSet; | ||||||
| use std::ops::ControlFlow; | use std::ops::ControlFlow; | ||||||
|  |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| @@ -50,6 +51,7 @@ use super::ranking_rule_graph::{ | |||||||
| use super::small_bitmap::SmallBitmap; | use super::small_bitmap::SmallBitmap; | ||||||
| use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | ||||||
| use crate::search::new::interner::Interned; | use crate::search::new::interner::Interned; | ||||||
|  | use crate::search::new::query_graph::QueryNodeData; | ||||||
| use crate::Result; | use crate::Result; | ||||||
|  |  | ||||||
| pub type Proximity = GraphBasedRankingRule<ProximityGraph>; | pub type Proximity = GraphBasedRankingRule<ProximityGraph>; | ||||||
| @@ -216,9 +218,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|         let original_universe = universe; |         let original_universe = universe; | ||||||
|         let mut universe = universe.clone(); |         let mut universe = universe.clone(); | ||||||
|  |  | ||||||
|         // TODO: remove this unnecessary clone |  | ||||||
|         let original_graph = graph.clone(); |         let original_graph = graph.clone(); | ||||||
|         // and this vector as well |         let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner); | ||||||
|         let mut paths = vec![]; |         let mut paths = vec![]; | ||||||
|  |  | ||||||
|         // For each path of the given cost, we will compute its associated |         // For each path of the given cost, we will compute its associated | ||||||
| @@ -243,8 +244,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|                 // We store the edges and their docids in vectors in case the path turns out to be |                 // We store the edges and their docids in vectors in case the path turns out to be | ||||||
|                 // empty and we need to figure out why it was empty. |                 // empty and we need to figure out why it was empty. | ||||||
|                 let mut visited_conditions = vec![]; |                 let mut visited_conditions = vec![]; | ||||||
|                 let mut cached_edge_docids = |                 let mut cached_edge_docids = vec![]; | ||||||
|                     graph.conditions_interner.map(|_| RoaringBitmap::new()); |                 // graph.conditions_interner.map(|_| RoaringBitmap::new()); | ||||||
|  |  | ||||||
|                 for &condition_interned_raw in path { |                 for &condition_interned_raw in path { | ||||||
|                     let condition = Interned::new(condition_interned_raw); |                     let condition = Interned::new(condition_interned_raw); | ||||||
| @@ -253,7 +254,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|                     let edge_docids = |                     let edge_docids = | ||||||
|                         edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?; |                         edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?; | ||||||
|  |  | ||||||
|                     *cached_edge_docids.get_mut(condition) = edge_docids.clone(); |                     cached_edge_docids.push((condition, edge_docids.clone())); // .get_mut(condition) = edge_docids.clone(); | ||||||
|  |  | ||||||
|                     // If the edge is empty, then the path will be empty as well, we update the graph |                     // If the edge is empty, then the path will be empty as well, we update the graph | ||||||
|                     // and caches accordingly and skip to the next candidate path. |                     // and caches accordingly and skip to the next candidate path. | ||||||
| @@ -279,12 +280,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|                         // then we also know that any path containing the same couple of |                         // then we also know that any path containing the same couple of | ||||||
|                         // edges will also be empty. |                         // edges will also be empty. | ||||||
|                         for (past_condition, edge_docids2) in cached_edge_docids.iter() { |                         for (past_condition, edge_docids2) in cached_edge_docids.iter() { | ||||||
|                             if past_condition == condition { |                             if *past_condition == condition { | ||||||
|                                 continue; |                                 continue; | ||||||
|                             }; |                             }; | ||||||
|                             let intersection = edge_docids & edge_docids2; |                             let intersection = edge_docids & edge_docids2; | ||||||
|                             if intersection.is_disjoint(&universe) { |                             if intersection.is_disjoint(&universe) { | ||||||
|                                 empty_paths_cache.add_condition_couple(past_condition, condition); |                                 empty_paths_cache.add_condition_couple(*past_condition, condition); | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|                         // We should maybe instead try to compute: |                         // We should maybe instead try to compute: | ||||||
| @@ -292,6 +293,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|                         return Ok(ControlFlow::Continue(())); |                         return Ok(ControlFlow::Continue(())); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |                 assert!(!path_docids.is_empty()); | ||||||
|  |                 for condition in path { | ||||||
|  |                     used_conditions.insert(Interned::new(*condition)); | ||||||
|  |                 } | ||||||
|                 bucket |= &path_docids; |                 bucket |= &path_docids; | ||||||
|                 // Reduce the size of the universe so that we can more optimistically discard candidate paths |                 // Reduce the size of the universe so that we can more optimistically discard candidate paths | ||||||
|                 universe -= path_docids; |                 universe -= path_docids; | ||||||
| @@ -307,16 +312,50 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|         G::log_state( |         G::log_state( | ||||||
|             &original_graph, |             &original_graph, | ||||||
|             &paths, |             &paths, | ||||||
|             &state.empty_paths_cache, |             empty_paths_cache, | ||||||
|             original_universe, |             original_universe, | ||||||
|             &state.all_distances, |             all_distances, | ||||||
|             cost, |             cost, | ||||||
|             logger, |             logger, | ||||||
|         ); |         ); | ||||||
|  |  | ||||||
|         // TODO: Graph-based ranking rules do not (yet) modify the query graph. We could, however, |         // We modify the next query graph so that it only contains the subgraph | ||||||
|         // remove nodes and/or terms within nodes that weren't present in any of the paths. |         // that was used to compute this bucket | ||||||
|         let next_query_graph = state.graph.query_graph.clone(); |         // But we only do it in case the bucket length is >1, because otherwise | ||||||
|  |         // we know the child ranking rule won't be called anyway | ||||||
|  |         let mut next_query_graph = original_graph.query_graph; | ||||||
|  |         next_query_graph.simplify(); | ||||||
|  |         if bucket.len() > 1 { | ||||||
|  |             // 1. Gather all the words and phrases used in the computation of this bucket | ||||||
|  |             let mut used_words = HashSet::new(); | ||||||
|  |             let mut used_phrases = HashSet::new(); | ||||||
|  |             for condition in used_conditions.iter() { | ||||||
|  |                 let condition = graph.conditions_interner.get(condition); | ||||||
|  |                 used_words.extend(G::words_used_by_edge_condition(ctx, condition)?); | ||||||
|  |                 used_phrases.extend(G::phrases_used_by_edge_condition(ctx, condition)?); | ||||||
|  |             } | ||||||
|  |             // 2. Remove the unused words and phrases from all the nodes in the graph | ||||||
|  |             let mut nodes_to_remove = vec![]; | ||||||
|  |             for (node_id, node) in next_query_graph.nodes.iter_mut() { | ||||||
|  |                 let term = match &mut node.data { | ||||||
|  |                     QueryNodeData::Term(term) => term, | ||||||
|  |                     QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue, | ||||||
|  |                 }; | ||||||
|  |                 if let Some(new_term) = ctx | ||||||
|  |                     .term_interner | ||||||
|  |                     .get(term.value) | ||||||
|  |                     .removing_forbidden_terms(&used_words, &used_phrases) | ||||||
|  |                 { | ||||||
|  |                     if new_term.is_empty() { | ||||||
|  |                         nodes_to_remove.push(node_id); | ||||||
|  |                     } else { | ||||||
|  |                         term.value = ctx.term_interner.insert(new_term); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             // 3. Remove the empty nodes from the graph | ||||||
|  |             next_query_graph.remove_nodes(&nodes_to_remove); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         self.state = Some(state); |         self.state = Some(state); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -528,7 +528,7 @@ shape: class" | |||||||
|         ctx: &mut SearchContext, |         ctx: &mut SearchContext, | ||||||
|         graph: &RankingRuleGraph<R>, |         graph: &RankingRuleGraph<R>, | ||||||
|         paths: &[Vec<u16>], |         paths: &[Vec<u16>], | ||||||
|         _empty_paths_cache: &DeadEndPathCache<R>, |         dead_end_paths_cache: &DeadEndPathCache<R>, | ||||||
|         distances: MappedInterner<Vec<(u16, SmallBitmap<R::EdgeCondition>)>, QueryNode>, |         distances: MappedInterner<Vec<(u16, SmallBitmap<R::EdgeCondition>)>, QueryNode>, | ||||||
|         file: &mut File, |         file: &mut File, | ||||||
|     ) { |     ) { | ||||||
| @@ -552,12 +552,11 @@ shape: class" | |||||||
|                         .unwrap(); |                         .unwrap(); | ||||||
|                 } |                 } | ||||||
|                 EdgeCondition::Conditional(condition) => { |                 EdgeCondition::Conditional(condition) => { | ||||||
|                     let condition = graph.conditions_interner.get(*condition); |                     // let condition = graph.conditions_interner.get(*condition); | ||||||
|                     writeln!( |                     writeln!( | ||||||
|                         file, |                         file, | ||||||
|                         "{source_node} -> {dest_node} : \"cost {cost} {edge_label}\"", |                         "{source_node} -> {dest_node} : \"{condition} cost {cost}\"", | ||||||
|                         cost = edge.cost, |                         cost = edge.cost, | ||||||
|                         edge_label = R::label_for_edge_condition(condition) |  | ||||||
|                     ) |                     ) | ||||||
|                     .unwrap(); |                     .unwrap(); | ||||||
|                 } |                 } | ||||||
| @@ -569,28 +568,33 @@ shape: class" | |||||||
|         // Self::paths_d2_description(graph, paths, file); |         // Self::paths_d2_description(graph, paths, file); | ||||||
|         // writeln!(file, "}}").unwrap(); |         // writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         writeln!(file, "Shortest Paths {{").unwrap(); |         writeln!(file, "Paths {{").unwrap(); | ||||||
|         Self::paths_d2_description(ctx, graph, paths, file); |         Self::paths_d2_description(ctx, graph, paths, file); | ||||||
|         writeln!(file, "}}").unwrap(); |         writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         // writeln!(file, "Empty Edge Couples {{").unwrap(); |         writeln!(file, "Dead-end couples of conditions {{").unwrap(); | ||||||
|         // for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() { |         for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() { | ||||||
|         //     writeln!(file, "{i} : \"\" {{").unwrap(); |             writeln!(file, "{i} : \"\" {{").unwrap(); | ||||||
|         //     Self::edge_d2_description(graph, *e1, file); |             Self::condition_d2_description(ctx, graph, e1, file); | ||||||
|         //     Self::edge_d2_description(graph, *e2, file); |             for e2 in e2.iter() { | ||||||
|         //     writeln!(file, "{e1} -- {e2}").unwrap(); |                 Self::condition_d2_description(ctx, graph, e2, file); | ||||||
|         //     writeln!(file, "}}").unwrap(); |                 writeln!(file, "{e1} -- {e2}").unwrap(); | ||||||
|         // } |             } | ||||||
|         // writeln!(file, "}}").unwrap(); |             writeln!(file, "}}").unwrap(); | ||||||
|  |         } | ||||||
|  |         writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         // writeln!(file, "Removed Edges {{").unwrap(); |         writeln!(file, "Dead-end edges {{").unwrap(); | ||||||
|         // for edge_idx in empty_paths_cache.empty_edges.iter() { |         for condition in dead_end_paths_cache.conditions.iter() { | ||||||
|         //     writeln!(file, "{edge_idx}").unwrap(); |             writeln!(file, "{condition}").unwrap(); | ||||||
|         // } |         } | ||||||
|  |         writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|  |         // writeln!(file, "Dead-end prefixes {{").unwrap(); | ||||||
|         // writeln!(file, "}}").unwrap(); |         // writeln!(file, "}}").unwrap(); | ||||||
|     } |     } | ||||||
|     fn condition_d2_description<R: RankingRuleGraphTrait>( |     fn condition_d2_description<R: RankingRuleGraphTrait>( | ||||||
|         _ctx: &mut SearchContext, |         ctx: &mut SearchContext, | ||||||
|         graph: &RankingRuleGraph<R>, |         graph: &RankingRuleGraph<R>, | ||||||
|         condition_id: Interned<R::EdgeCondition>, |         condition_id: Interned<R::EdgeCondition>, | ||||||
|         file: &mut File, |         file: &mut File, | ||||||
| @@ -598,10 +602,11 @@ shape: class" | |||||||
|         let condition = graph.conditions_interner.get(condition_id); |         let condition = graph.conditions_interner.get(condition_id); | ||||||
|         writeln!( |         writeln!( | ||||||
|             file, |             file, | ||||||
|             "{condition_id}: \"{}\" {{ |             "{condition_id} {{ | ||||||
|             shape: class | shape: class | ||||||
|         }}", | {} | ||||||
|             R::label_for_edge_condition(condition) | }}", | ||||||
|  |             R::label_for_edge_condition(ctx, condition).unwrap() | ||||||
|         ) |         ) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -303,7 +303,8 @@ mod tests { | |||||||
|             let mut ctx = SearchContext::new(&index, &txn); |             let mut ctx = SearchContext::new(&index, &txn); | ||||||
|             let results = execute_search( |             let results = execute_search( | ||||||
|                 &mut ctx, |                 &mut ctx, | ||||||
|                 "which a the releases from poison by the government", |                 "released from prison by the government", | ||||||
|  |                 // "which a the releases from poison by the government", | ||||||
|                 // "sun flower s are the best", |                 // "sun flower s are the best", | ||||||
|                 // "zero config", |                 // "zero config", | ||||||
|                 TermsMatchingStrategy::Last, |                 TermsMatchingStrategy::Last, | ||||||
| @@ -338,7 +339,7 @@ mod tests { | |||||||
|  |  | ||||||
|             println!("{}us: {:?}", elapsed.as_micros(), results); |             println!("{}us: {:?}", elapsed.as_micros(), results); | ||||||
|         } |         } | ||||||
|         // for (id, _document) in documents { |         // for (id, document) in documents { | ||||||
|         //     println!("{id}:"); |         //     println!("{id}:"); | ||||||
|         //     // println!("{document}"); |         //     // println!("{document}"); | ||||||
|         // } |         // } | ||||||
| @@ -359,9 +360,13 @@ mod tests { | |||||||
|         let start = Instant::now(); |         let start = Instant::now(); | ||||||
|  |  | ||||||
|         let mut s = Search::new(&txn, &index); |         let mut s = Search::new(&txn, &index); | ||||||
|         s.query("which a the releases from poison by the government"); |         s.query( | ||||||
|  |             // "which a the releases from poison by the government", | ||||||
|  |             // "sun flower s are the best", | ||||||
|  |             "zero config", | ||||||
|  |         ); | ||||||
|         s.terms_matching_strategy(TermsMatchingStrategy::Last); |         s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||||
|         // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); |         // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlyIterative); | ||||||
|         let docs = s.execute().unwrap(); |         let docs = s.execute().unwrap(); | ||||||
|  |  | ||||||
|         let elapsed = start.elapsed(); |         let elapsed = start.elapsed(); | ||||||
|   | |||||||
| @@ -281,7 +281,7 @@ impl QueryGraph { | |||||||
|  |  | ||||||
|     /// Simplify the query graph by removing all nodes that are disconnected from |     /// Simplify the query graph by removing all nodes that are disconnected from | ||||||
|     /// the start or end nodes. |     /// the start or end nodes. | ||||||
|     fn simplify(&mut self) { |     pub fn simplify(&mut self) { | ||||||
|         loop { |         loop { | ||||||
|             let mut nodes_to_remove = vec![]; |             let mut nodes_to_remove = vec![]; | ||||||
|             for (node_idx, node) in self.nodes.iter() { |             for (node_idx, node) in self.nodes.iter() { | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use std::collections::HashSet; | ||||||
| use std::mem; | use std::mem; | ||||||
| use std::ops::RangeInclusive; | use std::ops::RangeInclusive; | ||||||
|  |  | ||||||
| @@ -59,6 +60,111 @@ pub struct QueryTerm { | |||||||
|     pub use_prefix_db: Option<Interned<String>>, |     pub use_prefix_db: Option<Interned<String>>, | ||||||
| } | } | ||||||
| impl QueryTerm { | impl QueryTerm { | ||||||
|  |     pub fn removing_forbidden_terms( | ||||||
|  |         &self, | ||||||
|  |         allowed_words: &HashSet<Interned<String>>, | ||||||
|  |         allowed_phrases: &HashSet<Interned<Phrase>>, | ||||||
|  |     ) -> Option<Self> { | ||||||
|  |         let QueryTerm { | ||||||
|  |             original, | ||||||
|  |             is_ngram, | ||||||
|  |             is_prefix, | ||||||
|  |             phrase, | ||||||
|  |             zero_typo, | ||||||
|  |             prefix_of, | ||||||
|  |             synonyms, | ||||||
|  |             split_words, | ||||||
|  |             one_typo, | ||||||
|  |             two_typos, | ||||||
|  |             use_prefix_db, | ||||||
|  |         } = self; | ||||||
|  |  | ||||||
|  |         let mut changed = false; | ||||||
|  |  | ||||||
|  |         let mut new_zero_typo = None; | ||||||
|  |         if let Some(w) = zero_typo { | ||||||
|  |             if allowed_words.contains(w) { | ||||||
|  |                 new_zero_typo = Some(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         // TODO: this is incorrect, prefix DB stuff should be treated separately | ||||||
|  |         let mut new_use_prefix_db = None; | ||||||
|  |         if let Some(w) = use_prefix_db { | ||||||
|  |             if allowed_words.contains(w) { | ||||||
|  |                 new_use_prefix_db = Some(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let mut new_prefix_of = vec![]; | ||||||
|  |         for w in prefix_of.iter() { | ||||||
|  |             if allowed_words.contains(w) { | ||||||
|  |                 new_prefix_of.push(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let mut new_one_typo = vec![]; | ||||||
|  |         for w in one_typo.iter() { | ||||||
|  |             if allowed_words.contains(w) { | ||||||
|  |                 new_one_typo.push(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let mut new_two_typos = vec![]; | ||||||
|  |         for w in two_typos.iter() { | ||||||
|  |             if allowed_words.contains(w) { | ||||||
|  |                 new_two_typos.push(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         // TODO: this is incorrect, prefix DB stuff should be treated separately | ||||||
|  |         let mut new_phrase = None; | ||||||
|  |         if let Some(w) = phrase { | ||||||
|  |             if !allowed_phrases.contains(w) { | ||||||
|  |                 new_phrase = Some(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let mut new_split_words = None; | ||||||
|  |         if let Some(w) = split_words { | ||||||
|  |             if allowed_phrases.contains(w) { | ||||||
|  |                 new_split_words = Some(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let mut new_synonyms = vec![]; | ||||||
|  |         for w in synonyms.iter() { | ||||||
|  |             if allowed_phrases.contains(w) { | ||||||
|  |                 new_synonyms.push(*w); | ||||||
|  |             } else { | ||||||
|  |                 changed = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         if changed { | ||||||
|  |             Some(QueryTerm { | ||||||
|  |                 original: *original, | ||||||
|  |                 is_ngram: *is_ngram, | ||||||
|  |                 is_prefix: *is_prefix, | ||||||
|  |                 phrase: new_phrase, | ||||||
|  |                 zero_typo: new_zero_typo, | ||||||
|  |                 prefix_of: new_prefix_of.into_boxed_slice(), | ||||||
|  |                 synonyms: new_synonyms.into_boxed_slice(), | ||||||
|  |                 split_words: new_split_words, | ||||||
|  |                 one_typo: new_one_typo.into_boxed_slice(), | ||||||
|  |                 two_typos: new_two_typos.into_boxed_slice(), | ||||||
|  |                 use_prefix_db: new_use_prefix_db, | ||||||
|  |             }) | ||||||
|  |         } else { | ||||||
|  |             None | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     pub fn phrase( |     pub fn phrase( | ||||||
|         word_interner: &mut DedupInterner<String>, |         word_interner: &mut DedupInterner<String>, | ||||||
|         phrase_interner: &mut DedupInterner<Phrase>, |         phrase_interner: &mut DedupInterner<Phrase>, | ||||||
|   | |||||||
| @@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|             empty_paths_cache, |             empty_paths_cache, | ||||||
|             &mut visit, |             &mut visit, | ||||||
|             &mut vec![], |             &mut vec![], | ||||||
|             &mut SmallBitmap::new(self.edges_store.len()), |             &mut SmallBitmap::for_interned_values_in(&self.conditions_interner), | ||||||
|             &mut empty_paths_cache.conditions.clone(), |             &mut empty_paths_cache.conditions.clone(), | ||||||
|         )?; |         )?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ mod proximity; | |||||||
| /// Implementation of the `typo` ranking rule | /// Implementation of the `typo` ranking rule | ||||||
| mod typo; | mod typo; | ||||||
|  |  | ||||||
|  | use std::collections::HashSet; | ||||||
| use std::hash::Hash; | use std::hash::Hash; | ||||||
|  |  | ||||||
| pub use edge_docids_cache::EdgeConditionDocIdsCache; | pub use edge_docids_cache::EdgeConditionDocIdsCache; | ||||||
| @@ -26,6 +27,7 @@ pub use typo::{TypoEdge, TypoGraph}; | |||||||
|  |  | ||||||
| use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner}; | use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner}; | ||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
|  | use super::query_term::Phrase; | ||||||
| use super::small_bitmap::SmallBitmap; | use super::small_bitmap::SmallBitmap; | ||||||
| use super::{QueryGraph, QueryNode, SearchContext}; | use super::{QueryGraph, QueryNode, SearchContext}; | ||||||
| use crate::Result; | use crate::Result; | ||||||
| @@ -82,7 +84,19 @@ pub trait RankingRuleGraphTrait: Sized { | |||||||
|  |  | ||||||
|     /// Return the label of the given edge condition, to be used when visualising |     /// Return the label of the given edge condition, to be used when visualising | ||||||
|     /// the ranking rule graph. |     /// the ranking rule graph. | ||||||
|     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String; |     fn label_for_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<String>; | ||||||
|  |  | ||||||
|  |     fn words_used_by_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<HashSet<Interned<String>>>; | ||||||
|  |     fn phrases_used_by_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<HashSet<Interned<Phrase>>>; | ||||||
|  |  | ||||||
|     /// Compute the document ids associated with the given edge condition, |     /// Compute the document ids associated with the given edge condition, | ||||||
|     /// restricted to the given universe. |     /// restricted to the given universe. | ||||||
|   | |||||||
| @@ -1,6 +1,9 @@ | |||||||
| pub mod build; | pub mod build; | ||||||
| pub mod compute_docids; | pub mod compute_docids; | ||||||
|  |  | ||||||
|  | use std::collections::HashSet; | ||||||
|  | use std::iter::FromIterator; | ||||||
|  |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::empty_paths_cache::DeadEndPathCache; | use super::empty_paths_cache::DeadEndPathCache; | ||||||
| @@ -44,17 +47,6 @@ pub enum ProximityGraph {} | |||||||
| impl RankingRuleGraphTrait for ProximityGraph { | impl RankingRuleGraphTrait for ProximityGraph { | ||||||
|     type EdgeCondition = ProximityCondition; |     type EdgeCondition = ProximityCondition; | ||||||
|  |  | ||||||
|     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String { |  | ||||||
|         match edge { |  | ||||||
|             ProximityCondition::Term { term } => { |  | ||||||
|                 format!("term {term}") |  | ||||||
|             } |  | ||||||
|             ProximityCondition::Pairs { pairs } => { |  | ||||||
|                 format!("pairs {}", pairs.len()) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn resolve_edge_condition<'ctx>( |     fn resolve_edge_condition<'ctx>( | ||||||
|         ctx: &mut SearchContext<'ctx>, |         ctx: &mut SearchContext<'ctx>, | ||||||
|         condition: &Self::EdgeCondition, |         condition: &Self::EdgeCondition, | ||||||
| @@ -83,4 +75,113 @@ impl RankingRuleGraphTrait for ProximityGraph { | |||||||
|     ) { |     ) { | ||||||
|         logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost); |         logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn label_for_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<String> { | ||||||
|  |         match edge { | ||||||
|  |             ProximityCondition::Term { term } => { | ||||||
|  |                 let term = ctx.term_interner.get(*term); | ||||||
|  |                 Ok(format!("{} : exists", ctx.word_interner.get(term.original))) | ||||||
|  |             } | ||||||
|  |             ProximityCondition::Pairs { pairs } => { | ||||||
|  |                 let mut s = String::new(); | ||||||
|  |                 for pair in pairs.iter() { | ||||||
|  |                     match pair { | ||||||
|  |                         WordPair::Words { phrases, left, right, proximity } => { | ||||||
|  |                             let left = ctx.word_interner.get(*left); | ||||||
|  |                             let right = ctx.word_interner.get(*right); | ||||||
|  |                             if !phrases.is_empty() { | ||||||
|  |                                 s.push_str(&format!("{} phrases + ", phrases.len())); | ||||||
|  |                             } | ||||||
|  |                             s.push_str(&format!("\"{left} {right}\": {proximity}\n")); | ||||||
|  |                         } | ||||||
|  |                         WordPair::WordPrefix { phrases, left, right_prefix, proximity } => { | ||||||
|  |                             let left = ctx.word_interner.get(*left); | ||||||
|  |                             let right = ctx.word_interner.get(*right_prefix); | ||||||
|  |                             if !phrases.is_empty() { | ||||||
|  |                                 s.push_str(&format!("{} phrases + ", phrases.len())); | ||||||
|  |                             } | ||||||
|  |                             s.push_str(&format!("\"{left} {right}...\" : {proximity}\n")); | ||||||
|  |                         } | ||||||
|  |                         WordPair::WordPrefixSwapped { left_prefix, right, proximity } => { | ||||||
|  |                             let left = ctx.word_interner.get(*left_prefix); | ||||||
|  |                             let right = ctx.word_interner.get(*right); | ||||||
|  |                             s.push_str(&format!("\"{left}... {right}\" : {proximity}\n")); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 Ok(s) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Gathers the interned words that a proximity edge condition refers to. | ||||||
|  |     /// | ||||||
|  |     /// A `Term` condition yields whatever `all_single_words_except_prefix_db` | ||||||
|  |     /// reports for the term; a `Pairs` condition yields both sides of every pair. | ||||||
|  |     fn words_used_by_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<HashSet<Interned<String>>> { | ||||||
|  |         // Handle the single-term case up front so the rest only deals with pairs. | ||||||
|  |         let pairs = match edge { | ||||||
|  |             ProximityCondition::Term { term } => { | ||||||
|  |                 let query_term = ctx.term_interner.get(*term); | ||||||
|  |                 return Ok(HashSet::from_iter(query_term.all_single_words_except_prefix_db())); | ||||||
|  |             } | ||||||
|  |             ProximityCondition::Pairs { pairs } => pairs, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let mut words = HashSet::new(); | ||||||
|  |         for pair in pairs.iter() { | ||||||
|  |             // TODO: prefixes are recorded here as if they were plain words, which is | ||||||
|  |             // not correct; there should be another trait method for collecting the | ||||||
|  |             // prefixes to be used with the prefix DBs. | ||||||
|  |             let (first, second) = match pair { | ||||||
|  |                 WordPair::Words { left, right, .. } => (*left, *right), | ||||||
|  |                 WordPair::WordPrefix { left, right_prefix, .. } => (*left, *right_prefix), | ||||||
|  |                 WordPair::WordPrefixSwapped { left_prefix, right, .. } => (*left_prefix, *right), | ||||||
|  |             }; | ||||||
|  |             words.insert(first); | ||||||
|  |             words.insert(second); | ||||||
|  |         } | ||||||
|  |         Ok(words) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Gathers the interned phrases that a proximity edge condition refers to. | ||||||
|  |     /// | ||||||
|  |     /// A `Term` condition yields the term's `all_phrases`; a `Pairs` condition | ||||||
|  |     /// yields the phrases attached to its `Words` and `WordPrefix` pairs. | ||||||
|  |     fn phrases_used_by_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<HashSet<Interned<Phrase>>> { | ||||||
|  |         let pairs = match edge { | ||||||
|  |             ProximityCondition::Term { term } => { | ||||||
|  |                 let query_term = ctx.term_interner.get(*term); | ||||||
|  |                 return Ok(HashSet::from_iter(query_term.all_phrases())); | ||||||
|  |             } | ||||||
|  |             ProximityCondition::Pairs { pairs } => pairs, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let mut used = HashSet::new(); | ||||||
|  |         for pair in pairs.iter() { | ||||||
|  |             match pair { | ||||||
|  |                 WordPair::Words { phrases, .. } => used.extend(phrases.iter().copied()), | ||||||
|  |                 WordPair::WordPrefix { phrases, .. } => used.extend(phrases.iter().copied()), | ||||||
|  |                 // This variant has no `phrases` field, so it contributes nothing. | ||||||
|  |                 WordPair::WordPrefixSwapped { .. } => {} | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         Ok(used) | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -5,10 +5,13 @@ use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; | |||||||
| use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | ||||||
| use crate::search::new::logger::SearchLogger; | use crate::search::new::logger::SearchLogger; | ||||||
| use crate::search::new::query_graph::QueryNodeData; | use crate::search::new::query_graph::QueryNodeData; | ||||||
| use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm}; | use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm}; | ||||||
| use crate::search::new::small_bitmap::SmallBitmap; | use crate::search::new::small_bitmap::SmallBitmap; | ||||||
| use crate::search::new::{QueryGraph, QueryNode, SearchContext}; | use crate::search::new::{QueryGraph, QueryNode, SearchContext}; | ||||||
| use crate::Result; | use crate::Result; | ||||||
|  | use std::collections::HashSet; | ||||||
|  | use std::fmt::Write; | ||||||
|  | use std::iter::FromIterator; | ||||||
|  |  | ||||||
| #[derive(Clone, PartialEq, Eq, Hash)] | #[derive(Clone, PartialEq, Eq, Hash)] | ||||||
| pub struct TypoEdge { | pub struct TypoEdge { | ||||||
| @@ -21,10 +24,6 @@ pub enum TypoGraph {} | |||||||
| impl RankingRuleGraphTrait for TypoGraph { | impl RankingRuleGraphTrait for TypoGraph { | ||||||
|     type EdgeCondition = TypoEdge; |     type EdgeCondition = TypoEdge; | ||||||
|  |  | ||||||
|     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String { |  | ||||||
|         format!(", {} typos", edge.nbr_typos) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn resolve_edge_condition<'db_cache, 'ctx>( |     fn resolve_edge_condition<'db_cache, 'ctx>( | ||||||
|         ctx: &mut SearchContext<'ctx>, |         ctx: &mut SearchContext<'ctx>, | ||||||
|         edge: &Self::EdgeCondition, |         edge: &Self::EdgeCondition, | ||||||
| @@ -147,4 +146,78 @@ impl RankingRuleGraphTrait for TypoGraph { | |||||||
|     ) { |     ) { | ||||||
|         logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost); |         logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Builds a multi-line, human-readable label for the query term behind a | ||||||
|  |     /// typo edge: one line per derivation of the term, each tagged with its kind. | ||||||
|  |     fn label_for_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<String> { | ||||||
|  |         // The typo count of the edge is not part of the label. | ||||||
|  |         let TypoEdge { term: term_id, nbr_typos: _ } = edge; | ||||||
|  |         // Every field is listed explicitly (no `..`), so a new `QueryTerm` field | ||||||
|  |         // has to be acknowledged here before the code compiles again. | ||||||
|  |         let QueryTerm { | ||||||
|  |             original: _, | ||||||
|  |             is_ngram: _, | ||||||
|  |             is_prefix: _, | ||||||
|  |             phrase: exact_phrase, | ||||||
|  |             zero_typo: exact_word, | ||||||
|  |             prefix_of: prefix_matches, | ||||||
|  |             synonyms: synonym_phrases, | ||||||
|  |             split_words: split_phrase, | ||||||
|  |             one_typo: one_typo_words, | ||||||
|  |             two_typos: two_typo_words, | ||||||
|  |             use_prefix_db: prefix_db_word, | ||||||
|  |         } = ctx.term_interner.get(*term_id); | ||||||
|  |  | ||||||
|  |         let mut label = String::new(); | ||||||
|  |         if let Some(id) = exact_phrase { | ||||||
|  |             let text = ctx.phrase_interner.get(*id).description(&ctx.word_interner); | ||||||
|  |             writeln!(label, "\"{text}\" : phrase").unwrap(); | ||||||
|  |         } | ||||||
|  |         if let Some(id) = exact_word { | ||||||
|  |             let text = ctx.word_interner.get(*id); | ||||||
|  |             writeln!(label, "\"{text}\" : 0 typo").unwrap(); | ||||||
|  |         } | ||||||
|  |         for id in prefix_matches.iter() { | ||||||
|  |             let text = ctx.word_interner.get(*id); | ||||||
|  |             writeln!(label, "\"{text}\" : prefix").unwrap(); | ||||||
|  |         } | ||||||
|  |         for id in one_typo_words.iter() { | ||||||
|  |             let text = ctx.word_interner.get(*id); | ||||||
|  |             writeln!(label, "\"{text}\" : 1 typo").unwrap(); | ||||||
|  |         } | ||||||
|  |         for id in two_typo_words.iter() { | ||||||
|  |             let text = ctx.word_interner.get(*id); | ||||||
|  |             writeln!(label, "\"{text}\" : 2 typos").unwrap(); | ||||||
|  |         } | ||||||
|  |         if let Some(id) = split_phrase { | ||||||
|  |             let text = ctx.phrase_interner.get(*id).description(&ctx.word_interner); | ||||||
|  |             writeln!(label, "\"{text}\" : split words").unwrap(); | ||||||
|  |         } | ||||||
|  |         for id in synonym_phrases.iter() { | ||||||
|  |             let text = ctx.phrase_interner.get(*id).description(&ctx.word_interner); | ||||||
|  |             writeln!(label, "\"{text}\" : synonym").unwrap(); | ||||||
|  |         } | ||||||
|  |         if let Some(id) = prefix_db_word { | ||||||
|  |             let text = ctx.word_interner.get(*id); | ||||||
|  |             writeln!(label, "\"{text}\" : use prefix db").unwrap(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(label) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Gathers the interned words of the query term behind a typo edge, as | ||||||
|  |     /// reported by `all_single_words_except_prefix_db`. | ||||||
|  |     fn words_used_by_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<HashSet<Interned<String>>> { | ||||||
|  |         let query_term = ctx.term_interner.get(edge.term); | ||||||
|  |         let words = query_term.all_single_words_except_prefix_db(); | ||||||
|  |         Ok(HashSet::from_iter(words)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Gathers the interned phrases of the query term behind a typo edge, as | ||||||
|  |     /// reported by `all_phrases`. | ||||||
|  |     fn phrases_used_by_edge_condition<'ctx>( | ||||||
|  |         ctx: &mut SearchContext<'ctx>, | ||||||
|  |         edge: &Self::EdgeCondition, | ||||||
|  |     ) -> Result<HashSet<Interned<Phrase>>> { | ||||||
|  |         let query_term = ctx.term_interner.get(edge.term); | ||||||
|  |         let phrases = query_term.all_phrases(); | ||||||
|  |         Ok(HashSet::from_iter(phrases)) | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user