mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Rewrite the dead-ends cache to detect more dead-ends
This commit is contained in:
		| @@ -135,7 +135,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|         query_graph: &QueryGraph, | ||||
|     ) -> Result<()> { | ||||
|         let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; | ||||
|         let mut condition_docids_cache = ConditionDocIdsCache::new(universe); | ||||
|         let mut condition_docids_cache = ConditionDocIdsCache::default(); | ||||
|         let mut dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner); | ||||
|  | ||||
|         // First simplify the graph as much as possible, by computing the docids of all the conditions | ||||
| @@ -215,36 +215,36 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|  | ||||
|         let original_graph = graph.clone(); | ||||
|         let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner); | ||||
|         let mut paths = vec![]; | ||||
|         let mut considered_paths = vec![]; | ||||
|         let mut good_paths = vec![]; | ||||
|  | ||||
|         // For each path of the given cost, we will compute its associated | ||||
|         // document ids. | ||||
|         // In case the path does not resolve to any document id, we try to figure out why | ||||
|         // and update the `dead_end_path_cache` accordingly. | ||||
|         // For example, it may be that the path is empty because one of its edges is disjoint | ||||
|         // with the universe, or because a prefix of the path is disjoint with the universe, or because | ||||
|         // the path contains two edges that are disjoint from each other within the universe. | ||||
|         // Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces | ||||
|         // the number of future candidate paths given by that same function. | ||||
|         graph.visit_paths_of_cost( | ||||
|             graph.query_graph.root_node, | ||||
|             cost, | ||||
|             all_distances, | ||||
|             dead_end_path_cache.forbidden.clone(), | ||||
|             |condition, forbidden_conditions| {}, | ||||
|             dead_end_path_cache, | ||||
|             |path, graph, dead_end_path_cache| { | ||||
|                 if universe.is_empty() { | ||||
|                     return Ok(ControlFlow::Break(())); | ||||
|                 } | ||||
|                 // Accumulate the path for logging purposes only | ||||
|                 paths.push(path.to_vec()); | ||||
|                 considered_paths.push(path.to_vec()); | ||||
|  | ||||
|                 let mut path_docids = universe.clone(); | ||||
|  | ||||
|                 // We store the edges and their docids in vectors in case the path turns out to be | ||||
|                 // empty and we need to figure out why it was empty. | ||||
|                 let mut visited_conditions = vec![]; | ||||
|                 let mut cached_condition_docids = vec![]; | ||||
|                 // let mut cached_condition_docids = vec![]; | ||||
|                 let mut subpath_docids = vec![]; | ||||
|  | ||||
|                 for &latest_condition in path { | ||||
|                 for (latest_condition_path_idx, &latest_condition) in path.iter().enumerate() { | ||||
|                     visited_conditions.push(latest_condition); | ||||
|  | ||||
|                     let condition_docids = condition_docids_cache.get_condition_docids( | ||||
| @@ -254,11 +254,9 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|                         &universe, | ||||
|                     )?; | ||||
|  | ||||
|                     cached_condition_docids.push((latest_condition, condition_docids.clone())); | ||||
|  | ||||
|                     // If the edge is empty, then the path will be empty as well, we update the graph | ||||
|                     // and caches accordingly and skip to the next candidate path. | ||||
|                     if condition_docids.is_disjoint(&universe) { | ||||
|                     if condition_docids.is_empty() { | ||||
|                         // 1. Store in the cache that this edge is empty for this universe | ||||
|                         dead_end_path_cache.forbid_condition(latest_condition); | ||||
|                         // 2. remove all the edges with this condition from the ranking rule graph | ||||
| @@ -267,45 +265,71 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|                         condition_docids_cache.cache.remove(&latest_condition); | ||||
|                         return Ok(ControlFlow::Continue(())); | ||||
|                     } | ||||
|                     path_docids &= condition_docids; | ||||
|                     subpath_docids.push(path_docids.clone()); | ||||
|  | ||||
|                     // If the (sub)path is empty, we try to figure out why and update the caches accordingly. | ||||
|                     if path_docids.is_disjoint(condition_docids) { | ||||
|                     if path_docids.is_empty() { | ||||
|                         let len_prefix = subpath_docids.len() - 1; | ||||
|                         // First, we know that this path is empty, and thus any path | ||||
|                         // that is a superset of it will also be empty. | ||||
|                         dead_end_path_cache.forbid_condition_after_prefix( | ||||
|                             visited_conditions[..visited_conditions.len() - 1].iter().copied(), | ||||
|                             visited_conditions[..len_prefix].iter().copied(), | ||||
|                             latest_condition, | ||||
|                         ); | ||||
|  | ||||
|                         let mut dead_end_cache_cursor = dead_end_path_cache; | ||||
|                         if visited_conditions.len() > 1 { | ||||
|                             let mut subprefix = vec![]; | ||||
|                             // Deadend if the intersection between this edge and any | ||||
|                             // previous prefix is disjoint with the universe | ||||
|                             for (past_condition, subpath_docids) in visited_conditions[..len_prefix] | ||||
|                                 .iter() | ||||
|                                 .zip(subpath_docids[..len_prefix].iter()) | ||||
|                             { | ||||
|                                 if *past_condition == latest_condition { | ||||
|                                     todo!(); | ||||
|                                 }; | ||||
|                                 subprefix.push(*past_condition); | ||||
|                                 if condition_docids.is_disjoint(subpath_docids) { | ||||
|                                     dead_end_path_cache.forbid_condition_after_prefix( | ||||
|                                         subprefix.iter().copied(), | ||||
|                                         latest_condition, | ||||
|                                     ); | ||||
|                                 } | ||||
|                             } | ||||
|  | ||||
|                         // Second, if the intersection between this edge and any | ||||
|                         // previous prefix is disjoint with the universe, then... TODO | ||||
|                         for (past_condition, past_condition_docids) in | ||||
|                             cached_condition_docids.iter() | ||||
|                         { | ||||
|                             // TODO: should ensure that it is simply not possible to have twice | ||||
|                             // the same condition in the cached_condition_docids. Maybe it is | ||||
|                             // already the case? | ||||
|                             dead_end_cache_cursor = | ||||
|                                 dead_end_cache_cursor.advance(*past_condition).unwrap(); | ||||
|                             // TODO: check how that interacts with the dead end cache? | ||||
|                             if *past_condition == latest_condition { | ||||
|                                 // TODO: should we break instead? | ||||
|                                 // Is it even possible? | ||||
|                                 continue; | ||||
|                             }; | ||||
|                             if condition_docids.is_disjoint(past_condition_docids) { | ||||
|                                 dead_end_cache_cursor.forbid_condition(latest_condition); | ||||
|                             // keep the same prefix and check the intersection with | ||||
|                             // all the remaining conditions | ||||
|                             let mut forbidden = dead_end_path_cache.forbidden.clone(); | ||||
|                             let mut cursor = dead_end_path_cache; | ||||
|                             for &c in visited_conditions[..len_prefix].iter() { | ||||
|                                 cursor = cursor.advance(c).unwrap(); | ||||
|                                 forbidden.union(&cursor.forbidden); | ||||
|                             } | ||||
|  | ||||
|                             let past_path_docids = &subpath_docids[subpath_docids.len() - 2]; | ||||
|  | ||||
|                             let remaining_conditions = | ||||
|                                 path[latest_condition_path_idx..].iter().skip(1); | ||||
|                             for next_condition in remaining_conditions { | ||||
|                                 if forbidden.contains(*next_condition) { | ||||
|                                     continue; | ||||
|                                 } | ||||
|                                 let next_condition_docids = condition_docids_cache | ||||
|                                     .get_condition_docids(ctx, *next_condition, graph, &universe)?; | ||||
|  | ||||
|                                 if past_path_docids.is_disjoint(next_condition_docids) { | ||||
|                                     cursor.forbid_condition(*next_condition); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                         // We should maybe instead try to compute: | ||||
|                         // 0th & nth & 1st & n-1th & 2nd & etc... | ||||
|  | ||||
|                         return Ok(ControlFlow::Continue(())); | ||||
|                     } else { | ||||
|                         path_docids &= condition_docids; | ||||
|                     } | ||||
|                 } | ||||
|                 assert!(!path_docids.is_empty()); | ||||
|                 // Accumulate the path for logging purposes only | ||||
|                 good_paths.push(path.to_vec()); | ||||
|                 for condition in path { | ||||
|                     used_conditions.insert(*condition); | ||||
|                 } | ||||
| @@ -323,7 +347,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|         // println!("  {} paths of cost {} in {}", paths.len(), cost, self.id); | ||||
|         G::log_state( | ||||
|             &original_graph, | ||||
|             &paths, | ||||
|             &good_paths, | ||||
|             dead_end_path_cache, | ||||
|             original_universe, | ||||
|             all_distances, | ||||
|   | ||||
| @@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner}; | ||||
| use crate::search::new::query_graph::QueryNodeData; | ||||
| use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm}; | ||||
| use crate::search::new::ranking_rule_graph::{ | ||||
|     DeadEndPathCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph, | ||||
|     DeadEndsCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph, | ||||
|     RankingRuleGraphTrait, TypoCondition, TypoGraph, | ||||
| }; | ||||
| use crate::search::new::small_bitmap::SmallBitmap; | ||||
| @@ -44,7 +44,7 @@ pub enum SearchEvents { | ||||
|     ProximityState { | ||||
|         graph: RankingRuleGraph<ProximityGraph>, | ||||
|         paths: Vec<Vec<Interned<ProximityCondition>>>, | ||||
|         dead_end_path_cache: DeadEndPathCache<ProximityGraph>, | ||||
|         dead_end_path_cache: DeadEndsCache<ProximityCondition>, | ||||
|         universe: RoaringBitmap, | ||||
|         distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
| @@ -52,7 +52,7 @@ pub enum SearchEvents { | ||||
|     TypoState { | ||||
|         graph: RankingRuleGraph<TypoGraph>, | ||||
|         paths: Vec<Vec<Interned<TypoCondition>>>, | ||||
|         dead_end_path_cache: DeadEndPathCache<TypoGraph>, | ||||
|         dead_end_path_cache: DeadEndsCache<TypoCondition>, | ||||
|         universe: RoaringBitmap, | ||||
|         distances: MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
| @@ -170,7 +170,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger { | ||||
|         &mut self, | ||||
|         query_graph: &RankingRuleGraph<ProximityGraph>, | ||||
|         paths_map: &[Vec<Interned<ProximityCondition>>], | ||||
|         dead_end_path_cache: &DeadEndPathCache<ProximityGraph>, | ||||
|         dead_end_path_cache: &DeadEndsCache<ProximityCondition>, | ||||
|         universe: &RoaringBitmap, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
| @@ -189,7 +189,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger { | ||||
|         &mut self, | ||||
|         query_graph: &RankingRuleGraph<TypoGraph>, | ||||
|         paths_map: &[Vec<Interned<TypoCondition>>], | ||||
|         dead_end_path_cache: &DeadEndPathCache<TypoGraph>, | ||||
|         dead_end_path_cache: &DeadEndsCache<TypoCondition>, | ||||
|         universe: &RoaringBitmap, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
| @@ -527,7 +527,7 @@ shape: class" | ||||
|         ctx: &mut SearchContext, | ||||
|         graph: &RankingRuleGraph<R>, | ||||
|         paths: &[Vec<Interned<R::Condition>>], | ||||
|         dead_end_paths_cache: &DeadEndPathCache<R>, | ||||
|         dead_end_paths_cache: &DeadEndsCache<R::Condition>, | ||||
|         distances: MappedInterner<Vec<(u16, SmallBitmap<R::Condition>)>, QueryNode>, | ||||
|         file: &mut File, | ||||
|     ) { | ||||
| @@ -583,11 +583,11 @@ shape: class" | ||||
|         // } | ||||
|         // writeln!(file, "}}").unwrap(); | ||||
|  | ||||
|         writeln!(file, "Dead-end edges {{").unwrap(); | ||||
|         for condition in dead_end_paths_cache.conditions.iter() { | ||||
|             writeln!(file, "{condition}").unwrap(); | ||||
|         } | ||||
|         writeln!(file, "}}").unwrap(); | ||||
|         // writeln!(file, "Dead-end edges {{").unwrap(); | ||||
|         // for condition in dead_end_paths_cache.conditions.iter() { | ||||
|         //     writeln!(file, "{condition}").unwrap(); | ||||
|         // } | ||||
|         // writeln!(file, "}}").unwrap(); | ||||
|  | ||||
|         // writeln!(file, "Dead-end prefixes {{").unwrap(); | ||||
|         // writeln!(file, "}}").unwrap(); | ||||
|   | ||||
| @@ -37,7 +37,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|             &mut visit, | ||||
|             &mut vec![], | ||||
|             &mut SmallBitmap::for_interned_values_in(&self.conditions_interner), | ||||
|             &mut dead_end_path_cache.forbidden.clone(), | ||||
|             dead_end_path_cache.forbidden.clone(), | ||||
|         )?; | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -54,12 +54,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         ) -> Result<ControlFlow<()>>, | ||||
|         prev_conditions: &mut Vec<Interned<G::Condition>>, | ||||
|         cur_path: &mut SmallBitmap<G::Condition>, | ||||
|         forbidden_conditions: &mut SmallBitmap<G::Condition>, | ||||
|         mut forbidden_conditions: SmallBitmap<G::Condition>, | ||||
|     ) -> Result<bool> { | ||||
|         let mut any_valid = false; | ||||
|  | ||||
|         let edges = self.edges_of_node.get(from).clone(); | ||||
|         for edge_idx in edges.iter() { | ||||
|         'edges_loop: for edge_idx in edges.iter() { | ||||
|             let Some(edge) = self.edges_store.get(edge_idx).as_ref() else { continue }; | ||||
|             if cost < edge.cost as u16 { | ||||
|                 continue; | ||||
| @@ -73,6 +73,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                             ControlFlow::Continue(_) => {} | ||||
|                             ControlFlow::Break(_) => return Ok(true), | ||||
|                         } | ||||
|                         true | ||||
|                     } else { | ||||
|                         self.visit_paths_of_cost_rec( | ||||
|                             edge.dest_node, | ||||
| @@ -82,8 +83,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                             visit, | ||||
|                             prev_conditions, | ||||
|                             cur_path, | ||||
|                             forbidden_conditions, | ||||
|                         )?; | ||||
|                             forbidden_conditions.clone(), | ||||
|                         )? | ||||
|                     } | ||||
|                 } | ||||
|                 Some(condition) => { | ||||
| @@ -101,18 +102,19 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                     prev_conditions.push(condition); | ||||
|                     let mut new_forbidden_conditions = forbidden_conditions.clone(); | ||||
|                     if let Some(next_forbidden) = | ||||
|                         dead_end_path_cache.forbidden_conditions_after_prefix(&prev_conditions) | ||||
|                         dead_end_path_cache.forbidden_conditions_after_prefix(prev_conditions) | ||||
|                     { | ||||
|                         new_forbidden_conditions.union(&next_forbidden); | ||||
|                     } | ||||
|  | ||||
|                     if edge.dest_node == self.query_graph.end_node { | ||||
|                     let next_any_valid = if edge.dest_node == self.query_graph.end_node { | ||||
|                         any_valid = true; | ||||
|                         let control_flow = visit(prev_conditions, self, dead_end_path_cache)?; | ||||
|                         match control_flow { | ||||
|                             ControlFlow::Continue(_) => {} | ||||
|                             ControlFlow::Break(_) => return Ok(true), | ||||
|                         } | ||||
|                         true | ||||
|                     } else { | ||||
|                         self.visit_paths_of_cost_rec( | ||||
|                             edge.dest_node, | ||||
| @@ -122,13 +124,23 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                             visit, | ||||
|                             prev_conditions, | ||||
|                             cur_path, | ||||
|                             &mut new_forbidden_conditions, | ||||
|                         )?; | ||||
|                     } | ||||
|                             new_forbidden_conditions, | ||||
|                         )? | ||||
|                     }; | ||||
|                     cur_path.remove(condition); | ||||
|                     prev_conditions.pop(); | ||||
|                     next_any_valid | ||||
|                 } | ||||
|             }; | ||||
|             any_valid |= next_any_valid; | ||||
|  | ||||
|             if next_any_valid { | ||||
|                 forbidden_conditions = dead_end_path_cache | ||||
|                     .forbidden_conditions_for_all_prefixes_up_to(prev_conditions); | ||||
|                 if cur_path.intersects(&forbidden_conditions) { | ||||
|                     break 'edges_loop; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(any_valid) | ||||
|   | ||||
| @@ -8,20 +8,17 @@ use crate::search::new::interner::Interned; | ||||
| use crate::search::new::SearchContext; | ||||
| use crate::Result; | ||||
|  | ||||
| // TODO: give a generation to each universe, then be able to get the exact | ||||
| // delta of docids between two universes of different generations! | ||||
|  | ||||
| /// A cache storing the document ids associated with each ranking rule edge | ||||
| pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> { | ||||
|     // TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap> | ||||
|     pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>, | ||||
|     pub universe_length: u64, | ||||
|     pub cache: FxHashMap<Interned<G::Condition>, (u64, RoaringBitmap)>, | ||||
|     _phantom: PhantomData<G>, | ||||
| } | ||||
| impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | ||||
|     pub fn new(universe: &RoaringBitmap) -> Self { | ||||
|         Self { | ||||
|             cache: Default::default(), | ||||
|             _phantom: Default::default(), | ||||
|             universe_length: universe.len(), | ||||
|         } | ||||
| impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> { | ||||
|     fn default() -> Self { | ||||
|         Self { cache: Default::default(), _phantom: Default::default() } | ||||
|     } | ||||
| } | ||||
| impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | ||||
| @@ -40,20 +37,21 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | ||||
|         if self.cache.contains_key(&interned_condition) { | ||||
|             // TODO compare length of universe compared to the one in self | ||||
|             // if it is smaller, then update the value | ||||
|  | ||||
|             // TODO: should we update the bitmap in the cache if the new universe | ||||
|             // reduces it? | ||||
|             // TODO: maybe have a generation: u32 to track every time the universe was | ||||
|             // reduced. Then only attempt to recompute the intersection when there is a chance | ||||
|             // that condition_docids & universe changed | ||||
|             return Ok(&self.cache[&interned_condition]); | ||||
|             let (universe_len, docids) = self.cache.entry(interned_condition).or_default(); | ||||
|             if *universe_len == universe.len() { | ||||
|                 return Ok(docids); | ||||
|             } else { | ||||
|                 *docids &= universe; | ||||
|                 *universe_len = universe.len(); | ||||
|                 return Ok(docids); | ||||
|             } | ||||
|         } | ||||
|         // TODO: maybe universe doesn't belong here | ||||
|         let condition = graph.conditions_interner.get(interned_condition); | ||||
|         // TODO: faster way to do this? | ||||
|         let docids = universe & G::resolve_condition(ctx, condition, universe)?; | ||||
|         let _ = self.cache.insert(interned_condition, docids); | ||||
|         let docids = &self.cache[&interned_condition]; | ||||
|         let docids = G::resolve_condition(ctx, condition, universe)?; | ||||
|         let _ = self.cache.insert(interned_condition, (universe.len(), docids)); | ||||
|         let (_, docids) = &self.cache[&interned_condition]; | ||||
|         Ok(docids) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,83 +0,0 @@ | ||||
| // use super::{path_set::PathSet, RankingRuleGraphTrait}; | ||||
| // use crate::search::new::{ | ||||
| //     interner::{FixedSizeInterner, Interned, MappedInterner}, | ||||
| //     small_bitmap::SmallBitmap, | ||||
| // }; | ||||
|  | ||||
| // /// A cache which stores sufficient conditions for a path | ||||
| // /// to resolve to an empty set of candidates within the current | ||||
| // /// universe. | ||||
| // pub struct DeadEndPathCache<G: RankingRuleGraphTrait> { | ||||
| //     /// The set of edge conditions that resolve to no documents. | ||||
| //     pub conditions: SmallBitmap<G::Condition>, | ||||
| //     /// A set of path prefixes that resolve to no documents. | ||||
| //     pub prefixes: PathSet<G::Condition>, | ||||
| //     /// A set of empty couples of edge conditions that resolve to no documents. | ||||
| //     pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>, | ||||
| // } | ||||
| // impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> { | ||||
| //     fn clone(&self) -> Self { | ||||
| //         Self { | ||||
| //             conditions: self.conditions.clone(), | ||||
| //             prefixes: self.prefixes.clone(), | ||||
| //             condition_couples: self.condition_couples.clone(), | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> { | ||||
| //     /// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges. | ||||
| //     pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self { | ||||
| //         Self { | ||||
| //             conditions: SmallBitmap::for_interned_values_in(all_conditions), | ||||
| //             prefixes: PathSet::default(), | ||||
| //             condition_couples: all_conditions | ||||
| //                 .map(|_| SmallBitmap::for_interned_values_in(all_conditions)), | ||||
| //         } | ||||
| //     } | ||||
|  | ||||
| //     /// Store in the cache that every path containing the given edge resolves to no documents. | ||||
| //     pub fn add_condition(&mut self, condition: Interned<G::Condition>) { | ||||
| //         self.conditions.insert(condition); | ||||
| //         self.condition_couples.get_mut(condition).clear(); | ||||
| //         self.prefixes.remove_edge(condition); | ||||
| //         for (_, edges2) in self.condition_couples.iter_mut() { | ||||
| //             edges2.remove(condition); | ||||
| //         } | ||||
| //     } | ||||
| //     /// Store in the cache that every path containing the given prefix resolves to no documents. | ||||
| //     pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) { | ||||
| //         // TODO: typed PathSet | ||||
| //         self.prefixes.insert(prefix.iter().copied()); | ||||
| //     } | ||||
|  | ||||
| //     /// Store in the cache that every path containing the two given edges resolves to no documents. | ||||
| //     pub fn add_condition_couple( | ||||
| //         &mut self, | ||||
| //         edge1: Interned<G::Condition>, | ||||
| //         edge2: Interned<G::Condition>, | ||||
| //     ) { | ||||
| //         self.condition_couples.get_mut(edge1).insert(edge2); | ||||
| //     } | ||||
|  | ||||
| //     /// Returns true if the cache can determine that the given path resolves to no documents. | ||||
| //     pub fn path_is_dead_end( | ||||
| //         &self, | ||||
| //         path: &[Interned<G::Condition>], | ||||
| //         path_bitmap: &SmallBitmap<G::Condition>, | ||||
| //     ) -> bool { | ||||
| //         if path_bitmap.intersects(&self.conditions) { | ||||
| //             return true; | ||||
| //         } | ||||
| //         for condition in path.iter() { | ||||
| //             let forbidden_other_edges = self.condition_couples.get(*condition); | ||||
| //             if path_bitmap.intersects(forbidden_other_edges) { | ||||
| //                 return true; | ||||
| //             } | ||||
| //         } | ||||
| //         if self.prefixes.contains_prefix_of_path(path) { | ||||
| //             return true; | ||||
| //         } | ||||
| //         false | ||||
| //     } | ||||
| // } | ||||
| @@ -8,8 +8,7 @@ the same but the edges are replaced. | ||||
| mod build; | ||||
| mod cheapest_paths; | ||||
| mod condition_docids_cache; | ||||
| mod dead_end_path_cache; | ||||
| mod path_set; | ||||
| mod dead_ends_cache; | ||||
|  | ||||
| /// Implementation of the `proximity` ranking rule | ||||
| mod proximity; | ||||
| @@ -20,8 +19,7 @@ use std::collections::HashSet; | ||||
| use std::hash::Hash; | ||||
|  | ||||
| pub use condition_docids_cache::ConditionDocIdsCache; | ||||
| // pub use dead_end_path_cache::DeadEndPathCache; | ||||
| pub use path_set::DeadEndsCache; | ||||
| pub use dead_ends_cache::DeadEndsCache; | ||||
| pub use proximity::{ProximityCondition, ProximityGraph}; | ||||
| use roaring::RoaringBitmap; | ||||
| pub use typo::{TypoCondition, TypoGraph}; | ||||
|   | ||||
| @@ -1,166 +0,0 @@ | ||||
| // What is PathSet used for? | ||||
| // For the empty_prefixes field in the EmptyPathsCache only :/ | ||||
| // but it could be used for more, like efficient computing of a set of paths | ||||
|  | ||||
| use crate::search::new::{ | ||||
|     interner::{FixedSizeInterner, Interned}, | ||||
|     small_bitmap::SmallBitmap, | ||||
| }; | ||||
|  | ||||
| pub struct DeadEndsCache<T> { | ||||
|     nodes: Vec<(Interned<T>, Self)>, | ||||
|     pub forbidden: SmallBitmap<T>, | ||||
| } | ||||
| impl<T> DeadEndsCache<T> { | ||||
|     pub fn new(for_interner: &FixedSizeInterner<T>) -> Self { | ||||
|         Self { nodes: vec![], forbidden: SmallBitmap::for_interned_values_in(for_interner) } | ||||
|     } | ||||
|     pub fn forbid_condition(&mut self, condition: Interned<T>) { | ||||
|         self.forbidden.insert(condition); | ||||
|     } | ||||
|     fn advance(&mut self, condition: Interned<T>) -> Option<&mut Self> { | ||||
|         for (e, next_node) in &mut self.nodes { | ||||
|             if condition == *e { | ||||
|                 return Some(next_node); | ||||
|             } | ||||
|         } | ||||
|         None | ||||
|     } | ||||
|     pub fn forbidden_conditions_after_prefix( | ||||
|         &mut self, | ||||
|         mut prefix: &[Interned<T>], | ||||
|     ) -> Option<SmallBitmap<T>> { | ||||
|         let mut cursor = self; | ||||
|         for c in prefix.iter() { | ||||
|             if let Some(next) = cursor.advance(*c) { | ||||
|                 cursor = next; | ||||
|             } else { | ||||
|                 return None; | ||||
|             } | ||||
|         } | ||||
|         Some(cursor.forbidden.clone()) | ||||
|     } | ||||
|     pub fn forbid_condition_after_prefix( | ||||
|         &mut self, | ||||
|         mut prefix: impl Iterator<Item = Interned<T>>, | ||||
|         forbidden: Interned<T>, | ||||
|     ) { | ||||
|         match prefix.next() { | ||||
|             None => { | ||||
|                 self.forbidden.insert(forbidden); | ||||
|             } | ||||
|             Some(first_condition) => { | ||||
|                 for (condition, next_node) in &mut self.nodes { | ||||
|                     if condition == &first_condition { | ||||
|                         return next_node.forbid_condition_after_prefix(prefix, forbidden); | ||||
|                     } | ||||
|                 } | ||||
|                 let mut rest = DeadEndsCache { | ||||
|                     nodes: vec![], | ||||
|                     forbidden: SmallBitmap::new(self.forbidden.universe_length()), | ||||
|                 }; | ||||
|                 rest.forbid_condition_after_prefix(prefix, forbidden); | ||||
|                 self.nodes.push((first_condition, rest)); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| // /// A set of `Vec<Interned<T>>` implemented as a prefix tree. | ||||
| // pub struct PathSet<T> { | ||||
| //     nodes: Vec<(Interned<T>, Self)>, | ||||
| //     is_end: bool, | ||||
| // } | ||||
|  | ||||
| // impl<T> Clone for PathSet<T> { | ||||
| //     fn clone(&self) -> Self { | ||||
| //         Self { nodes: self.nodes.clone(), is_end: self.is_end } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<T> std::fmt::Debug for PathSet<T> { | ||||
| //     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
| //         f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish() | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<T> Default for PathSet<T> { | ||||
| //     fn default() -> Self { | ||||
| //         Self { nodes: Default::default(), is_end: Default::default() } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<T> PathSet<T> { | ||||
| //     pub fn insert(&mut self, mut conditions: impl Iterator<Item = Interned<T>>) { | ||||
| //         match conditions.next() { | ||||
| //             None => { | ||||
| //                 self.is_end = true; | ||||
| //             } | ||||
| //             Some(first_condition) => { | ||||
| //                 for (condition, next_node) in &mut self.nodes { | ||||
| //                     if condition == &first_condition { | ||||
| //                         return next_node.insert(conditions); | ||||
| //                     } | ||||
| //                 } | ||||
| //                 let mut rest = PathSet::default(); | ||||
| //                 rest.insert(conditions); | ||||
| //                 self.nodes.push((first_condition, rest)); | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
|  | ||||
| //     pub fn remove_condition(&mut self, forbidden_condition: Interned<T>) { | ||||
| //         let mut i = 0; | ||||
| //         while i < self.nodes.len() { | ||||
| //             let should_remove = if self.nodes[i].0 == forbidden_condition { | ||||
| //                 true | ||||
| //             } else if !self.nodes[i].1.nodes.is_empty() { | ||||
| //                 self.nodes[i].1.remove_condition(forbidden_condition); | ||||
| //                 self.nodes[i].1.nodes.is_empty() | ||||
| //             } else { | ||||
| //                 false | ||||
| //             }; | ||||
| //             if should_remove { | ||||
| //                 self.nodes.remove(i); | ||||
| //             } else { | ||||
| //                 i += 1; | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
|  | ||||
| //     pub fn final_conditions_after_prefix( | ||||
| //         &self, | ||||
| //         prefix: &[Interned<T>], | ||||
| //         visit: &mut impl FnMut(Interned<T>), | ||||
| //     ) { | ||||
| //         let [first_condition, remaining_prefix @ ..] = prefix else { | ||||
| //             for node in self.nodes.iter() { | ||||
| //                 if node.1.is_end { | ||||
| //                     visit(node.0) | ||||
| //                 } | ||||
| //             } | ||||
| //             return | ||||
| //         }; | ||||
| //         for (condition, rest) in self.nodes.iter() { | ||||
| //             if condition == first_condition { | ||||
| //                 return rest.final_conditions_after_prefix(remaining_prefix, visit); | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
|  | ||||
| //     pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool { | ||||
| //         if self.is_end { | ||||
| //             return true; | ||||
| //         } | ||||
| //         match path { | ||||
| //             [] => false, | ||||
| //             [first_condition, remaining_path @ ..] => { | ||||
| //                 for (condition, rest) in self.nodes.iter() { | ||||
| //                     if condition == first_condition { | ||||
| //                         return rest.contains_prefix_of_path(remaining_path); | ||||
| //                     } | ||||
| //                 } | ||||
| //                 false | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
		Reference in New Issue
	
	Block a user