mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	WIP
This commit is contained in:
		| @@ -70,7 +70,9 @@ pub mod update; | |||||||
| #[macro_use] | #[macro_use] | ||||||
| pub mod snapshot_tests; | pub mod snapshot_tests; | ||||||
|  |  | ||||||
| pub use search::new::{execute_search, SearchContext}; | pub use search::new::DetailedSearchLogger; | ||||||
|  |  | ||||||
|  | pub use search::new::{execute_search, DefaultSearchLogger, SearchContext}; | ||||||
|  |  | ||||||
| use std::collections::{BTreeMap, HashMap}; | use std::collections::{BTreeMap, HashMap}; | ||||||
| use std::convert::{TryFrom, TryInto}; | use std::convert::{TryFrom, TryInto}; | ||||||
|   | |||||||
| @@ -45,8 +45,8 @@ use super::interner::MappedInterner; | |||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
| use super::query_graph::QueryNode; | use super::query_graph::QueryNode; | ||||||
| use super::ranking_rule_graph::{ | use super::ranking_rule_graph::{ | ||||||
|     ConditionDocIdsCache, DeadEndPathCache, ProximityGraph, RankingRuleGraph, |     ConditionDocIdsCache, DeadEndsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, | ||||||
|     RankingRuleGraphTrait, TypoGraph, |     TypoGraph, | ||||||
| }; | }; | ||||||
| use super::small_bitmap::SmallBitmap; | use super::small_bitmap::SmallBitmap; | ||||||
| use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | ||||||
| @@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> { | |||||||
|     /// Cache to retrieve the docids associated with each edge |     /// Cache to retrieve the docids associated with each edge | ||||||
|     conditions_cache: ConditionDocIdsCache<G>, |     conditions_cache: ConditionDocIdsCache<G>, | ||||||
|     /// Cache used to optimistically discard paths that resolve to no documents. |     /// Cache used to optimistically discard paths that resolve to no documents. | ||||||
|     dead_end_path_cache: DeadEndPathCache<G>, |     dead_end_path_cache: DeadEndsCache<G::Condition>, | ||||||
|     /// A structure giving the list of possible costs from each node to the end node, |     /// A structure giving the list of possible costs from each node to the end node, | ||||||
|     /// along with a set of unavoidable edges that must be traversed to achieve that distance. |     /// along with a set of unavoidable edges that must be traversed to achieve that distance. | ||||||
|     all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, |     all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, | ||||||
| @@ -103,7 +103,7 @@ fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>( | |||||||
|     graph: &mut RankingRuleGraph<G>, |     graph: &mut RankingRuleGraph<G>, | ||||||
|     condition_docids_cache: &mut ConditionDocIdsCache<G>, |     condition_docids_cache: &mut ConditionDocIdsCache<G>, | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
|     dead_end_path_cache: &mut DeadEndPathCache<G>, |     dead_end_path_cache: &mut DeadEndsCache<G::Condition>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     for edge_id in graph.edges_store.indexes() { |     for edge_id in graph.edges_store.indexes() { | ||||||
|         let Some(edge) = graph.edges_store.get(edge_id).as_ref() else { |         let Some(edge) = graph.edges_store.get(edge_id).as_ref() else { | ||||||
| @@ -113,9 +113,9 @@ fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>( | |||||||
|  |  | ||||||
|         let docids = |         let docids = | ||||||
|             condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?; |             condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?; | ||||||
|         if docids.is_disjoint(universe) { |         if docids.is_empty() { | ||||||
|             graph.remove_edges_with_condition(condition); |             graph.remove_edges_with_condition(condition); | ||||||
|             dead_end_path_cache.add_condition(condition); |             dead_end_path_cache.forbid_condition(condition); // add_condition(condition); | ||||||
|             condition_docids_cache.cache.remove(&condition); |             condition_docids_cache.cache.remove(&condition); | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
| @@ -135,8 +135,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|         query_graph: &QueryGraph, |         query_graph: &QueryGraph, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; |         let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; | ||||||
|         let mut condition_docids_cache = ConditionDocIdsCache::default(); |         let mut condition_docids_cache = ConditionDocIdsCache::new(universe); | ||||||
|         let mut dead_end_path_cache = DeadEndPathCache::new(&graph.conditions_interner); |         let mut dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner); | ||||||
|  |  | ||||||
|         // First simplify the graph as much as possible, by computing the docids of all the conditions |         // First simplify the graph as much as possible, by computing the docids of all the conditions | ||||||
|         // within the rule's universe and removing the edges that have no associated docids. |         // within the rule's universe and removing the edges that have no associated docids. | ||||||
| @@ -230,62 +230,79 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|             graph.query_graph.root_node, |             graph.query_graph.root_node, | ||||||
|             cost, |             cost, | ||||||
|             all_distances, |             all_distances, | ||||||
|  |             dead_end_path_cache.forbidden.clone(), | ||||||
|  |             |condition, forbidden_conditions| {}, | ||||||
|             dead_end_path_cache, |             dead_end_path_cache, | ||||||
|             |path, graph, dead_end_path_cache| { |             |path, graph, dead_end_path_cache| { | ||||||
|                 // Accumulate the path for logging purposes only |                 // Accumulate the path for logging purposes only | ||||||
|                 paths.push(path.to_vec()); |                 paths.push(path.to_vec()); | ||||||
|  |  | ||||||
|                 let mut path_docids = universe.clone(); |                 let mut path_docids = universe.clone(); | ||||||
|  |  | ||||||
|                 // We store the edges and their docids in vectors in case the path turns out to be |                 // We store the edges and their docids in vectors in case the path turns out to be | ||||||
|                 // empty and we need to figure out why it was empty. |                 // empty and we need to figure out why it was empty. | ||||||
|                 let mut visited_conditions = vec![]; |                 let mut visited_conditions = vec![]; | ||||||
|                 let mut cached_condition_docids = vec![]; |                 let mut cached_condition_docids = vec![]; | ||||||
|                 // graph.conditions_interner.map(|_| RoaringBitmap::new()); |  | ||||||
|  |  | ||||||
|                 for &condition in path { |                 for &latest_condition in path { | ||||||
|                     visited_conditions.push(condition); |                     visited_conditions.push(latest_condition); | ||||||
|  |  | ||||||
|                     let condition_docids = condition_docids_cache |                     let condition_docids = condition_docids_cache.get_condition_docids( | ||||||
|                         .get_condition_docids(ctx, condition, graph, &universe)?; |                         ctx, | ||||||
|  |                         latest_condition, | ||||||
|  |                         graph, | ||||||
|  |                         &universe, | ||||||
|  |                     )?; | ||||||
|  |  | ||||||
|                     cached_condition_docids.push((condition, condition_docids.clone())); // .get_mut(condition) = condition_docids.clone(); |                     cached_condition_docids.push((latest_condition, condition_docids.clone())); | ||||||
|  |  | ||||||
|                     // If the edge is empty, then the path will be empty as well, we update the graph |                     // If the edge is empty, then the path will be empty as well, we update the graph | ||||||
|                     // and caches accordingly and skip to the next candidate path. |                     // and caches accordingly and skip to the next candidate path. | ||||||
|                     if condition_docids.is_disjoint(&universe) { |                     if condition_docids.is_disjoint(&universe) { | ||||||
|                         // 1. Store in the cache that this edge is empty for this universe |                         // 1. Store in the cache that this edge is empty for this universe | ||||||
|                         dead_end_path_cache.add_condition(condition); |                         dead_end_path_cache.forbid_condition(latest_condition); | ||||||
|                         // 2. remove this edge from the ranking rule graph |                         // 2. remove all the edges with this condition from the ranking rule graph | ||||||
|                         // ouch, no! :( need to link a condition to one or more ranking rule edges |                         graph.remove_edges_with_condition(latest_condition); | ||||||
|                         graph.remove_edges_with_condition(condition); |  | ||||||
|                         // 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore |                         // 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore | ||||||
|                         condition_docids_cache.cache.remove(&condition); |                         condition_docids_cache.cache.remove(&latest_condition); | ||||||
|                         return Ok(ControlFlow::Continue(())); |                         return Ok(ControlFlow::Continue(())); | ||||||
|                     } |                     } | ||||||
|                     path_docids &= condition_docids; |  | ||||||
|  |  | ||||||
|                     // If the (sub)path is empty, we try to figure out why and update the caches accordingly. |                     // If the (sub)path is empty, we try to figure out why and update the caches accordingly. | ||||||
|                     if path_docids.is_disjoint(&universe) { |                     if path_docids.is_disjoint(condition_docids) { | ||||||
|                         // First, we know that this path is empty, and thus any path |                         // First, we know that this path is empty, and thus any path | ||||||
|                         // that is a superset of it will also be empty. |                         // that is a superset of it will also be empty. | ||||||
|                         dead_end_path_cache.add_prefix(&visited_conditions); |                         dead_end_path_cache.forbid_condition_after_prefix( | ||||||
|  |                             visited_conditions[..visited_conditions.len() - 1].iter().copied(), | ||||||
|  |                             latest_condition, | ||||||
|  |                         ); | ||||||
|  |  | ||||||
|  |                         let mut dead_end_cache_cursor = dead_end_path_cache; | ||||||
|  |  | ||||||
|                         // Second, if the intersection between this edge and any |                         // Second, if the intersection between this edge and any | ||||||
|                         // previous one is disjoint with the universe, |                         // previous prefix is disjoint with the universe, then... TODO | ||||||
|                         // then we also know that any path containing the same couple of |                         for (past_condition, past_condition_docids) in | ||||||
|                         // edges will also be empty. |                             cached_condition_docids.iter() | ||||||
|                         for (past_condition, condition_docids2) in cached_condition_docids.iter() { |                         { | ||||||
|                             if *past_condition == condition { |                             // TODO: should ensure that it is simply not possible to have twice | ||||||
|  |                             // the same condition in the cached_condition_docids. Maybe it is | ||||||
|  |                             // already the case? | ||||||
|  |                             dead_end_cache_cursor = | ||||||
|  |                                 dead_end_cache_cursor.advance(*past_condition).unwrap(); | ||||||
|  |                             // TODO: check how that interacts with the dead end cache? | ||||||
|  |                             if *past_condition == latest_condition { | ||||||
|  |                                 // TODO: should we break instead? | ||||||
|  |                                 // Is it even possible? | ||||||
|                                 continue; |                                 continue; | ||||||
|                             }; |                             }; | ||||||
|                             let intersection = condition_docids & condition_docids2; |                             if condition_docids.is_disjoint(past_condition_docids) { | ||||||
|                             if intersection.is_disjoint(&universe) { |                                 dead_end_cache_cursor.forbid_condition(latest_condition); | ||||||
|                                 dead_end_path_cache |  | ||||||
|                                     .add_condition_couple(*past_condition, condition); |  | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|                         // We should maybe instead try to compute: |                         // We should maybe instead try to compute: | ||||||
|                         // 0th & nth & 1st & n-1th & 2nd & etc... |                         // 0th & nth & 1st & n-1th & 2nd & etc... | ||||||
|                         return Ok(ControlFlow::Continue(())); |                         return Ok(ControlFlow::Continue(())); | ||||||
|  |                     } else { | ||||||
|  |                         path_docids &= condition_docids; | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 assert!(!path_docids.is_empty()); |                 assert!(!path_docids.is_empty()); | ||||||
| @@ -303,7 +320,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | |||||||
|                 } |                 } | ||||||
|             }, |             }, | ||||||
|         )?; |         )?; | ||||||
|  |         // println!("  {} paths of cost {} in {}", paths.len(), cost, self.id); | ||||||
|         G::log_state( |         G::log_state( | ||||||
|             &original_graph, |             &original_graph, | ||||||
|             &paths, |             &paths, | ||||||
|   | |||||||
| @@ -152,7 +152,7 @@ impl<T> Hash for Interned<T> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<T: Ord> Ord for Interned<T> { | impl<T> Ord for Interned<T> { | ||||||
|     fn cmp(&self, other: &Self) -> std::cmp::Ordering { |     fn cmp(&self, other: &Self) -> std::cmp::Ordering { | ||||||
|         self.idx.cmp(&other.idx) |         self.idx.cmp(&other.idx) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ use std::io::Write; | |||||||
| use std::path::PathBuf; | use std::path::PathBuf; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| use rand::random; | // use rand::random; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::search::new::interner::{Interned, MappedInterner}; | use crate::search::new::interner::{Interned, MappedInterner}; | ||||||
| @@ -323,12 +323,11 @@ impl DetailedSearchLogger { | |||||||
|                     let cur_activated_id = activated_id(&timestamp); |                     let cur_activated_id = activated_id(&timestamp); | ||||||
|                     let docids = new.iter().collect::<Vec<_>>(); |                     let docids = new.iter().collect::<Vec<_>>(); | ||||||
|                     let len = new.len(); |                     let len = new.len(); | ||||||
|                     let random = random::<u64>(); |  | ||||||
|  |  | ||||||
|                     writeln!( |                     writeln!( | ||||||
|                         &mut file, |                         &mut file, | ||||||
|                         "{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\" |                         "{cur_ranking_rule}.{cur_activated_id} -> results.{cur_ranking_rule}{cur_activated_id} : \"add {len}\" | ||||||
| results.{random} {{ | results.{cur_ranking_rule}{cur_activated_id} {{ | ||||||
|     tooltip: \"{docids:?}\" |     tooltip: \"{docids:?}\" | ||||||
|     style {{ |     style {{ | ||||||
|         fill: \"#B6E2D3\" |         fill: \"#B6E2D3\" | ||||||
| @@ -572,17 +571,17 @@ shape: class" | |||||||
|         Self::paths_d2_description(ctx, graph, paths, file); |         Self::paths_d2_description(ctx, graph, paths, file); | ||||||
|         writeln!(file, "}}").unwrap(); |         writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         writeln!(file, "Dead-end couples of conditions {{").unwrap(); |         // writeln!(file, "Dead-end couples of conditions {{").unwrap(); | ||||||
|         for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() { |         // for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() { | ||||||
|             writeln!(file, "{i} : \"\" {{").unwrap(); |         //     writeln!(file, "{i} : \"\" {{").unwrap(); | ||||||
|             Self::condition_d2_description(ctx, graph, e1, file); |         //     Self::condition_d2_description(ctx, graph, e1, file); | ||||||
|             for e2 in e2.iter() { |         //     for e2 in e2.iter() { | ||||||
|                 Self::condition_d2_description(ctx, graph, e2, file); |         //         Self::condition_d2_description(ctx, graph, e2, file); | ||||||
|                 writeln!(file, "{e1} -- {e2}").unwrap(); |         //         writeln!(file, "{e1} -- {e2}").unwrap(); | ||||||
|             } |         //     } | ||||||
|             writeln!(file, "}}").unwrap(); |         //     writeln!(file, "}}").unwrap(); | ||||||
|         } |         // } | ||||||
|         writeln!(file, "}}").unwrap(); |         // writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         writeln!(file, "Dead-end edges {{").unwrap(); |         writeln!(file, "Dead-end edges {{").unwrap(); | ||||||
|         for condition in dead_end_paths_cache.conditions.iter() { |         for condition in dead_end_paths_cache.conditions.iter() { | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| #[cfg(test)] | // #[cfg(test)] | ||||||
| pub mod detailed; | pub mod detailed; | ||||||
|  |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| @@ -6,8 +6,7 @@ use roaring::RoaringBitmap; | |||||||
| use super::interner::{Interned, MappedInterner}; | use super::interner::{Interned, MappedInterner}; | ||||||
| use super::query_graph::QueryNode; | use super::query_graph::QueryNode; | ||||||
| use super::ranking_rule_graph::{ | use super::ranking_rule_graph::{ | ||||||
|     DeadEndPathCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoCondition, |     DeadEndsCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoCondition, TypoGraph, | ||||||
|     TypoGraph, |  | ||||||
| }; | }; | ||||||
| use super::small_bitmap::SmallBitmap; | use super::small_bitmap::SmallBitmap; | ||||||
| use super::{RankingRule, RankingRuleQueryTrait}; | use super::{RankingRule, RankingRuleQueryTrait}; | ||||||
| @@ -67,7 +66,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> { | |||||||
|         &mut self, |         &mut self, | ||||||
|         query_graph: &RankingRuleGraph<ProximityGraph>, |         query_graph: &RankingRuleGraph<ProximityGraph>, | ||||||
|         paths: &[Vec<Interned<ProximityCondition>>], |         paths: &[Vec<Interned<ProximityCondition>>], | ||||||
|         dead_end_path_cache: &DeadEndPathCache<ProximityGraph>, |         dead_end_path_cache: &DeadEndsCache<ProximityCondition>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, |         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
| @@ -78,7 +77,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> { | |||||||
|         &mut self, |         &mut self, | ||||||
|         query_graph: &RankingRuleGraph<TypoGraph>, |         query_graph: &RankingRuleGraph<TypoGraph>, | ||||||
|         paths: &[Vec<Interned<TypoCondition>>], |         paths: &[Vec<Interned<TypoCondition>>], | ||||||
|         dead_end_path_cache: &DeadEndPathCache<TypoGraph>, |         dead_end_path_cache: &DeadEndsCache<TypoCondition>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, |         distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
| @@ -138,7 +137,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger { | |||||||
|         &mut self, |         &mut self, | ||||||
|         _query_graph: &RankingRuleGraph<ProximityGraph>, |         _query_graph: &RankingRuleGraph<ProximityGraph>, | ||||||
|         _paths_map: &[Vec<Interned<ProximityCondition>>], |         _paths_map: &[Vec<Interned<ProximityCondition>>], | ||||||
|         _dead_end_path_cache: &DeadEndPathCache<ProximityGraph>, |         _dead_end_path_cache: &DeadEndsCache<ProximityCondition>, | ||||||
|         _universe: &RoaringBitmap, |         _universe: &RoaringBitmap, | ||||||
|         _distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, |         _distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||||
|         _cost: u16, |         _cost: u16, | ||||||
| @@ -149,7 +148,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger { | |||||||
|         &mut self, |         &mut self, | ||||||
|         _query_graph: &RankingRuleGraph<TypoGraph>, |         _query_graph: &RankingRuleGraph<TypoGraph>, | ||||||
|         _paths: &[Vec<Interned<TypoCondition>>], |         _paths: &[Vec<Interned<TypoCondition>>], | ||||||
|         _dead_end_path_cache: &DeadEndPathCache<TypoGraph>, |         _dead_end_path_cache: &DeadEndsCache<TypoCondition>, | ||||||
|         _universe: &RoaringBitmap, |         _universe: &RoaringBitmap, | ||||||
|         _distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, |         _distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, | ||||||
|         _cost: u16, |         _cost: u16, | ||||||
|   | |||||||
| @@ -15,26 +15,26 @@ mod sort; | |||||||
| // TODO: documentation + comments | // TODO: documentation + comments | ||||||
| mod words; | mod words; | ||||||
|  |  | ||||||
|  | // #[cfg(test)] | ||||||
|  | pub use logger::detailed::DetailedSearchLogger; | ||||||
| pub use logger::{DefaultSearchLogger, SearchLogger}; | pub use logger::{DefaultSearchLogger, SearchLogger}; | ||||||
|  |  | ||||||
| use std::collections::{BTreeSet, HashSet}; | use std::collections::{BTreeSet, HashSet}; | ||||||
|  |  | ||||||
|  | use crate::{Filter, Index, MatchingWords, Result, Search, SearchResult, TermsMatchingStrategy}; | ||||||
| use charabia::Tokenize; | use charabia::Tokenize; | ||||||
| use db_cache::DatabaseCache; | use db_cache::DatabaseCache; | ||||||
|  | use graph_based_ranking_rule::{Proximity, Typo}; | ||||||
| use heed::RoTxn; | use heed::RoTxn; | ||||||
| use query_graph::{QueryGraph, QueryNode}; | use interner::DedupInterner; | ||||||
| pub use ranking_rules::{bucket_sort, RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; | use query_graph::{QueryGraph, QueryNode, QueryNodeData}; | ||||||
|  | use query_term::{located_query_terms_from_string, Phrase, QueryTerm}; | ||||||
|  | use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; | ||||||
|  | use resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  | use words::Words; | ||||||
|  |  | ||||||
| use self::interner::DedupInterner; | use self::ranking_rules::RankingRule; | ||||||
| use self::query_graph::QueryNodeData; |  | ||||||
| use self::query_term::{Phrase, QueryTerm}; |  | ||||||
| use self::ranking_rules::PlaceholderQuery; |  | ||||||
| use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache}; |  | ||||||
| use crate::search::new::graph_based_ranking_rule::{Proximity, Typo}; |  | ||||||
| use crate::search::new::query_term::located_query_terms_from_string; |  | ||||||
| use crate::search::new::words::Words; |  | ||||||
| use crate::{Filter, Index, Result, TermsMatchingStrategy}; |  | ||||||
|  |  | ||||||
| /// A structure used throughout the execution of a search query. | /// A structure used throughout the execution of a search query. | ||||||
| pub struct SearchContext<'ctx> { | pub struct SearchContext<'ctx> { | ||||||
| @@ -231,12 +231,12 @@ pub fn execute_search<'ctx>( | |||||||
|     length: usize, |     length: usize, | ||||||
|     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, |     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, | ||||||
|     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, |     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
| ) -> Result<Vec<u32>> { | ) -> Result<SearchResult> { | ||||||
|     assert!(!query.is_empty()); |     assert!(!query.is_empty()); | ||||||
|     let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?; |     let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?; | ||||||
|     let graph = QueryGraph::from_query(ctx, query_terms)?; |     let graph = QueryGraph::from_query(ctx, query_terms)?; | ||||||
|  |  | ||||||
|     let universe = if let Some(filters) = filters { |     let mut universe = if let Some(filters) = filters { | ||||||
|         filters.evaluate(ctx.txn, ctx.index)? |         filters.evaluate(ctx.txn, ctx.index)? | ||||||
|     } else { |     } else { | ||||||
|         ctx.index.documents_ids(ctx.txn)? |         ctx.index.documents_ids(ctx.txn)? | ||||||
| @@ -249,8 +249,8 @@ pub fn execute_search<'ctx>( | |||||||
|     // But in that case, we should return no results. |     // But in that case, we should return no results. | ||||||
|     // |     // | ||||||
|     // The search is a placeholder search only if there are no tokens? |     // The search is a placeholder search only if there are no tokens? | ||||||
|     if graph.nodes.len() > 2 { |     let documents_ids = if graph.nodes.len() > 2 { | ||||||
|         let universe = resolve_maximally_reduced_query_graph( |         universe = resolve_maximally_reduced_query_graph( | ||||||
|             ctx, |             ctx, | ||||||
|             &universe, |             &universe, | ||||||
|             &graph, |             &graph, | ||||||
| @@ -259,7 +259,7 @@ pub fn execute_search<'ctx>( | |||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|         let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?; |         let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?; | ||||||
|         bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger) |         bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)? | ||||||
|     } else { |     } else { | ||||||
|         let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?; |         let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?; | ||||||
|         bucket_sort( |         bucket_sort( | ||||||
| @@ -270,7 +270,22 @@ pub fn execute_search<'ctx>( | |||||||
|             from, |             from, | ||||||
|             length, |             length, | ||||||
|             placeholder_search_logger, |             placeholder_search_logger, | ||||||
|         ) |         )? | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     Ok(SearchResult { | ||||||
|  |         // TODO: correct matching words | ||||||
|  |         matching_words: MatchingWords::default(), | ||||||
|  |         // TODO: candidates with distinct | ||||||
|  |         candidates: universe, | ||||||
|  |         documents_ids, | ||||||
|  |     }) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> Search<'a> { | ||||||
|  |     // TODO | ||||||
|  |     pub fn execute_new(&self) -> Result<SearchResult> { | ||||||
|  |         todo!() | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -329,7 +344,7 @@ mod tests { | |||||||
|             println!("{}us", elapsed.as_micros()); |             println!("{}us", elapsed.as_micros()); | ||||||
|  |  | ||||||
|             let _documents = index |             let _documents = index | ||||||
|                 .documents(&txn, results.iter().copied()) |                 .documents(&txn, results.documents_ids.iter().copied()) | ||||||
|                 .unwrap() |                 .unwrap() | ||||||
|                 .into_iter() |                 .into_iter() | ||||||
|                 .map(|(id, obkv)| { |                 .map(|(id, obkv)| { | ||||||
|   | |||||||
| @@ -4,8 +4,7 @@ use std::collections::btree_map::Entry; | |||||||
| use std::collections::{BTreeMap, VecDeque}; | use std::collections::{BTreeMap, VecDeque}; | ||||||
| use std::ops::ControlFlow; | use std::ops::ControlFlow; | ||||||
|  |  | ||||||
| use super::dead_end_path_cache::DeadEndPathCache; | use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait}; | ||||||
| use super::{RankingRuleGraph, RankingRuleGraphTrait}; |  | ||||||
| use crate::search::new::interner::{Interned, MappedInterner}; | use crate::search::new::interner::{Interned, MappedInterner}; | ||||||
| use crate::search::new::query_graph::QueryNode; | use crate::search::new::query_graph::QueryNode; | ||||||
| use crate::search::new::small_bitmap::SmallBitmap; | use crate::search::new::small_bitmap::SmallBitmap; | ||||||
| @@ -23,11 +22,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|         from: Interned<QueryNode>, |         from: Interned<QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
|         all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, |         all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, | ||||||
|         dead_end_path_cache: &mut DeadEndPathCache<G>, |         dead_end_path_cache: &mut DeadEndsCache<G::Condition>, | ||||||
|         mut visit: impl FnMut( |         mut visit: impl FnMut( | ||||||
|             &[Interned<G::Condition>], |             &[Interned<G::Condition>], | ||||||
|             &mut Self, |             &mut Self, | ||||||
|             &mut DeadEndPathCache<G>, |             &mut DeadEndsCache<G::Condition>, | ||||||
|         ) -> Result<ControlFlow<()>>, |         ) -> Result<ControlFlow<()>>, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         let _ = self.visit_paths_of_cost_rec( |         let _ = self.visit_paths_of_cost_rec( | ||||||
| @@ -38,7 +37,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|             &mut visit, |             &mut visit, | ||||||
|             &mut vec![], |             &mut vec![], | ||||||
|             &mut SmallBitmap::for_interned_values_in(&self.conditions_interner), |             &mut SmallBitmap::for_interned_values_in(&self.conditions_interner), | ||||||
|             &mut dead_end_path_cache.conditions.clone(), |             &mut dead_end_path_cache.forbidden.clone(), | ||||||
|         )?; |         )?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| @@ -47,11 +46,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|         from: Interned<QueryNode>, |         from: Interned<QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
|         all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, |         all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, | ||||||
|         dead_end_path_cache: &mut DeadEndPathCache<G>, |         dead_end_path_cache: &mut DeadEndsCache<G::Condition>, | ||||||
|         visit: &mut impl FnMut( |         visit: &mut impl FnMut( | ||||||
|             &[Interned<G::Condition>], |             &[Interned<G::Condition>], | ||||||
|             &mut Self, |             &mut Self, | ||||||
|             &mut DeadEndPathCache<G>, |             &mut DeadEndsCache<G::Condition>, | ||||||
|         ) -> Result<ControlFlow<()>>, |         ) -> Result<ControlFlow<()>>, | ||||||
|         prev_conditions: &mut Vec<Interned<G::Condition>>, |         prev_conditions: &mut Vec<Interned<G::Condition>>, | ||||||
|         cur_path: &mut SmallBitmap<G::Condition>, |         cur_path: &mut SmallBitmap<G::Condition>, | ||||||
| @@ -74,7 +73,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|                             ControlFlow::Continue(_) => {} |                             ControlFlow::Continue(_) => {} | ||||||
|                             ControlFlow::Break(_) => return Ok(true), |                             ControlFlow::Break(_) => return Ok(true), | ||||||
|                         } |                         } | ||||||
|                         true |  | ||||||
|                     } else { |                     } else { | ||||||
|                         self.visit_paths_of_cost_rec( |                         self.visit_paths_of_cost_rec( | ||||||
|                             edge.dest_node, |                             edge.dest_node, | ||||||
| @@ -85,7 +83,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|                             prev_conditions, |                             prev_conditions, | ||||||
|                             cur_path, |                             cur_path, | ||||||
|                             forbidden_conditions, |                             forbidden_conditions, | ||||||
|                         )? |                         )?; | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 Some(condition) => { |                 Some(condition) => { | ||||||
| @@ -101,24 +99,20 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|                     } |                     } | ||||||
|                     cur_path.insert(condition); |                     cur_path.insert(condition); | ||||||
|                     prev_conditions.push(condition); |                     prev_conditions.push(condition); | ||||||
|  |  | ||||||
|                     let mut new_forbidden_conditions = forbidden_conditions.clone(); |                     let mut new_forbidden_conditions = forbidden_conditions.clone(); | ||||||
|                     new_forbidden_conditions |                     if let Some(next_forbidden) = | ||||||
|                         .union(dead_end_path_cache.condition_couples.get(condition)); |                         dead_end_path_cache.forbidden_conditions_after_prefix(&prev_conditions) | ||||||
|                     dead_end_path_cache.prefixes.final_edges_after_prefix( |                     { | ||||||
|                         prev_conditions, |                         new_forbidden_conditions.union(&next_forbidden); | ||||||
|                         &mut |x| { |                     } | ||||||
|                             new_forbidden_conditions.insert(x); |  | ||||||
|                         }, |                     if edge.dest_node == self.query_graph.end_node { | ||||||
|                     ); |  | ||||||
|                     let next_any_valid = if edge.dest_node == self.query_graph.end_node { |  | ||||||
|                         any_valid = true; |                         any_valid = true; | ||||||
|                         let control_flow = visit(prev_conditions, self, dead_end_path_cache)?; |                         let control_flow = visit(prev_conditions, self, dead_end_path_cache)?; | ||||||
|                         match control_flow { |                         match control_flow { | ||||||
|                             ControlFlow::Continue(_) => {} |                             ControlFlow::Continue(_) => {} | ||||||
|                             ControlFlow::Break(_) => return Ok(true), |                             ControlFlow::Break(_) => return Ok(true), | ||||||
|                         } |                         } | ||||||
|                         true |  | ||||||
|                     } else { |                     } else { | ||||||
|                         self.visit_paths_of_cost_rec( |                         self.visit_paths_of_cost_rec( | ||||||
|                             edge.dest_node, |                             edge.dest_node, | ||||||
| @@ -129,28 +123,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|                             prev_conditions, |                             prev_conditions, | ||||||
|                             cur_path, |                             cur_path, | ||||||
|                             &mut new_forbidden_conditions, |                             &mut new_forbidden_conditions, | ||||||
|                         )? |                         )?; | ||||||
|                     }; |                     } | ||||||
|                     cur_path.remove(condition); |                     cur_path.remove(condition); | ||||||
|                     prev_conditions.pop(); |                     prev_conditions.pop(); | ||||||
|                     next_any_valid |  | ||||||
|                 } |                 } | ||||||
|             }; |             }; | ||||||
|             any_valid |= next_any_valid; |  | ||||||
|  |  | ||||||
|             if next_any_valid { |  | ||||||
|                 if dead_end_path_cache.path_is_dead_end(prev_conditions, cur_path) { |  | ||||||
|                     return Ok(any_valid); |  | ||||||
|                 } |  | ||||||
|                 forbidden_conditions.union(&dead_end_path_cache.conditions); |  | ||||||
|                 for prev_condition in prev_conditions.iter() { |  | ||||||
|                     forbidden_conditions |  | ||||||
|                         .union(dead_end_path_cache.condition_couples.get(*prev_condition)); |  | ||||||
|                 } |  | ||||||
|                 dead_end_path_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| { |  | ||||||
|                     forbidden_conditions.insert(x); |  | ||||||
|                 }); |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(any_valid) |         Ok(any_valid) | ||||||
|   | |||||||
| @@ -12,11 +12,16 @@ use crate::Result; | |||||||
| pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> { | pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> { | ||||||
|     // TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap> |     // TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap> | ||||||
|     pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>, |     pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>, | ||||||
|  |     pub universe_length: u64, | ||||||
|     _phantom: PhantomData<G>, |     _phantom: PhantomData<G>, | ||||||
| } | } | ||||||
| impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> { | impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | ||||||
|     fn default() -> Self { |     pub fn new(universe: &RoaringBitmap) -> Self { | ||||||
|         Self { cache: Default::default(), _phantom: Default::default() } |         Self { | ||||||
|  |             cache: Default::default(), | ||||||
|  |             _phantom: Default::default(), | ||||||
|  |             universe_length: universe.len(), | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | ||||||
| @@ -33,6 +38,9 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { | |||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|     ) -> Result<&'s RoaringBitmap> { |     ) -> Result<&'s RoaringBitmap> { | ||||||
|         if self.cache.contains_key(&interned_condition) { |         if self.cache.contains_key(&interned_condition) { | ||||||
|  |             // TODO compare length of universe compared to the one in self | ||||||
|  |             // if it is smaller, then update the value | ||||||
|  |  | ||||||
|             // TODO: should we update the bitmap in the cache if the new universe |             // TODO: should we update the bitmap in the cache if the new universe | ||||||
|             // reduces it? |             // reduces it? | ||||||
|             // TODO: maybe have a generation: u32 to track every time the universe was |             // TODO: maybe have a generation: u32 to track every time the universe was | ||||||
|   | |||||||
| @@ -1,84 +1,83 @@ | |||||||
| use super::{path_set::PathSet, RankingRuleGraphTrait}; | // use super::{path_set::PathSet, RankingRuleGraphTrait}; | ||||||
| use crate::search::new::{ | // use crate::search::new::{ | ||||||
|     interner::{FixedSizeInterner, Interned, MappedInterner}, | //     interner::{FixedSizeInterner, Interned, MappedInterner}, | ||||||
|     small_bitmap::SmallBitmap, | //     small_bitmap::SmallBitmap, | ||||||
| }; | // }; | ||||||
|  |  | ||||||
| /// A cache which stores sufficient conditions for a path | // /// A cache which stores sufficient conditions for a path | ||||||
| /// to resolve to an empty set of candidates within the current | // /// to resolve to an empty set of candidates within the current | ||||||
| /// universe. | // /// universe. | ||||||
| pub struct DeadEndPathCache<G: RankingRuleGraphTrait> { | // pub struct DeadEndPathCache<G: RankingRuleGraphTrait> { | ||||||
|     /// The set of edge conditions that resolve to no documents. | //     /// The set of edge conditions that resolve to no documents. | ||||||
|     pub conditions: SmallBitmap<G::Condition>, | //     pub conditions: SmallBitmap<G::Condition>, | ||||||
|     /// A set of path prefixes that resolve to no documents. | //     /// A set of path prefixes that resolve to no documents. | ||||||
|     pub prefixes: PathSet<G::Condition>, | //     pub prefixes: PathSet<G::Condition>, | ||||||
|     /// A set of empty couples of edge conditions that resolve to no documents. | //     /// A set of empty couples of edge conditions that resolve to no documents. | ||||||
|     pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>, | //     pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>, | ||||||
| } | // } | ||||||
| impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> { | // impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> { | ||||||
|     fn clone(&self) -> Self { | //     fn clone(&self) -> Self { | ||||||
|         Self { | //         Self { | ||||||
|             conditions: self.conditions.clone(), | //             conditions: self.conditions.clone(), | ||||||
|             prefixes: self.prefixes.clone(), | //             prefixes: self.prefixes.clone(), | ||||||
|             condition_couples: self.condition_couples.clone(), | //             condition_couples: self.condition_couples.clone(), | ||||||
|         } | //         } | ||||||
|     } | //     } | ||||||
| } | // } | ||||||
|  |  | ||||||
| impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> { | // impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> { | ||||||
|     /// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges. | //     /// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges. | ||||||
|     pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self { | //     pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self { | ||||||
|         Self { | //         Self { | ||||||
|             conditions: SmallBitmap::for_interned_values_in(all_conditions), | //             conditions: SmallBitmap::for_interned_values_in(all_conditions), | ||||||
|             prefixes: PathSet::default(), | //             prefixes: PathSet::default(), | ||||||
|             condition_couples: all_conditions | //             condition_couples: all_conditions | ||||||
|                 .map(|_| SmallBitmap::for_interned_values_in(all_conditions)), | //                 .map(|_| SmallBitmap::for_interned_values_in(all_conditions)), | ||||||
|         } | //         } | ||||||
|     } | //     } | ||||||
|  |  | ||||||
|     /// Store in the cache that every path containing the given edge resolves to no documents. | //     /// Store in the cache that every path containing the given edge resolves to no documents. | ||||||
|     pub fn add_condition(&mut self, condition: Interned<G::Condition>) { | //     pub fn add_condition(&mut self, condition: Interned<G::Condition>) { | ||||||
|         self.conditions.insert(condition); | //         self.conditions.insert(condition); | ||||||
|         self.condition_couples.get_mut(condition).clear(); | //         self.condition_couples.get_mut(condition).clear(); | ||||||
|         self.prefixes.remove_edge(condition); | //         self.prefixes.remove_edge(condition); | ||||||
|         for (_, edges2) in self.condition_couples.iter_mut() { | //         for (_, edges2) in self.condition_couples.iter_mut() { | ||||||
|             edges2.remove(condition); | //             edges2.remove(condition); | ||||||
|         } | //         } | ||||||
|     } | //     } | ||||||
|     /// Store in the cache that every path containing the given prefix resolves to no documents. | //     /// Store in the cache that every path containing the given prefix resolves to no documents. | ||||||
|     pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) { | //     pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) { | ||||||
|         // TODO: typed PathSet | //         // TODO: typed PathSet | ||||||
|         self.prefixes.insert(prefix.iter().copied()); | //         self.prefixes.insert(prefix.iter().copied()); | ||||||
|     } | //     } | ||||||
|  |  | ||||||
|     /// Store in the cache that every path containing the two given edges resolves to no documents. | //     /// Store in the cache that every path containing the two given edges resolves to no documents. | ||||||
|     pub fn add_condition_couple( | //     pub fn add_condition_couple( | ||||||
|         &mut self, | //         &mut self, | ||||||
|         edge1: Interned<G::Condition>, | //         edge1: Interned<G::Condition>, | ||||||
|         edge2: Interned<G::Condition>, | //         edge2: Interned<G::Condition>, | ||||||
|     ) { | //     ) { | ||||||
|         self.condition_couples.get_mut(edge1).insert(edge2); | //         self.condition_couples.get_mut(edge1).insert(edge2); | ||||||
|     } | //     } | ||||||
|  |  | ||||||
|     /// Returns true if the cache can determine that the given path resolves to no documents. | //     /// Returns true if the cache can determine that the given path resolves to no documents. | ||||||
|     pub fn path_is_dead_end( | //     pub fn path_is_dead_end( | ||||||
|         &self, | //         &self, | ||||||
|         path: &[Interned<G::Condition>], | //         path: &[Interned<G::Condition>], | ||||||
|         path_bitmap: &SmallBitmap<G::Condition>, | //         path_bitmap: &SmallBitmap<G::Condition>, | ||||||
|     ) -> bool { | //     ) -> bool { | ||||||
|         if path_bitmap.intersects(&self.conditions) { | //         if path_bitmap.intersects(&self.conditions) { | ||||||
|             return true; | //             return true; | ||||||
|         } | //         } | ||||||
|         for condition in path.iter() { | //         for condition in path.iter() { | ||||||
|             // TODO: typed path | //             let forbidden_other_edges = self.condition_couples.get(*condition); | ||||||
|             let forbidden_other_edges = self.condition_couples.get(*condition); | //             if path_bitmap.intersects(forbidden_other_edges) { | ||||||
|             if path_bitmap.intersects(forbidden_other_edges) { | //                 return true; | ||||||
|                 return true; | //             } | ||||||
|             } | //         } | ||||||
|         } | //         if self.prefixes.contains_prefix_of_path(path) { | ||||||
|         if self.prefixes.contains_prefix_of_path(path) { | //             return true; | ||||||
|             return true; | //         } | ||||||
|         } | //         false | ||||||
|         false | //     } | ||||||
|     } | // } | ||||||
| } |  | ||||||
|   | |||||||
| @@ -20,7 +20,8 @@ use std::collections::HashSet; | |||||||
| use std::hash::Hash; | use std::hash::Hash; | ||||||
|  |  | ||||||
| pub use condition_docids_cache::ConditionDocIdsCache; | pub use condition_docids_cache::ConditionDocIdsCache; | ||||||
| pub use dead_end_path_cache::DeadEndPathCache; | // pub use dead_end_path_cache::DeadEndPathCache; | ||||||
|  | pub use path_set::DeadEndsCache; | ||||||
| pub use proximity::{ProximityCondition, ProximityGraph}; | pub use proximity::{ProximityCondition, ProximityGraph}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| pub use typo::{TypoCondition, TypoGraph}; | pub use typo::{TypoCondition, TypoGraph}; | ||||||
| @@ -113,7 +114,7 @@ pub trait RankingRuleGraphTrait: Sized { | |||||||
|     fn log_state( |     fn log_state( | ||||||
|         graph: &RankingRuleGraph<Self>, |         graph: &RankingRuleGraph<Self>, | ||||||
|         paths: &[Vec<Interned<Self::Condition>>], |         paths: &[Vec<Interned<Self::Condition>>], | ||||||
|         dead_end_path_cache: &DeadEndPathCache<Self>, |         dead_end_path_cache: &DeadEndsCache<Self::Condition>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::Condition>)>, QueryNode>, |         distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::Condition>)>, QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
|   | |||||||
| @@ -2,104 +2,165 @@ | |||||||
| // For the empty_prefixes field in the EmptyPathsCache only :/ | // For the empty_prefixes field in the EmptyPathsCache only :/ | ||||||
| // but it could be used for more, like efficient computing of a set of paths | // but it could be used for more, like efficient computing of a set of paths | ||||||
|  |  | ||||||
| use crate::search::new::interner::Interned; | use crate::search::new::{ | ||||||
|  |     interner::{FixedSizeInterner, Interned}, | ||||||
|  |     small_bitmap::SmallBitmap, | ||||||
|  | }; | ||||||
|  |  | ||||||
| /// A set of `Vec<Interned<T>>` implemented as a prefix tree. | pub struct DeadEndsCache<T> { | ||||||
| pub struct PathSet<T> { |  | ||||||
|     nodes: Vec<(Interned<T>, Self)>, |     nodes: Vec<(Interned<T>, Self)>, | ||||||
|     is_end: bool, |     pub forbidden: SmallBitmap<T>, | ||||||
| } | } | ||||||
|  | impl<T> DeadEndsCache<T> { | ||||||
| impl<T> Clone for PathSet<T> { |     pub fn new(for_interner: &FixedSizeInterner<T>) -> Self { | ||||||
|     fn clone(&self) -> Self { |         Self { nodes: vec![], forbidden: SmallBitmap::for_interned_values_in(for_interner) } | ||||||
|         Self { nodes: self.nodes.clone(), is_end: self.is_end } |  | ||||||
|     } |     } | ||||||
| } |     pub fn forbid_condition(&mut self, condition: Interned<T>) { | ||||||
|  |         self.forbidden.insert(condition); | ||||||
| impl<T> std::fmt::Debug for PathSet<T> { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish() |  | ||||||
|     } |     } | ||||||
| } |     fn advance(&mut self, condition: Interned<T>) -> Option<&mut Self> { | ||||||
|  |         for (e, next_node) in &mut self.nodes { | ||||||
| impl<T> Default for PathSet<T> { |             if condition == *e { | ||||||
|     fn default() -> Self { |                 return Some(next_node); | ||||||
|         Self { nodes: Default::default(), is_end: Default::default() } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<T> PathSet<T> { |  | ||||||
|     pub fn insert(&mut self, mut edges: impl Iterator<Item = Interned<T>>) { |  | ||||||
|         match edges.next() { |  | ||||||
|             None => { |  | ||||||
|                 self.is_end = true; |  | ||||||
|             } |  | ||||||
|             Some(first_edge) => { |  | ||||||
|                 for (edge, next_node) in &mut self.nodes { |  | ||||||
|                     if edge == &first_edge { |  | ||||||
|                         return next_node.insert(edges); |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|                 let mut rest = PathSet::default(); |         None | ||||||
|                 rest.insert(edges); |  | ||||||
|                 self.nodes.push((first_edge, rest)); |  | ||||||
|     } |     } | ||||||
|         } |     pub fn forbidden_conditions_after_prefix( | ||||||
|     } |         &mut self, | ||||||
|  |         mut prefix: &[Interned<T>], | ||||||
|     pub fn remove_edge(&mut self, forbidden_edge: Interned<T>) { |     ) -> Option<SmallBitmap<T>> { | ||||||
|         let mut i = 0; |         let mut cursor = self; | ||||||
|         while i < self.nodes.len() { |         for c in prefix.iter() { | ||||||
|             let should_remove = if self.nodes[i].0 == forbidden_edge { |             if let Some(next) = cursor.advance(*c) { | ||||||
|                 true |                 cursor = next; | ||||||
|             } else if !self.nodes[i].1.nodes.is_empty() { |  | ||||||
|                 self.nodes[i].1.remove_edge(forbidden_edge); |  | ||||||
|                 self.nodes[i].1.nodes.is_empty() |  | ||||||
|             } else { |             } else { | ||||||
|                 false |                 return None; | ||||||
|             }; |  | ||||||
|             if should_remove { |  | ||||||
|                 self.nodes.remove(i); |  | ||||||
|             } else { |  | ||||||
|                 i += 1; |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |         Some(cursor.forbidden.clone()) | ||||||
|     } |     } | ||||||
|  |     pub fn forbid_condition_after_prefix( | ||||||
|     pub fn final_edges_after_prefix( |         &mut self, | ||||||
|         &self, |         mut prefix: impl Iterator<Item = Interned<T>>, | ||||||
|         prefix: &[Interned<T>], |         forbidden: Interned<T>, | ||||||
|         visit: &mut impl FnMut(Interned<T>), |  | ||||||
|     ) { |     ) { | ||||||
|         let [first_edge, remaining_prefix @ ..] = prefix else { |         match prefix.next() { | ||||||
|             for node in self.nodes.iter() { |             None => { | ||||||
|                 if node.1.is_end { |                 self.forbidden.insert(forbidden); | ||||||
|                     visit(node.0) |             } | ||||||
|  |             Some(first_condition) => { | ||||||
|  |                 for (condition, next_node) in &mut self.nodes { | ||||||
|  |                     if condition == &first_condition { | ||||||
|  |                         return next_node.forbid_condition_after_prefix(prefix, forbidden); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             return |                 let mut rest = DeadEndsCache { | ||||||
|  |                     nodes: vec![], | ||||||
|  |                     forbidden: SmallBitmap::new(self.forbidden.universe_length()), | ||||||
|                 }; |                 }; | ||||||
|         for (edge, rest) in self.nodes.iter() { |                 rest.forbid_condition_after_prefix(prefix, forbidden); | ||||||
|             if edge == first_edge { |                 self.nodes.push((first_condition, rest)); | ||||||
|                 return rest.final_edges_after_prefix(remaining_prefix, visit); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool { |  | ||||||
|         if self.is_end { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         match path { |  | ||||||
|             [] => false, |  | ||||||
|             [first_edge, remaining_path @ ..] => { |  | ||||||
|                 for (edge, rest) in self.nodes.iter() { |  | ||||||
|                     if edge == first_edge { |  | ||||||
|                         return rest.contains_prefix_of_path(remaining_path); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 false |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | // /// A set of `Vec<Interned<T>>` implemented as a prefix tree. | ||||||
|  | // pub struct PathSet<T> { | ||||||
|  | //     nodes: Vec<(Interned<T>, Self)>, | ||||||
|  | //     is_end: bool, | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // impl<T> Clone for PathSet<T> { | ||||||
|  | //     fn clone(&self) -> Self { | ||||||
|  | //         Self { nodes: self.nodes.clone(), is_end: self.is_end } | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // impl<T> std::fmt::Debug for PathSet<T> { | ||||||
|  | //     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  | //         f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish() | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // impl<T> Default for PathSet<T> { | ||||||
|  | //     fn default() -> Self { | ||||||
|  | //         Self { nodes: Default::default(), is_end: Default::default() } | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // impl<T> PathSet<T> { | ||||||
|  | //     pub fn insert(&mut self, mut conditions: impl Iterator<Item = Interned<T>>) { | ||||||
|  | //         match conditions.next() { | ||||||
|  | //             None => { | ||||||
|  | //                 self.is_end = true; | ||||||
|  | //             } | ||||||
|  | //             Some(first_condition) => { | ||||||
|  | //                 for (condition, next_node) in &mut self.nodes { | ||||||
|  | //                     if condition == &first_condition { | ||||||
|  | //                         return next_node.insert(conditions); | ||||||
|  | //                     } | ||||||
|  | //                 } | ||||||
|  | //                 let mut rest = PathSet::default(); | ||||||
|  | //                 rest.insert(conditions); | ||||||
|  | //                 self.nodes.push((first_condition, rest)); | ||||||
|  | //             } | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  |  | ||||||
|  | //     pub fn remove_condition(&mut self, forbidden_condition: Interned<T>) { | ||||||
|  | //         let mut i = 0; | ||||||
|  | //         while i < self.nodes.len() { | ||||||
|  | //             let should_remove = if self.nodes[i].0 == forbidden_condition { | ||||||
|  | //                 true | ||||||
|  | //             } else if !self.nodes[i].1.nodes.is_empty() { | ||||||
|  | //                 self.nodes[i].1.remove_condition(forbidden_condition); | ||||||
|  | //                 self.nodes[i].1.nodes.is_empty() | ||||||
|  | //             } else { | ||||||
|  | //                 false | ||||||
|  | //             }; | ||||||
|  | //             if should_remove { | ||||||
|  | //                 self.nodes.remove(i); | ||||||
|  | //             } else { | ||||||
|  | //                 i += 1; | ||||||
|  | //             } | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  |  | ||||||
|  | //     pub fn final_conditions_after_prefix( | ||||||
|  | //         &self, | ||||||
|  | //         prefix: &[Interned<T>], | ||||||
|  | //         visit: &mut impl FnMut(Interned<T>), | ||||||
|  | //     ) { | ||||||
|  | //         let [first_condition, remaining_prefix @ ..] = prefix else { | ||||||
|  | //             for node in self.nodes.iter() { | ||||||
|  | //                 if node.1.is_end { | ||||||
|  | //                     visit(node.0) | ||||||
|  | //                 } | ||||||
|  | //             } | ||||||
|  | //             return | ||||||
|  | //         }; | ||||||
|  | //         for (condition, rest) in self.nodes.iter() { | ||||||
|  | //             if condition == first_condition { | ||||||
|  | //                 return rest.final_conditions_after_prefix(remaining_prefix, visit); | ||||||
|  | //             } | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  |  | ||||||
|  | //     pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool { | ||||||
|  | //         if self.is_end { | ||||||
|  | //             return true; | ||||||
|  | //         } | ||||||
|  | //         match path { | ||||||
|  | //             [] => false, | ||||||
|  | //             [first_condition, remaining_path @ ..] => { | ||||||
|  | //                 for (condition, rest) in self.nodes.iter() { | ||||||
|  | //                     if condition == first_condition { | ||||||
|  | //                         return rest.contains_prefix_of_path(remaining_path); | ||||||
|  | //                     } | ||||||
|  | //                 } | ||||||
|  | //                 false | ||||||
|  | //             } | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|   | |||||||
| @@ -6,8 +6,7 @@ use std::iter::FromIterator; | |||||||
|  |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::dead_end_path_cache::DeadEndPathCache; | use super::{RankingRuleGraph, RankingRuleGraphTrait, DeadEndsCache}; | ||||||
| use super::{RankingRuleGraph, RankingRuleGraphTrait}; |  | ||||||
| use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | ||||||
| use crate::search::new::logger::SearchLogger; | use crate::search::new::logger::SearchLogger; | ||||||
| use crate::search::new::query_term::{Phrase, QueryTerm}; | use crate::search::new::query_term::{Phrase, QueryTerm}; | ||||||
| @@ -67,7 +66,7 @@ impl RankingRuleGraphTrait for ProximityGraph { | |||||||
|     fn log_state( |     fn log_state( | ||||||
|         graph: &RankingRuleGraph<Self>, |         graph: &RankingRuleGraph<Self>, | ||||||
|         paths: &[Vec<Interned<ProximityCondition>>], |         paths: &[Vec<Interned<ProximityCondition>>], | ||||||
|         dead_end_path_cache: &DeadEndPathCache<Self>, |         dead_end_path_cache: &DeadEndsCache<Self::Condition>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, |         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::dead_end_path_cache::DeadEndPathCache; | use super::{RankingRuleGraph, RankingRuleGraphTrait, DeadEndsCache}; | ||||||
| use super::{RankingRuleGraph, RankingRuleGraphTrait}; |  | ||||||
| use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | ||||||
| use crate::search::new::logger::SearchLogger; | use crate::search::new::logger::SearchLogger; | ||||||
| use crate::search::new::query_graph::QueryNodeData; | use crate::search::new::query_graph::QueryNodeData; | ||||||
| @@ -137,7 +136,7 @@ impl RankingRuleGraphTrait for TypoGraph { | |||||||
|     fn log_state( |     fn log_state( | ||||||
|         graph: &RankingRuleGraph<Self>, |         graph: &RankingRuleGraph<Self>, | ||||||
|         paths: &[Vec<Interned<TypoCondition>>], |         paths: &[Vec<Interned<TypoCondition>>], | ||||||
|         dead_end_path_cache: &DeadEndPathCache<Self>, |         dead_end_path_cache: &DeadEndsCache<TypoCondition>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, |         distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>, | ||||||
|         cost: u16, |         cost: u16, | ||||||
|   | |||||||
| @@ -28,6 +28,12 @@ impl<T> SmallBitmap<T> { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |     pub fn universe_length(&self) -> u16 { | ||||||
|  |         match &self.internal { | ||||||
|  |             SmallBitmapInternal::Tiny(_) => 64, | ||||||
|  |             SmallBitmapInternal::Small(xs) => 64 * xs.len() as u16, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     pub fn from_iter( |     pub fn from_iter( | ||||||
|         xs: impl Iterator<Item = Interned<T>>, |         xs: impl Iterator<Item = Interned<T>>, | ||||||
|         for_interner: &FixedSizeInterner<T>, |         for_interner: &FixedSizeInterner<T>, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user