Rewrite the dead-ends cache to detect more dead-ends

This commit is contained in:
Loïc Lecrenier
2023-03-19 14:30:19 +01:00
parent 49240c367a
commit c6ff97a220
7 changed files with 116 additions and 333 deletions

View File

@ -37,7 +37,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
&mut visit,
&mut vec![],
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
&mut dead_end_path_cache.forbidden.clone(),
dead_end_path_cache.forbidden.clone(),
)?;
Ok(())
}
@ -54,12 +54,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
) -> Result<ControlFlow<()>>,
prev_conditions: &mut Vec<Interned<G::Condition>>,
cur_path: &mut SmallBitmap<G::Condition>,
forbidden_conditions: &mut SmallBitmap<G::Condition>,
mut forbidden_conditions: SmallBitmap<G::Condition>,
) -> Result<bool> {
let mut any_valid = false;
let edges = self.edges_of_node.get(from).clone();
for edge_idx in edges.iter() {
'edges_loop: for edge_idx in edges.iter() {
let Some(edge) = self.edges_store.get(edge_idx).as_ref() else { continue };
if cost < edge.cost as u16 {
continue;
@ -73,6 +73,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
ControlFlow::Continue(_) => {}
ControlFlow::Break(_) => return Ok(true),
}
true
} else {
self.visit_paths_of_cost_rec(
edge.dest_node,
@ -82,8 +83,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
visit,
prev_conditions,
cur_path,
forbidden_conditions,
)?;
forbidden_conditions.clone(),
)?
}
}
Some(condition) => {
@ -101,18 +102,19 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
prev_conditions.push(condition);
let mut new_forbidden_conditions = forbidden_conditions.clone();
if let Some(next_forbidden) =
dead_end_path_cache.forbidden_conditions_after_prefix(&prev_conditions)
dead_end_path_cache.forbidden_conditions_after_prefix(prev_conditions)
{
new_forbidden_conditions.union(&next_forbidden);
}
if edge.dest_node == self.query_graph.end_node {
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
any_valid = true;
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
match control_flow {
ControlFlow::Continue(_) => {}
ControlFlow::Break(_) => return Ok(true),
}
true
} else {
self.visit_paths_of_cost_rec(
edge.dest_node,
@ -122,13 +124,23 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
visit,
prev_conditions,
cur_path,
&mut new_forbidden_conditions,
)?;
}
new_forbidden_conditions,
)?
};
cur_path.remove(condition);
prev_conditions.pop();
next_any_valid
}
};
any_valid |= next_any_valid;
if next_any_valid {
forbidden_conditions = dead_end_path_cache
.forbidden_conditions_for_all_prefixes_up_to(prev_conditions);
if cur_path.intersects(&forbidden_conditions) {
break 'edges_loop;
}
}
}
Ok(any_valid)

View File

@ -8,20 +8,17 @@ use crate::search::new::interner::Interned;
use crate::search::new::SearchContext;
use crate::Result;
// TODO: give a generation to each universe, then be able to get the exact
// delta of docids between two universes of different generations!
/// A cache storing the document ids associated with each ranking rule edge
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
pub universe_length: u64,
pub cache: FxHashMap<Interned<G::Condition>, (u64, RoaringBitmap)>,
_phantom: PhantomData<G>,
}
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
pub fn new(universe: &RoaringBitmap) -> Self {
Self {
cache: Default::default(),
_phantom: Default::default(),
universe_length: universe.len(),
}
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
fn default() -> Self {
Self { cache: Default::default(), _phantom: Default::default() }
}
}
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
@ -40,20 +37,21 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
if self.cache.contains_key(&interned_condition) {
// TODO compare length of universe compared to the one in self
// if it is smaller, then update the value
// TODO: should we update the bitmap in the cache if the new universe
// reduces it?
// TODO: maybe have a generation: u32 to track every time the universe was
// reduced. Then only attempt to recompute the intersection when there is a chance
// that condition_docids & universe changed
return Ok(&self.cache[&interned_condition]);
let (universe_len, docids) = self.cache.entry(interned_condition).or_default();
if *universe_len == universe.len() {
return Ok(docids);
} else {
*docids &= universe;
*universe_len = universe.len();
return Ok(docids);
}
}
// TODO: maybe universe doesn't belong here
let condition = graph.conditions_interner.get(interned_condition);
// TODO: faster way to do this?
let docids = universe & G::resolve_condition(ctx, condition, universe)?;
let _ = self.cache.insert(interned_condition, docids);
let docids = &self.cache[&interned_condition];
let docids = G::resolve_condition(ctx, condition, universe)?;
let _ = self.cache.insert(interned_condition, (universe.len(), docids));
let (_, docids) = &self.cache[&interned_condition];
Ok(docids)
}
}

View File

@ -1,83 +0,0 @@
// use super::{path_set::PathSet, RankingRuleGraphTrait};
// use crate::search::new::{
// interner::{FixedSizeInterner, Interned, MappedInterner},
// small_bitmap::SmallBitmap,
// };
// /// A cache which stores sufficient conditions for a path
// /// to resolve to an empty set of candidates within the current
// /// universe.
// pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
// /// The set of edge conditions that resolve to no documents.
// pub conditions: SmallBitmap<G::Condition>,
// /// A set of path prefixes that resolve to no documents.
// pub prefixes: PathSet<G::Condition>,
// /// A set of empty couples of edge conditions that resolve to no documents.
// pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
// }
// impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
// fn clone(&self) -> Self {
// Self {
// conditions: self.conditions.clone(),
// prefixes: self.prefixes.clone(),
// condition_couples: self.condition_couples.clone(),
// }
// }
// }
// impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
// /// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
// pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
// Self {
// conditions: SmallBitmap::for_interned_values_in(all_conditions),
// prefixes: PathSet::default(),
// condition_couples: all_conditions
// .map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
// }
// }
// /// Store in the cache that every path containing the given edge resolves to no documents.
// pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
// self.conditions.insert(condition);
// self.condition_couples.get_mut(condition).clear();
// self.prefixes.remove_edge(condition);
// for (_, edges2) in self.condition_couples.iter_mut() {
// edges2.remove(condition);
// }
// }
// /// Store in the cache that every path containing the given prefix resolves to no documents.
// pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
// // TODO: typed PathSet
// self.prefixes.insert(prefix.iter().copied());
// }
// /// Store in the cache that every path containing the two given edges resolves to no documents.
// pub fn add_condition_couple(
// &mut self,
// edge1: Interned<G::Condition>,
// edge2: Interned<G::Condition>,
// ) {
// self.condition_couples.get_mut(edge1).insert(edge2);
// }
// /// Returns true if the cache can determine that the given path resolves to no documents.
// pub fn path_is_dead_end(
// &self,
// path: &[Interned<G::Condition>],
// path_bitmap: &SmallBitmap<G::Condition>,
// ) -> bool {
// if path_bitmap.intersects(&self.conditions) {
// return true;
// }
// for condition in path.iter() {
// let forbidden_other_edges = self.condition_couples.get(*condition);
// if path_bitmap.intersects(forbidden_other_edges) {
// return true;
// }
// }
// if self.prefixes.contains_prefix_of_path(path) {
// return true;
// }
// false
// }
// }

View File

@ -8,8 +8,7 @@ the same but the edges are replaced.
mod build;
mod cheapest_paths;
mod condition_docids_cache;
mod dead_end_path_cache;
mod path_set;
mod dead_ends_cache;
/// Implementation of the `proximity` ranking rule
mod proximity;
@ -20,8 +19,7 @@ use std::collections::HashSet;
use std::hash::Hash;
pub use condition_docids_cache::ConditionDocIdsCache;
// pub use dead_end_path_cache::DeadEndPathCache;
pub use path_set::DeadEndsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap;
pub use typo::{TypoCondition, TypoGraph};

View File

@ -1,166 +0,0 @@
// What is PathSet used for?
// For the empty_prefixes field in the EmptyPathsCache only :/
// but it could be used for more, like efficient computing of a set of paths
use crate::search::new::{
interner::{FixedSizeInterner, Interned},
small_bitmap::SmallBitmap,
};
/// A prefix tree over sequences of interned conditions.
///
/// Each node corresponds to the path of conditions followed from the root to
/// reach it, and stores the set of conditions that are forbidden after that path.
pub struct DeadEndsCache<T> {
/// Child nodes, each paired with the condition that leads to it from this node.
nodes: Vec<(Interned<T>, Self)>,
/// The conditions forbidden at this node, i.e. after the prefix leading to it.
pub forbidden: SmallBitmap<T>,
}
impl<T> DeadEndsCache<T> {
    /// Creates an empty cache: no child nodes, and a `forbidden` bitmap built
    /// with `SmallBitmap::for_interned_values_in(for_interner)`.
    pub fn new(for_interner: &FixedSizeInterner<T>) -> Self {
        Self { nodes: vec![], forbidden: SmallBitmap::for_interned_values_in(for_interner) }
    }

    /// Marks `condition` as forbidden at this node, i.e. after the empty prefix.
    pub fn forbid_condition(&mut self, condition: Interned<T>) {
        self.forbidden.insert(condition);
    }

    /// Returns the child node reached by following `condition` from this node,
    /// or `None` if this node has no child for that condition.
    fn advance(&self, condition: Interned<T>) -> Option<&Self> {
        self.nodes.iter().find(|(c, _)| *c == condition).map(|(_, next_node)| next_node)
    }

    /// Returns a copy of the conditions forbidden after following `prefix`
    /// from this node.
    ///
    /// Returns `None` as soon as one condition of `prefix` has no matching
    /// child node. An empty `prefix` yields this node's own `forbidden` set.
    ///
    /// The lookup only reads the cache, so it borrows `self` immutably;
    /// callers holding a `&mut` reference can still call it unchanged.
    pub fn forbidden_conditions_after_prefix(
        &self,
        prefix: &[Interned<T>],
    ) -> Option<SmallBitmap<T>> {
        let mut cursor = self;
        for &condition in prefix {
            cursor = cursor.advance(condition)?;
        }
        Some(cursor.forbidden.clone())
    }

    /// Marks `forbidden` as forbidden after the path of conditions yielded by
    /// `prefix`, creating the intermediate nodes that do not exist yet.
    ///
    /// Every newly created node starts without children and with a bitmap
    /// built from its parent's `forbidden.universe_length()`.
    pub fn forbid_condition_after_prefix(
        &mut self,
        prefix: impl Iterator<Item = Interned<T>>,
        forbidden: Interned<T>,
    ) {
        let mut cursor = self;
        for condition in prefix {
            // Find the child for `condition`, appending a fresh one if needed.
            let idx = match cursor.nodes.iter().position(|(c, _)| *c == condition) {
                Some(idx) => idx,
                None => {
                    let child = DeadEndsCache {
                        nodes: vec![],
                        forbidden: SmallBitmap::new(cursor.forbidden.universe_length()),
                    };
                    cursor.nodes.push((condition, child));
                    cursor.nodes.len() - 1
                }
            };
            cursor = &mut cursor.nodes[idx].1;
        }
        // `cursor` is now the node for the whole prefix (the root if it was empty).
        cursor.forbidden.insert(forbidden);
    }
}
// /// A set of `Vec<Interned<T>>` implemented as a prefix tree.
// pub struct PathSet<T> {
// nodes: Vec<(Interned<T>, Self)>,
// is_end: bool,
// }
// impl<T> Clone for PathSet<T> {
// fn clone(&self) -> Self {
// Self { nodes: self.nodes.clone(), is_end: self.is_end }
// }
// }
// impl<T> std::fmt::Debug for PathSet<T> {
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish()
// }
// }
// impl<T> Default for PathSet<T> {
// fn default() -> Self {
// Self { nodes: Default::default(), is_end: Default::default() }
// }
// }
// impl<T> PathSet<T> {
// pub fn insert(&mut self, mut conditions: impl Iterator<Item = Interned<T>>) {
// match conditions.next() {
// None => {
// self.is_end = true;
// }
// Some(first_condition) => {
// for (condition, next_node) in &mut self.nodes {
// if condition == &first_condition {
// return next_node.insert(conditions);
// }
// }
// let mut rest = PathSet::default();
// rest.insert(conditions);
// self.nodes.push((first_condition, rest));
// }
// }
// }
// pub fn remove_condition(&mut self, forbidden_condition: Interned<T>) {
// let mut i = 0;
// while i < self.nodes.len() {
// let should_remove = if self.nodes[i].0 == forbidden_condition {
// true
// } else if !self.nodes[i].1.nodes.is_empty() {
// self.nodes[i].1.remove_condition(forbidden_condition);
// self.nodes[i].1.nodes.is_empty()
// } else {
// false
// };
// if should_remove {
// self.nodes.remove(i);
// } else {
// i += 1;
// }
// }
// }
// pub fn final_conditions_after_prefix(
// &self,
// prefix: &[Interned<T>],
// visit: &mut impl FnMut(Interned<T>),
// ) {
// let [first_condition, remaining_prefix @ ..] = prefix else {
// for node in self.nodes.iter() {
// if node.1.is_end {
// visit(node.0)
// }
// }
// return
// };
// for (condition, rest) in self.nodes.iter() {
// if condition == first_condition {
// return rest.final_conditions_after_prefix(remaining_prefix, visit);
// }
// }
// }
// pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool {
// if self.is_end {
// return true;
// }
// match path {
// [] => false,
// [first_condition, remaining_path @ ..] => {
// for (condition, rest) in self.nodes.iter() {
// if condition == first_condition {
// return rest.contains_prefix_of_path(remaining_path);
// }
// }
// false
// }
// }
// }
// }