Replace EdgeCondition with an Option<..> + other code cleanup

This commit is contained in:
Loïc Lecrenier
2023-03-16 11:52:51 +01:00
parent 7b1d8f4c6d
commit aa59c3bc2c
16 changed files with 202 additions and 204 deletions

View File

@ -45,7 +45,7 @@ use super::interner::MappedInterner;
use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::ranking_rule_graph::{
DeadEndPathCache, EdgeCondition, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
DeadEndPathCache, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
RankingRuleGraphTrait, TypoGraph,
};
use super::small_bitmap::SmallBitmap;
@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
/// Cache to retrieve the docids associated with each edge
edge_conditions_cache: EdgeConditionDocIdsCache<G>,
/// Cache used to optimistically discard paths that resolve to no documents.
empty_paths_cache: DeadEndPathCache<G>,
dead_end_path_cache: DeadEndPathCache<G>,
/// A structure giving the list of possible costs from each node to the end node,
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
@ -101,27 +101,23 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
ctx: &mut SearchContext<'ctx>,
graph: &mut RankingRuleGraph<G>,
edge_docids_cache: &mut EdgeConditionDocIdsCache<G>,
condition_docids_cache: &mut EdgeConditionDocIdsCache<G>,
universe: &RoaringBitmap,
empty_paths_cache: &mut DeadEndPathCache<G>,
dead_end_path_cache: &mut DeadEndPathCache<G>,
) -> Result<()> {
for edge_id in graph.edges_store.indexes() {
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
continue;
};
let condition = edge.condition;
let Some(condition) = edge.condition else { continue };
match condition {
EdgeCondition::Unconditional => continue,
EdgeCondition::Conditional(condition) => {
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
if docids.is_disjoint(universe) {
graph.remove_edges_with_condition(condition);
empty_paths_cache.add_condition(condition);
edge_docids_cache.cache.remove(&condition);
continue;
}
}
let docids =
condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?;
if docids.is_disjoint(universe) {
graph.remove_edges_with_condition(condition);
dead_end_path_cache.add_condition(condition);
condition_docids_cache.cache.remove(&condition);
continue;
}
}
Ok(())
@ -139,17 +135,17 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
query_graph: &QueryGraph,
) -> Result<()> {
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
let mut edge_docids_cache = EdgeConditionDocIdsCache::default();
let mut empty_paths_cache = DeadEndPathCache::new(&graph.conditions_interner);
let mut condition_docids_cache = EdgeConditionDocIdsCache::default();
let mut dead_end_path_cache = DeadEndPathCache::new(&graph.conditions_interner);
// First simplify the graph as much as possible, by computing the docids of the edges
// First simplify the graph as much as possible, by computing the docids of all the conditions
// within the rule's universe and removing the edges that have no associated docids.
remove_empty_edges(
ctx,
&mut graph,
&mut edge_docids_cache,
&mut condition_docids_cache,
universe,
&mut empty_paths_cache,
&mut dead_end_path_cache,
)?;
// Then pre-compute the cost of all paths from each node to the end node
@ -157,8 +153,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let state = GraphBasedRankingRuleState {
graph,
edge_conditions_cache: edge_docids_cache,
empty_paths_cache,
edge_conditions_cache: condition_docids_cache,
dead_end_path_cache,
all_distances,
cur_distance_idx: 0,
};
@ -187,7 +183,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
&mut state.graph,
&mut state.edge_conditions_cache,
universe,
&mut state.empty_paths_cache,
&mut state.dead_end_path_cache,
)?;
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
@ -208,8 +204,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let GraphBasedRankingRuleState {
graph,
edge_conditions_cache: edge_docids_cache,
empty_paths_cache,
edge_conditions_cache: condition_docids_cache,
dead_end_path_cache,
all_distances,
cur_distance_idx: _,
} = &mut state;
@ -224,18 +220,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// For each path of the given cost, we will compute its associated
// document ids.
// In case the path does not resolve to any document id, we try to figure out why
// and update the `empty_paths_cache` accordingly.
// and update the `dead_end_path_cache` accordingly.
// For example, it may be that the path is empty because one of its edges is disjoint
// with the universe, or because a prefix of the path is disjoint with the universe, or because
// the path contains two edges that are disjoint from each other within the universe.
// Updating the empty_paths_cache helps speed up the execution of `visit_paths_of_cost` and reduces
// Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces
// the number of future candidate paths given by that same function.
graph.visit_paths_of_cost(
graph.query_graph.root_node,
cost,
all_distances,
empty_paths_cache,
|path, graph, empty_paths_cache| {
dead_end_path_cache,
|path, graph, dead_end_path_cache| {
// Accumulate the path for logging purposes only
paths.push(path.to_vec());
let mut path_docids = universe.clone();
@ -243,47 +239,48 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// We store the edges and their docids in vectors in case the path turns out to be
// empty and we need to figure out why it was empty.
let mut visited_conditions = vec![];
let mut cached_edge_docids = vec![];
let mut cached_condition_docids = vec![];
// graph.conditions_interner.map(|_| RoaringBitmap::new());
for &condition in path {
visited_conditions.push(condition);
let edge_docids =
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
let condition_docids = condition_docids_cache
.get_condition_docids(ctx, condition, graph, &universe)?;
cached_edge_docids.push((condition, edge_docids.clone())); // .get_mut(condition) = edge_docids.clone();
cached_condition_docids.push((condition, condition_docids.clone())); // .get_mut(condition) = condition_docids.clone();
// If the edge is empty, then the path will be empty as well, we update the graph
// and caches accordingly and skip to the next candidate path.
if edge_docids.is_disjoint(&universe) {
if condition_docids.is_disjoint(&universe) {
// 1. Store in the cache that this edge is empty for this universe
empty_paths_cache.add_condition(condition);
dead_end_path_cache.add_condition(condition);
// 2. remove this edge from the ranking rule graph
// ouch, no! :( need to link a condition to one or more ranking rule edges
graph.remove_edges_with_condition(condition);
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
edge_docids_cache.cache.remove(&condition);
// 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
condition_docids_cache.cache.remove(&condition);
return Ok(ControlFlow::Continue(()));
}
path_docids &= edge_docids;
path_docids &= condition_docids;
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
if path_docids.is_disjoint(&universe) {
// First, we know that this (sub)path is empty, and thus any path
// that starts with the same sequence of conditions will also be empty.
empty_paths_cache.add_prefix(&visited_conditions);
dead_end_path_cache.add_prefix(&visited_conditions);
// Second, if the intersection between this edge and any
// previous one is disjoint with the universe,
// then we also know that any path containing the same couple of
// edges will also be empty.
for (past_condition, edge_docids2) in cached_edge_docids.iter() {
for (past_condition, condition_docids2) in cached_condition_docids.iter() {
if *past_condition == condition {
continue;
};
let intersection = edge_docids & edge_docids2;
let intersection = condition_docids & condition_docids2;
if intersection.is_disjoint(&universe) {
empty_paths_cache.add_condition_couple(*past_condition, condition);
dead_end_path_cache
.add_condition_couple(*past_condition, condition);
}
}
// We should maybe instead try to compute:
@ -310,7 +307,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
G::log_state(
&original_graph,
&paths,
empty_paths_cache,
dead_end_path_cache,
original_universe,
all_distances,
cost,
@ -322,8 +319,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// But we only do it in case the bucket length is >1, because otherwise
// we know the child ranking rule won't be called anyway
let mut next_query_graph = original_graph.query_graph;
next_query_graph.simplify();
if bucket.len() > 1 {
next_query_graph.simplify();
// 1. Gather all the words and phrases used in the computation of this bucket
let mut used_words = HashSet::new();
let mut used_phrases = HashSet::new();