mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 09:39:58 +00:00
Improve performance of the cheapest path finder algorithm
This commit is contained in:
@ -100,16 +100,21 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
let ControlFlow::Continue(next_any_valid) = cf else {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
};
|
||||
any_valid |= next_any_valid;
|
||||
if next_any_valid {
|
||||
// backtrack as much as possible if a valid path was found and the dead_ends_cache
|
||||
// was updated such that the current prefix is now invalid
|
||||
self.forbidden_conditions = ctx
|
||||
.dead_ends_cache
|
||||
.forbidden_conditions_for_all_prefixes_up_to(self.path.iter().copied());
|
||||
if self.visited_conditions.intersects(&self.forbidden_conditions) {
|
||||
break;
|
||||
return Ok(ControlFlow::Continue(true));
|
||||
}
|
||||
}
|
||||
any_valid |= next_any_valid;
|
||||
}
|
||||
// if there wasn't any valid path from this node to the end node, then
|
||||
// this node is a dead end **for this specific cost**.
|
||||
// we could encode this in the dead-ends cache
|
||||
|
||||
Ok(ControlFlow::Continue(any_valid))
|
||||
}
|
||||
@ -117,7 +122,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
fn visit_no_condition(
|
||||
&mut self,
|
||||
dest_node: Interned<QueryNode>,
|
||||
edge_forbidden_nodes: &SmallBitmap<QueryNode>,
|
||||
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||
visit: VisitFn<G>,
|
||||
ctx: &mut VisitorContext<G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
@ -137,7 +142,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
}
|
||||
} else {
|
||||
let old_fbct = self.forbidden_conditions_to_nodes.clone();
|
||||
self.forbidden_conditions_to_nodes.union(edge_forbidden_nodes);
|
||||
self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip);
|
||||
let cf = self.visit_node(dest_node, visit, ctx)?;
|
||||
self.forbidden_conditions_to_nodes = old_fbct;
|
||||
Ok(cf)
|
||||
@ -147,14 +152,14 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
&mut self,
|
||||
condition: Interned<G::Condition>,
|
||||
dest_node: Interned<QueryNode>,
|
||||
edge_forbidden_nodes: &SmallBitmap<QueryNode>,
|
||||
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||
visit: VisitFn<G>,
|
||||
ctx: &mut VisitorContext<G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
assert!(dest_node != ctx.graph.query_graph.end_node);
|
||||
|
||||
if self.forbidden_conditions_to_nodes.contains(dest_node)
|
||||
|| edge_forbidden_nodes.intersects(&self.visited_nodes)
|
||||
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
|
||||
{
|
||||
return Ok(ControlFlow::Continue(false));
|
||||
}
|
||||
@ -162,11 +167,13 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
return Ok(ControlFlow::Continue(false));
|
||||
}
|
||||
|
||||
if ctx
|
||||
// Checking that from the destination node, there is at least
|
||||
// one cost that we can visit that corresponds to our remaining budget.
|
||||
if !ctx
|
||||
.all_costs_from_node
|
||||
.get(dest_node)
|
||||
.iter()
|
||||
.all(|next_cost| *next_cost != self.remaining_cost)
|
||||
.any(|next_cost| *next_cost == self.remaining_cost)
|
||||
{
|
||||
return Ok(ControlFlow::Continue(false));
|
||||
}
|
||||
@ -182,7 +189,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
self.forbidden_conditions.union(&next_forbidden);
|
||||
}
|
||||
let old_fctn = self.forbidden_conditions_to_nodes.clone();
|
||||
self.forbidden_conditions_to_nodes.union(edge_forbidden_nodes);
|
||||
self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip);
|
||||
|
||||
let cf = self.visit_node(dest_node, visit, ctx)?;
|
||||
|
||||
@ -212,22 +219,21 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
}
|
||||
|
||||
while let Some(cur_node) = node_stack.pop_front() {
|
||||
let mut self_costs = BTreeSet::<u64>::new();
|
||||
let mut self_costs = Vec::<u64>::new();
|
||||
|
||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||
let succ_node = edge.dest_node;
|
||||
let succ_costs = costs_to_end.get(succ_node);
|
||||
for succ_distance in succ_costs {
|
||||
self_costs.insert(edge.cost as u64 + succ_distance);
|
||||
for succ_cost in succ_costs {
|
||||
self_costs.push(edge.cost as u64 + succ_cost);
|
||||
}
|
||||
}
|
||||
let costs_to_end_cur_node = costs_to_end.get_mut(cur_node);
|
||||
for cost in self_costs.iter() {
|
||||
costs_to_end_cur_node.push(*cost);
|
||||
}
|
||||
*costs_to_end.get_mut(cur_node) = self_costs.into_iter().collect();
|
||||
self_costs.sort_unstable();
|
||||
self_costs.dedup();
|
||||
|
||||
*costs_to_end.get_mut(cur_node) = self_costs;
|
||||
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||
if !enqueued.contains(prev_node) {
|
||||
node_stack.push_back(prev_node);
|
||||
@ -237,4 +243,56 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
}
|
||||
costs_to_end
|
||||
}
|
||||
|
||||
pub fn update_all_costs_before_nodes(
|
||||
&self,
|
||||
removed_nodes: &BTreeSet<Interned<QueryNode>>,
|
||||
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
|
||||
) {
|
||||
// unsafe {
|
||||
// FIND_ALL_COSTS_INC_COUNT += 1;
|
||||
// println!(
|
||||
// "update_all_costs_after_removing_edge incrementally count: {}",
|
||||
// FIND_ALL_COSTS_INC_COUNT
|
||||
// );
|
||||
// }
|
||||
|
||||
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
|
||||
let mut node_stack = VecDeque::new();
|
||||
|
||||
for node in removed_nodes.iter() {
|
||||
enqueued.insert(*node);
|
||||
node_stack.push_back(*node);
|
||||
}
|
||||
|
||||
while let Some(cur_node) = node_stack.pop_front() {
|
||||
let mut self_costs = BTreeSet::<u64>::new();
|
||||
|
||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||
let succ_node = edge.dest_node;
|
||||
let succ_costs = costs.get(succ_node);
|
||||
for succ_distance in succ_costs {
|
||||
self_costs.insert(edge.cost as u64 + succ_distance);
|
||||
}
|
||||
}
|
||||
let costs_to_end_cur_node = costs.get_mut(cur_node);
|
||||
for cost in self_costs.iter() {
|
||||
costs_to_end_cur_node.push(*cost);
|
||||
}
|
||||
let self_costs = self_costs.into_iter().collect::<Vec<_>>();
|
||||
if &self_costs == costs.get(cur_node) {
|
||||
continue;
|
||||
}
|
||||
*costs.get_mut(cur_node) = self_costs;
|
||||
|
||||
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||
if !enqueued.contains(prev_node) {
|
||||
node_stack.push_back(prev_node);
|
||||
enqueued.insert(prev_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -88,4 +88,12 @@ impl<T> DeadEndsCache<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn debug_print(&self, indent: usize) {
|
||||
// println!("{} {:?}", " ".repeat(indent), self.forbidden.iter().collect::<Vec<_>>());
|
||||
// for (condition, next) in self.conditions.iter().zip(self.next.iter()) {
|
||||
// println!("{} {condition}:", " ".repeat(indent));
|
||||
// next.debug_print(indent + 2);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
@ -10,10 +10,10 @@ mod cheapest_paths;
|
||||
mod condition_docids_cache;
|
||||
mod dead_ends_cache;
|
||||
|
||||
/// Implementation of the `attribute` ranking rule
|
||||
mod fid;
|
||||
/// Implementation of the `exactness` ranking rule
|
||||
mod exactness;
|
||||
/// Implementation of the `attribute` ranking rule
|
||||
mod fid;
|
||||
/// Implementation of the `position` ranking rule
|
||||
mod position;
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
@ -21,13 +21,14 @@ mod proximity;
|
||||
/// Implementation of the `typo` ranking rule
|
||||
mod typo;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub use fid::{FidCondition, FidGraph};
|
||||
pub use cheapest_paths::PathVisitor;
|
||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||
pub use dead_ends_cache::DeadEndsCache;
|
||||
pub use exactness::{ExactnessCondition, ExactnessGraph};
|
||||
pub use fid::{FidCondition, FidGraph};
|
||||
pub use position::{PositionCondition, PositionGraph};
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
@ -130,7 +131,12 @@ impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
/// Remove all edges with the given condition
|
||||
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::Condition>) {
|
||||
/// Return a set of all the source nodes of the removed edges
|
||||
pub fn remove_edges_with_condition(
|
||||
&mut self,
|
||||
condition_to_remove: Interned<G::Condition>,
|
||||
) -> BTreeSet<Interned<QueryNode>> {
|
||||
let mut source_nodes = BTreeSet::new();
|
||||
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||
let Some(condition) = edge.condition else { continue };
|
||||
@ -139,7 +145,9 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||
source_nodes.insert(source_node);
|
||||
}
|
||||
}
|
||||
source_nodes
|
||||
}
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ impl RankingRuleGraphTrait for PositionGraph {
|
||||
|
||||
let mut edges = vec![];
|
||||
for position in all_positions {
|
||||
let cost = {
|
||||
let sum_positions = {
|
||||
let mut cost = 0;
|
||||
for i in 0..term.term_ids.len() {
|
||||
// This is actually not fully correct and slightly penalises ngrams unfairly.
|
||||
@ -89,7 +89,7 @@ impl RankingRuleGraphTrait for PositionGraph {
|
||||
// TODO: We can improve performances and relevancy by storing
|
||||
// the term subsets associated to each position fetched.
|
||||
edges.push((
|
||||
cost,
|
||||
cost_from_sum_positions(sum_positions),
|
||||
conditions_interner.insert(PositionCondition {
|
||||
term: term.clone(), // TODO remove this ugly clone
|
||||
position,
|
||||
@ -100,3 +100,26 @@ impl RankingRuleGraphTrait for PositionGraph {
|
||||
Ok(edges)
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a sum of positions to an edge cost between 0 and 20.
///
/// Sums up to 3 are returned unchanged. Larger sums are grouped into buckets that
/// widen as the sum grows, and any sum above 8192 costs 20.
fn cost_from_sum_positions(sum_positions: u32) -> u32 {
    /// `(inclusive upper bound, cost)` pairs, sorted by ascending upper bound.
    const BUCKETS: [(u32, u32); 16] = [
        (5, 4),
        (7, 5),
        (9, 6),
        (11, 7),
        (13, 8),
        (15, 9),
        (24, 10),
        (32, 11),
        (64, 12),
        (128, 13),
        (256, 14),
        (512, 15),
        (1024, 16),
        (2048, 17),
        (4096, 18),
        (8192, 19),
    ];
    /// Cost of every sum larger than the last bucket's upper bound.
    const MAX_COST: u32 = 20;

    // The smallest sums map to themselves.
    if sum_positions <= 3 {
        return sum_positions;
    }

    // The first bucket whose upper bound covers the sum decides the cost.
    BUCKETS
        .iter()
        .find(|&&(upper_bound, _)| sum_positions <= upper_bound)
        .map_or(MAX_COST, |&(_, cost)| cost)
}
|
||||
|
Reference in New Issue
Block a user