Add a few more optimisations to new search algorithms

This commit is contained in:
Loïc Lecrenier
2023-03-08 09:53:05 +01:00
parent 9051065c22
commit 10626dddfc
10 changed files with 104 additions and 86 deletions

View File

@ -4,7 +4,8 @@ use super::empty_paths_cache::EmptyPathsCache;
use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::small_bitmap::SmallBitmap;
use crate::Result;
use std::collections::VecDeque;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, VecDeque};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Path {
@ -17,7 +18,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
&mut self,
from: usize,
cost: u16,
all_distances: &[Vec<u16>],
all_distances: &[Vec<(u16, SmallBitmap)>],
empty_paths_cache: &mut EmptyPathsCache,
mut visit: impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
) -> Result<()> {
@ -37,13 +38,9 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
&mut self,
from: usize,
cost: u16,
// TODO: replace all_distances with a Vec<SmallBitmap> where the SmallBitmap contains true if the cost exists and false otherwise
all_distances: &[Vec<u16>],
all_distances: &[Vec<(u16, SmallBitmap)>],
empty_paths_cache: &mut EmptyPathsCache,
visit: &mut impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
// replace prev edges by:
// (1) a small bitmap representing the path
// (2) a pointer within the EmptyPathsCache::forbidden_prefixes structure
prev_edges: &mut Vec<u16>,
cur_path: &mut SmallBitmap,
mut forbidden_edges: SmallBitmap,
@ -55,7 +52,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue };
if cost < edge.cost as u16
|| forbidden_edges.contains(edge_idx)
|| !all_distances[edge.to_node as usize].contains(&(cost - edge.cost as u16))
|| !all_distances[edge.to_node as usize].iter().any(
|(next_cost, necessary_edges)| {
(*next_cost == cost - edge.cost as u16)
&& !forbidden_edges.intersects(necessary_edges)
},
)
{
continue;
}
@ -99,21 +101,20 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
forbidden_edges.insert(x);
});
}
if next_any_valid && empty_paths_cache.path_is_empty(prev_edges, cur_path) {
return Ok(any_valid);
}
}
Ok(any_valid)
}
pub fn initialize_distances_cheapest(&self) -> Vec<Vec<u16>> {
let mut distances_to_end: Vec<Vec<u16>> = vec![vec![]; self.query_graph.nodes.len()];
pub fn initialize_distances_with_necessary_edges(&self) -> Vec<Vec<(u16, SmallBitmap)>> {
let mut distances_to_end: Vec<Vec<(u16, SmallBitmap)>> =
vec![vec![]; self.query_graph.nodes.len()];
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len() as u16);
let mut node_stack = VecDeque::new();
distances_to_end[self.query_graph.end_node as usize] = vec![0];
distances_to_end[self.query_graph.end_node as usize] =
vec![(0, SmallBitmap::new(self.all_edges.len() as u16))];
for prev_node in
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
@ -123,21 +124,29 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
}
while let Some(cur_node) = node_stack.pop_front() {
let mut self_distances = vec![];
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
let cur_node_edges = &self.node_edges[cur_node];
for edge_idx in cur_node_edges.iter() {
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
let succ_node = edge.to_node;
let succ_distances = &distances_to_end[succ_node as usize];
for succ_distance in succ_distances {
self_distances.push(edge.cost as u16 + succ_distance);
for (succ_distance, succ_necessary_edges) in succ_distances {
let potential_necessary_edges = SmallBitmap::from_iter(
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
self.all_edges.len() as u16,
);
match self_distances.entry(edge.cost as u16 + succ_distance) {
Entry::Occupied(mut prev_necessary_edges) => {
prev_necessary_edges.get_mut().intersection(&potential_necessary_edges);
}
Entry::Vacant(entry) => {
entry.insert(potential_necessary_edges);
}
}
}
}
self_distances.sort_unstable();
self_distances.dedup();
distances_to_end[cur_node] = self_distances;
distances_to_end[cur_node] = self_distances.into_iter().collect();
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node as usize);

View File

@ -49,6 +49,9 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
if self.cache.contains_key(&edge_index) {
// TODO: should we update the bitmap in the cache if the new universe
// reduces it?
// TODO: maybe have a generation: u32 to track every time the universe was
// reduced. Then only attempt to recompute the intersection when there is a chance
// that edge_docids & universe changed
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
}
// TODO: maybe universe doesn't belong here

View File

@ -119,7 +119,7 @@ pub trait RankingRuleGraphTrait: Sized {
paths: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: &[Vec<u16>],
distances: &[Vec<(u16, SmallBitmap)>],
cost: u16,
logger: &mut dyn SearchLogger<QueryGraph>,
);

View File

@ -6,6 +6,7 @@ use super::{EdgeDetails, RankingRuleGraphTrait};
use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::WordDerivations;
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
use roaring::RoaringBitmap;
@ -64,7 +65,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
paths: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: &[Vec<u16>],
distances: &[Vec<(u16, SmallBitmap)>],
cost: u16,
logger: &mut dyn SearchLogger<QueryGraph>,
) {

View File

@ -4,6 +4,7 @@ use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::new::resolve_query_graph::resolve_phrase;
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Result, RoaringBitmapCodec};
use heed::BytesDecode;
@ -123,7 +124,7 @@ impl RankingRuleGraphTrait for TypoGraph {
paths: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: &[Vec<u16>],
distances: &[Vec<(u16, SmallBitmap)>],
cost: u16,
logger: &mut dyn SearchLogger<QueryGraph>,
) {