Apply a few optimisations for graph-based ranking rules

This commit is contained in:
Loïc Lecrenier
2023-03-07 14:42:58 +01:00
parent e8c76cf7bf
commit 9051065c22
19 changed files with 682 additions and 438 deletions

View File

@@ -111,6 +111,8 @@ pub fn visit_to_node<'search, 'from_data>(
for word1 in derivations1.clone() {
for proximity in 1..=(8 - ngram_len2) {
let cost = (proximity + ngram_len2 - 1) as u8;
// TODO: if we had access to the universe here, we could already check whether
// the bitmap corresponding to this word pair is disjoint with the universe or not
if ctx
.get_word_prefix_pair_proximity_docids(
word1,
@@ -183,8 +185,13 @@ pub fn visit_to_node<'search, 'from_data>(
.flat_map(|(cost, proximity_word_pairs)| {
let mut edges = vec![];
for (proximity, word_pairs) in proximity_word_pairs {
edges
.push((cost, EdgeDetails::Data(ProximityEdge { pairs: word_pairs, proximity })))
edges.push((
cost,
EdgeDetails::Data(ProximityEdge {
pairs: word_pairs.into_boxed_slice(),
proximity,
}),
))
}
edges
})

View File

@@ -1,14 +1,15 @@
use super::{ProximityEdge, WordPair};
use crate::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
use roaring::{MultiOps, RoaringBitmap};
use roaring::RoaringBitmap;
pub fn compute_docids<'search>(
ctx: &mut SearchContext<'search>,
edge: &ProximityEdge,
universe: &RoaringBitmap,
) -> Result<RoaringBitmap> {
let ProximityEdge { pairs, proximity } = edge;
let mut pair_docids = vec![];
let mut pair_docids = RoaringBitmap::new();
for pair in pairs.iter() {
let bytes = match pair {
WordPair::Words { left, right } => {
@@ -21,10 +22,11 @@ pub fn compute_docids<'search>(
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
}
}?;
let bitmap =
bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
pair_docids.push(bitmap);
// TODO: deserialize bitmap within a universe, and (maybe) using a bump allocator?
let bitmap = universe
& bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
pair_docids |= bitmap;
}
let docids = MultiOps::union(pair_docids);
Ok(docids)
Ok(pair_docids)
}

View File

@@ -10,7 +10,7 @@ use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
use roaring::RoaringBitmap;
// TODO: intern the strings, refer to them by their pointer?
// TODO: intern the proximity edges as well?
#[derive(Clone)]
pub enum WordPair {
@@ -21,8 +21,7 @@ pub enum WordPair {
#[derive(Clone)]
pub struct ProximityEdge {
// TODO: use a list of pointers to the word pairs instead?
pairs: Vec<WordPair>,
pairs: Box<[WordPair]>,
proximity: u8,
}
@@ -40,8 +39,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
fn compute_docids<'search>(
ctx: &mut SearchContext<'search>,
edge: &Self::EdgeDetails,
universe: &RoaringBitmap,
) -> Result<roaring::RoaringBitmap> {
compute_docids::compute_docids(ctx, edge)
compute_docids::compute_docids(ctx, edge, universe)
}
fn build_visit_from_node<'search>(
@@ -61,11 +61,11 @@ impl RankingRuleGraphTrait for ProximityGraph {
fn log_state(
graph: &super::RankingRuleGraph<Self>,
paths: &[Vec<u32>],
paths: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: &[Vec<u64>],
cost: u64,
distances: &[Vec<u16>],
cost: u16,
logger: &mut dyn SearchLogger<QueryGraph>,
) {
logger.log_proximity_state(