mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-31 10:50:03 +00:00
Apply a few optimisations for graph-based ranking rules
This commit is contained in:
@ -111,6 +111,8 @@ pub fn visit_to_node<'search, 'from_data>(
|
||||
for word1 in derivations1.clone() {
|
||||
for proximity in 1..=(8 - ngram_len2) {
|
||||
let cost = (proximity + ngram_len2 - 1) as u8;
|
||||
// TODO: if we had access to the universe here, we could already check whether
|
||||
// the bitmap corresponding to this word pair is disjoint with the universe or not
|
||||
if ctx
|
||||
.get_word_prefix_pair_proximity_docids(
|
||||
word1,
|
||||
@ -183,8 +185,13 @@ pub fn visit_to_node<'search, 'from_data>(
|
||||
.flat_map(|(cost, proximity_word_pairs)| {
|
||||
let mut edges = vec![];
|
||||
for (proximity, word_pairs) in proximity_word_pairs {
|
||||
edges
|
||||
.push((cost, EdgeDetails::Data(ProximityEdge { pairs: word_pairs, proximity })))
|
||||
edges.push((
|
||||
cost,
|
||||
EdgeDetails::Data(ProximityEdge {
|
||||
pairs: word_pairs.into_boxed_slice(),
|
||||
proximity,
|
||||
}),
|
||||
))
|
||||
}
|
||||
edges
|
||||
})
|
||||
|
@ -1,14 +1,15 @@
|
||||
use super::{ProximityEdge, WordPair};
|
||||
use crate::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub fn compute_docids<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &ProximityEdge,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let ProximityEdge { pairs, proximity } = edge;
|
||||
let mut pair_docids = vec![];
|
||||
let mut pair_docids = RoaringBitmap::new();
|
||||
for pair in pairs.iter() {
|
||||
let bytes = match pair {
|
||||
WordPair::Words { left, right } => {
|
||||
@ -21,10 +22,11 @@ pub fn compute_docids<'search>(
|
||||
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
|
||||
}
|
||||
}?;
|
||||
let bitmap =
|
||||
bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
|
||||
pair_docids.push(bitmap);
|
||||
// TODO: deserialize the bitmap within a universe, and (maybe) use a bump allocator?
|
||||
let bitmap = universe
|
||||
& bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
|
||||
pair_docids |= bitmap;
|
||||
}
|
||||
let docids = MultiOps::union(pair_docids);
|
||||
Ok(docids)
|
||||
|
||||
Ok(pair_docids)
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
// TODO: intern the strings, refer to them by their pointer?
|
||||
// TODO: intern the proximity edges as well?
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum WordPair {
|
||||
@ -21,8 +21,7 @@ pub enum WordPair {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ProximityEdge {
|
||||
// TODO: use a list of pointers to the word pairs instead?
|
||||
pairs: Vec<WordPair>,
|
||||
pairs: Box<[WordPair]>,
|
||||
proximity: u8,
|
||||
}
|
||||
|
||||
@ -40,8 +39,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
fn compute_docids<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &Self::EdgeDetails,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<roaring::RoaringBitmap> {
|
||||
compute_docids::compute_docids(ctx, edge)
|
||||
compute_docids::compute_docids(ctx, edge, universe)
|
||||
}
|
||||
|
||||
fn build_visit_from_node<'search>(
|
||||
@ -61,11 +61,11 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
|
||||
fn log_state(
|
||||
graph: &super::RankingRuleGraph<Self>,
|
||||
paths: &[Vec<u32>],
|
||||
paths: &[Vec<u16>],
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &[Vec<u64>],
|
||||
cost: u64,
|
||||
distances: &[Vec<u16>],
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
logger.log_proximity_state(
|
||||
|
Reference in New Issue
Block a user