Add "position" part of the attribute ranking rule

This commit is contained in:
Loïc Lecrenier
2023-04-13 10:46:09 +02:00
parent 8edad8291b
commit bd9aba4d77
11 changed files with 314 additions and 31 deletions

View File

@ -9,22 +9,22 @@ use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct AttributeCondition {
pub struct FidCondition {
term: LocatedQueryTermSubset,
fid: u16,
}
pub enum AttributeGraph {}
pub enum FidGraph {}
impl RankingRuleGraphTrait for AttributeGraph {
type Condition = AttributeCondition;
impl RankingRuleGraphTrait for FidGraph {
type Condition = FidCondition;
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let AttributeCondition { term, .. } = condition;
let FidCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let mut docids = compute_query_term_subset_docids_within_field_id(
ctx,
@ -73,7 +73,7 @@ impl RankingRuleGraphTrait for AttributeGraph {
// the term subsets associated to each field ids fetched.
edges.push((
fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(AttributeCondition {
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid,
}),

View File

@ -11,9 +11,11 @@ mod condition_docids_cache;
mod dead_ends_cache;
/// Implementation of the `attribute` ranking rule
mod attribute;
mod fid;
/// Implementation of the `exactness` ranking rule
mod exactness;
/// Implementation of the `position` ranking rule
mod position;
/// Implementation of the `proximity` ranking rule
mod proximity;
/// Implementation of the `typo` ranking rule
@ -21,11 +23,12 @@ mod typo;
use std::hash::Hash;
pub use attribute::{AttributeCondition, AttributeGraph};
pub use fid::{FidCondition, FidGraph};
pub use cheapest_paths::PathVisitor;
pub use condition_docids_cache::ConditionDocIdsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use exactness::{ExactnessCondition, ExactnessGraph};
pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap;
pub use typo::{TypoCondition, TypoGraph};

View File

@ -0,0 +1,93 @@
use fxhash::FxHashSet;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
use crate::search::new::SearchContext;
use crate::Result;
/// Edge condition of the [`PositionGraph`] ranking rule graph.
///
/// It pairs a located query term subset with a single position value. When the
/// condition is resolved, the document ids are computed for `term` restricted
/// to `position`.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct PositionCondition {
// The query term subset this condition applies to; it is also reported as the
// `end_term_subset` of the computed condition.
term: LocatedQueryTermSubset,
// The position handed to `compute_query_term_subset_docids_within_position`.
// NOTE(review): presumably the word position stored in the word-position
// databases — confirm against the indexing code.
position: u16,
}
/// Marker type for the `position` ranking rule graph.
///
/// The enum has no variants, so it can never be instantiated; it only carries
/// the `RankingRuleGraphTrait` implementation.
pub enum PositionGraph {}
impl RankingRuleGraphTrait for PositionGraph {
    type Condition = PositionCondition;

    /// Resolves a [`PositionCondition`] into the documents that satisfy it.
    ///
    /// The document ids matching the condition's term subset at the condition's
    /// position are computed, then intersected with `universe`. The returned
    /// [`ComputedCondition`] has no start term subset and uses a clone of the
    /// condition's term as its end term subset.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while computing the document ids.
    fn resolve_condition(
        ctx: &mut SearchContext,
        condition: &Self::Condition,
        universe: &RoaringBitmap,
    ) -> Result<ComputedCondition> {
        let PositionCondition { term, position } = condition;

        // The helper does not take the universe as an argument (maybe it should),
        // so the result is restricted to the universe right after the call.
        let mut docids =
            compute_query_term_subset_docids_within_position(ctx, &term.term_subset, *position)?;
        docids &= universe;

        let universe_len = universe.len();
        Ok(ComputedCondition {
            docids,
            universe_len,
            start_term_subset: None,
            end_term_subset: term.clone(),
        })
    }

    /// Builds the edges leading to `to_term`, one per distinct position found
    /// for that term.
    ///
    /// Positions are gathered from three sources: every single word of the term
    /// subset (prefix database excluded), every word of every phrase of the term
    /// subset, and the prefix database entry when the subset uses one.
    ///
    /// For a position `p` and `n = to_term.term_ids.len()`, the edge cost is the
    /// sum of `p + i` for `i` in `0..n`. The source term (`_from`) is ignored.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while reading words, phrases or positions.
    fn build_edges(
        ctx: &mut SearchContext,
        conditions_interner: &mut DedupInterner<Self::Condition>,
        _from: Option<&LocatedQueryTermSubset>,
        to_term: &LocatedQueryTermSubset,
    ) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
        let subset = &to_term.term_subset;
        let mut seen_positions = FxHashSet::default();

        // Positions of the plain words.
        for single_word in subset.all_single_words_except_prefix_db(ctx)? {
            seen_positions.extend(ctx.get_db_word_positions(single_word.interned())?);
        }

        // Positions of each word that is present in a phrase.
        for phrase in subset.all_phrases(ctx)? {
            let phrase_words = phrase.words(ctx);
            for phrase_word in phrase_words.iter().flatten().copied() {
                seen_positions.extend(ctx.get_db_word_positions(phrase_word)?);
            }
        }

        // Positions of the prefix, when the prefix database is used.
        if let Some(prefix) = subset.use_prefix_db(ctx) {
            seen_positions.extend(ctx.get_db_word_prefix_positions(prefix.interned())?);
        }

        let term_count = to_term.term_ids.len();

        // TODO: We can improve performances and relevancy by storing
        // the term subsets associated to each position fetched.
        let edges = seen_positions
            .into_iter()
            .map(|position| {
                let cost: u32 = (0..term_count).map(|i| position as u32 + i as u32).sum();
                let condition = conditions_interner.insert(PositionCondition {
                    term: to_term.clone(), // TODO remove this ugly clone
                    position,
                });
                (cost, condition)
            })
            .collect();

        Ok(edges)
    }
}