Add tests for the "exactness" ranking rule and fix the universe computation

This commit is contained in:
Loïc Lecrenier
2023-04-24 16:57:12 +02:00
parent d1fdbb63da
commit 8f2e971879
3 changed files with 533 additions and 9 deletions

View File

@ -51,6 +51,7 @@ use resolve_query_graph::compute_query_graph_docids;
use sort::Sort;
use self::interner::Interned;
use self::query_term::ExactTerm;
/// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> {
@ -119,6 +120,75 @@ fn resolve_maximally_reduced_query_graph(
Ok(docids)
}
/// Collects the documents of `universe` that contain at least one exact term of the query.
///
/// Every node of `query_graph` is visited. A node contributes only when it is a term node,
/// its `term_ids` range has a length of exactly one, and `exact_term` yields a value for its
/// term subset. The document ids of that exact phrase or original word are intersected with
/// `universe` and unioned into the result.
///
/// # Errors
///
/// Propagates any error returned by `get_phrase_docids` or `word_docids`.
fn resolve_docids_containing_any_exact_word(
    ctx: &mut SearchContext,
    universe: &RoaringBitmap,
    query_graph: &QueryGraph,
) -> Result<RoaringBitmap> {
    let mut matching_docids = RoaringBitmap::new();

    for (_, node) in query_graph.nodes.iter() {
        // Start, end and deleted nodes carry no term and are skipped.
        let located_term = match &node.data {
            query_graph::QueryNodeData::Term(located_term) => located_term,
            query_graph::QueryNodeData::Deleted
            | query_graph::QueryNodeData::Start
            | query_graph::QueryNodeData::End => continue,
        };

        // Only nodes whose `term_ids` range has length one are considered.
        if located_term.term_ids.len() != 1 {
            continue;
        }

        let exact_term = located_term.term_subset.exact_term(ctx);
        match exact_term {
            Some(ExactTerm::Phrase(phrase)) => {
                matching_docids |= ctx.get_phrase_docids(phrase)? & universe;
            }
            Some(ExactTerm::Word(word)) => {
                // A word for which no docids are returned contributes nothing.
                if let Some(word_docids) = ctx.word_docids(Word::Original(word))? {
                    matching_docids |= word_docids & universe;
                }
            }
            None => {}
        }
    }

    Ok(matching_docids)
}
/// Computes the set of candidate documents for a query graph search.
///
/// The result always contains the documents returned by
/// `resolve_maximally_reduced_query_graph` for the given `matching_strategy`. When `method`
/// is `UniverseResolutionMethod::TermMatchingStrategyAndExactness`, the documents returned by
/// `resolve_docids_containing_any_exact_word` (computed against `initial_universe`) are
/// unioned in as well.
///
/// # Errors
///
/// Propagates any error returned by either resolution function.
fn resolve_universe(
    ctx: &mut SearchContext,
    initial_universe: &RoaringBitmap,
    query_graph: &QueryGraph,
    method: UniverseResolutionMethod,
    matching_strategy: TermsMatchingStrategy,
    logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<RoaringBitmap> {
    // Both methods start from the universe given by the terms matching strategy.
    let mut candidates = resolve_maximally_reduced_query_graph(
        ctx,
        initial_universe,
        query_graph,
        matching_strategy,
        logger,
    )?;

    match method {
        UniverseResolutionMethod::TermMatchingStrategyOnly => {}
        UniverseResolutionMethod::TermMatchingStrategyAndExactness => {
            let exact_docids =
                resolve_docids_containing_any_exact_word(ctx, initial_universe, query_graph)?;
            candidates |= exact_docids;
        }
    }

    Ok(candidates)
}
/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>,
@ -163,12 +233,17 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
Ok(ranking_rules)
}
/// Selects how `resolve_universe` computes the set of candidate documents
/// for a query graph search.
enum UniverseResolutionMethod {
/// Use only the documents returned by `resolve_maximally_reduced_query_graph`
/// for the terms matching strategy.
TermMatchingStrategyOnly,
/// Use the documents returned by `resolve_maximally_reduced_query_graph`, plus
/// the documents returned by `resolve_docids_containing_any_exact_word`.
TermMatchingStrategyAndExactness,
}
/// Return the list of initialised ranking rules to be used for a query graph search.
fn get_ranking_rules_for_query_graph_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
) -> Result<(Vec<BoxRankingRule<'ctx, QueryGraph>>, UniverseResolutionMethod)> {
// query graph search
let mut words = false;
let mut typo = false;
@ -179,10 +254,12 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
let mut asc = HashSet::new();
let mut desc = HashSet::new();
let mut universe_resolution_method = UniverseResolutionMethod::TermMatchingStrategyOnly;
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
// Add Words before any of: typo, proximity, attribute, exactness
// Add Words before any of: typo, proximity, attribute
match rr {
crate::Criterion::Typo | crate::Criterion::Attribute | crate::Criterion::Proximity => {
if !words {
@ -236,6 +313,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
ranking_rules.push(Box::new(ExactAttribute::new()));
ranking_rules.push(Box::new(Exactness::new()));
exactness = true;
if !words {
universe_resolution_method =
UniverseResolutionMethod::TermMatchingStrategyAndExactness;
}
}
crate::Criterion::Asc(field_name) => {
if asc.contains(&field_name) {
@ -253,7 +335,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
}
}
}
Ok(ranking_rules)
Ok((ranking_rules, universe_resolution_method))
}
fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
@ -335,19 +417,18 @@ pub fn execute_search(
check_sort_criteria(ctx, sort_criteria.as_ref())?;
// TODO: if the exactness criterion is the first one, then
// use a different strategy to find the universe (union of any term)
universe = resolve_maximally_reduced_query_graph(
let (ranking_rules, universe_resolution_method) =
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
universe = resolve_universe(
ctx,
&universe,
&graph,
universe_resolution_method,
terms_matching_strategy,
query_graph_logger,
)?;
let ranking_rules =
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
} else {
let ranking_rules = get_ranking_rules_for_placeholder_search(ctx, sort_criteria)?;