Support negative phrases

This commit is contained in:
Clément Renault
2024-03-28 15:51:43 +01:00
parent 69f8b2730d
commit 877f4b1045
4 changed files with 85 additions and 18 deletions

View File

@ -33,7 +33,9 @@ use interner::{DedupInterner, Interner};
pub use logger::visual::VisualSearchLogger;
pub use logger::{DefaultSearchLogger, SearchLogger};
use query_graph::{QueryGraph, QueryNode};
use query_term::{located_query_terms_from_tokens, LocatedQueryTerm, Phrase, QueryTerm};
use query_term::{
located_query_terms_from_tokens, ExtractedTokens, LocatedQueryTerm, Phrase, QueryTerm,
};
use ranking_rules::{
BoxRankingRule, PlaceholderQuery, RankingRule, RankingRuleOutput, RankingRuleQueryTrait,
};
@ -223,6 +225,21 @@ fn resolve_negative_words(
Ok(negative_bitmap)
}
/// Collects the ids of every document matching at least one negative phrase.
///
/// Each entry of `negative_phrases` is looked up in the term interner of `ctx`.
/// Entries whose query term has no original phrase are skipped; for the others,
/// the phrase docids are fetched through `ctx` and merged into the result.
///
/// Returns the union of all those docids, or the first error raised while
/// fetching the docids of a phrase.
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_negative_phrases(
    ctx: &mut SearchContext,
    negative_phrases: &[LocatedQueryTerm],
) -> Result<RoaringBitmap> {
    let mut excluded_docids = RoaringBitmap::new();

    for located_term in negative_phrases {
        // Resolve the phrase first so the lookup in the interner is finished
        // before `ctx` is used again to fetch the docids.
        let phrase = match ctx.term_interner.get(located_term.value).original_phrase() {
            Some(phrase) => phrase,
            // Not a phrase: nothing to exclude for this term.
            None => continue,
        };

        let phrase_docids = ctx.get_phrase_docids(phrase)?;
        excluded_docids |= phrase_docids;
    }

    Ok(excluded_docids)
}
/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>,
@ -636,12 +653,15 @@ pub fn execute_search(
let tokens = tokenizer.tokenize(query);
drop(entered);
let (query_terms, negative_words) =
let ExtractedTokens { query_terms, negative_words, negative_phrases } =
located_query_terms_from_tokens(ctx, tokens, words_limit)?;
used_negative_operator = !negative_words.is_empty();
used_negative_operator = !negative_words.is_empty() || !negative_phrases.is_empty();
let ignored_documents = resolve_negative_words(ctx, &negative_words)?;
let ignored_phrases = resolve_negative_phrases(ctx, &negative_phrases)?;
universe -= ignored_documents;
universe -= ignored_phrases;
if query_terms.is_empty() {
// Do a placeholder search instead