Prune the query graph after executing a ranking rule

This commit is contained in:
Loïc Lecrenier
2023-03-15 16:08:43 +01:00
parent 05fe856e6e
commit a49ddec9df
9 changed files with 401 additions and 58 deletions

View File

@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
empty_paths_cache,
&mut visit,
&mut vec![],
&mut SmallBitmap::new(self.edges_store.len()),
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
&mut empty_paths_cache.conditions.clone(),
)?;
Ok(())

View File

@ -16,6 +16,7 @@ mod proximity;
/// Implementation of the `typo` ranking rule
mod typo;
use std::collections::HashSet;
use std::hash::Hash;
pub use edge_docids_cache::EdgeConditionDocIdsCache;
@ -26,6 +27,7 @@ pub use typo::{TypoEdge, TypoGraph};
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
use super::logger::SearchLogger;
use super::query_term::Phrase;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
@ -82,7 +84,19 @@ pub trait RankingRuleGraphTrait: Sized {
/// Return the label of the given edge condition, to be used when visualising
/// the ranking rule graph.
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String;
/// Return the label of the given edge condition, to be used when visualising
/// the ranking rule graph.
///
/// `ctx` is made available so that implementations can resolve the interned
/// values held by the condition (e.g. terms, words, phrases) into text.
fn label_for_edge_condition<'ctx>(
ctx: &mut SearchContext<'ctx>,
edge: &Self::EdgeCondition,
) -> Result<String>;
/// Return the set of interned words used by the given edge condition.
///
/// NOTE(review): presumably consumed when pruning the query graph after a
/// ranking rule has run — confirm against the caller.
fn words_used_by_edge_condition<'ctx>(
ctx: &mut SearchContext<'ctx>,
edge: &Self::EdgeCondition,
) -> Result<HashSet<Interned<String>>>;
/// Return the set of interned phrases used by the given edge condition.
///
/// NOTE(review): presumably consumed when pruning the query graph after a
/// ranking rule has run — confirm against the caller.
fn phrases_used_by_edge_condition<'ctx>(
ctx: &mut SearchContext<'ctx>,
edge: &Self::EdgeCondition,
) -> Result<HashSet<Interned<Phrase>>>;
/// Compute the document ids associated with the given edge condition,
/// restricted to the given universe.

View File

@ -1,6 +1,9 @@
pub mod build;
pub mod compute_docids;
use std::collections::HashSet;
use std::iter::FromIterator;
use roaring::RoaringBitmap;
use super::empty_paths_cache::DeadEndPathCache;
@ -44,17 +47,6 @@ pub enum ProximityGraph {}
impl RankingRuleGraphTrait for ProximityGraph {
type EdgeCondition = ProximityCondition;
/// Short label for a proximity edge condition: the term itself for a `Term`
/// condition, or the number of word pairs for a `Pairs` condition.
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
    match edge {
        ProximityCondition::Term { term } => format!("term {term}"),
        ProximityCondition::Pairs { pairs } => {
            let pair_count = pairs.len();
            format!("pairs {}", pair_count)
        }
    }
}
fn resolve_edge_condition<'ctx>(
ctx: &mut SearchContext<'ctx>,
condition: &Self::EdgeCondition,
@ -83,4 +75,113 @@ impl RankingRuleGraphTrait for ProximityGraph {
) {
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost);
}
/// Build the human-readable label of a proximity edge condition.
///
/// * `Term` conditions produce `"<original word> : exists"`.
/// * `Pairs` conditions produce one line per word pair, showing the two
///   words (with `...` marking the prefix side), the proximity, and — when
///   the pair carries phrases — a leading `"<n> phrases + "`.
///
/// Interned terms and words are resolved to text through `ctx`.
fn label_for_edge_condition<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    edge: &Self::EdgeCondition,
) -> Result<String> {
    // Needed for `write!`/`writeln!` on a `String`; imported locally so the
    // function does not depend on the file-level imports.
    use std::fmt::Write as _;

    match edge {
        ProximityCondition::Term { term } => {
            let term = ctx.term_interner.get(*term);
            Ok(format!("{} : exists", ctx.word_interner.get(term.original)))
        }
        ProximityCondition::Pairs { pairs } => {
            let mut s = String::new();
            // Formatting straight into `s` avoids the temporary `String`
            // that `push_str(&format!(..))` would allocate for every append.
            // Writing into a `String` does not fail, hence the `unwrap`s.
            for pair in pairs.iter() {
                match pair {
                    WordPair::Words { phrases, left, right, proximity } => {
                        let left = ctx.word_interner.get(*left);
                        let right = ctx.word_interner.get(*right);
                        if !phrases.is_empty() {
                            write!(&mut s, "{} phrases + ", phrases.len()).unwrap();
                        }
                        writeln!(&mut s, "\"{left} {right}\": {proximity}").unwrap();
                    }
                    WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
                        let left = ctx.word_interner.get(*left);
                        let right = ctx.word_interner.get(*right_prefix);
                        if !phrases.is_empty() {
                            write!(&mut s, "{} phrases + ", phrases.len()).unwrap();
                        }
                        writeln!(&mut s, "\"{left} {right}...\" : {proximity}").unwrap();
                    }
                    WordPair::WordPrefixSwapped { left_prefix, right, proximity } => {
                        let left = ctx.word_interner.get(*left_prefix);
                        let right = ctx.word_interner.get(*right);
                        writeln!(&mut s, "\"{left}... {right}\" : {proximity}").unwrap();
                    }
                }
            }
            Ok(s)
        }
    }
}
/// Collect every interned word referenced by a proximity edge condition.
///
/// For a `Term` condition these are the term's single words excluding the
/// prefix-db word; for a `Pairs` condition, both sides of every word pair.
fn words_used_by_edge_condition<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    edge: &Self::EdgeCondition,
) -> Result<HashSet<Interned<String>>> {
    let mut words: HashSet<Interned<String>> = HashSet::new();
    match edge {
        ProximityCondition::Term { term } => {
            words.extend(ctx.term_interner.get(*term).all_single_words_except_prefix_db());
        }
        ProximityCondition::Pairs { pairs } => {
            words.extend(pairs.iter().flat_map(|pair| match pair {
                WordPair::Words { left, right, .. } => [*left, *right],
                // TODO: this is not correct, there should be another trait method for collecting the prefixes
                // to be used with the prefix DBs
                WordPair::WordPrefix { left, right_prefix, .. } => [*left, *right_prefix],
                // TODO: this is not correct, there should be another trait method for collecting the prefixes
                // to be used with the prefix DBs
                WordPair::WordPrefixSwapped { left_prefix, right, .. } => [*left_prefix, *right],
            }));
        }
    }
    Ok(words)
}
/// Collect every interned phrase referenced by a proximity edge condition.
///
/// For a `Term` condition these are all the phrases of the term; for a
/// `Pairs` condition, the phrases attached to each `Words` / `WordPrefix`
/// pair (`WordPrefixSwapped` pairs carry none).
fn phrases_used_by_edge_condition<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    edge: &Self::EdgeCondition,
) -> Result<HashSet<Interned<Phrase>>> {
    let mut used: HashSet<Interned<Phrase>> = HashSet::new();
    match edge {
        ProximityCondition::Term { term } => {
            used.extend(ctx.term_interner.get(*term).all_phrases());
        }
        ProximityCondition::Pairs { pairs } => {
            for pair in pairs.iter() {
                match pair {
                    WordPair::Words { phrases, .. } => used.extend(phrases.iter().copied()),
                    WordPair::WordPrefix { phrases, .. } => used.extend(phrases.iter().copied()),
                    WordPair::WordPrefixSwapped { .. } => {}
                }
            }
        }
    }
    Ok(used)
}
}

View File

@ -5,10 +5,13 @@ use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
use crate::search::new::logger::SearchLogger;
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
use std::collections::HashSet;
use std::fmt::Write;
use std::iter::FromIterator;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct TypoEdge {
@ -21,10 +24,6 @@ pub enum TypoGraph {}
impl RankingRuleGraphTrait for TypoGraph {
type EdgeCondition = TypoEdge;
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
format!(", {} typos", edge.nbr_typos)
}
fn resolve_edge_condition<'db_cache, 'ctx>(
ctx: &mut SearchContext<'ctx>,
edge: &Self::EdgeCondition,
@ -147,4 +146,78 @@ impl RankingRuleGraphTrait for TypoGraph {
) {
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost);
}
/// Build the human-readable label of a typo edge condition.
///
/// The label has one line per derivation of the edge's query term, in this
/// order: phrase, zero-typo word, prefix-of words, one-typo words, two-typo
/// words, split words, synonyms, and the prefix-db word.
fn label_for_edge_condition<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    edge: &Self::EdgeCondition,
) -> Result<String> {
    let TypoEdge { term, nbr_typos: _ } = edge;
    // Every field is listed explicitly (no `..`), so adding a field to
    // `QueryTerm` makes this function fail to compile until it is handled.
    let QueryTerm {
        original: _,
        is_ngram: _,
        is_prefix: _,
        phrase,
        zero_typo,
        prefix_of,
        synonyms,
        split_words,
        one_typo,
        two_typos,
        use_prefix_db,
    } = ctx.term_interner.get(*term);

    let mut label = String::new();

    if let Some(id) = *phrase {
        let phrase = ctx.phrase_interner.get(id).description(&ctx.word_interner);
        writeln!(&mut label, "\"{phrase}\" : phrase").unwrap();
    }
    if let Some(id) = *zero_typo {
        writeln!(&mut label, "\"{w}\" : 0 typo", w = ctx.word_interner.get(id)).unwrap();
    }
    for &id in prefix_of.iter() {
        writeln!(&mut label, "\"{w}\" : prefix", w = ctx.word_interner.get(id)).unwrap();
    }
    for &id in one_typo.iter() {
        writeln!(&mut label, "\"{w}\" : 1 typo", w = ctx.word_interner.get(id)).unwrap();
    }
    for &id in two_typos.iter() {
        writeln!(&mut label, "\"{w}\" : 2 typos", w = ctx.word_interner.get(id)).unwrap();
    }
    if let Some(id) = *split_words {
        let phrase = ctx.phrase_interner.get(id).description(&ctx.word_interner);
        writeln!(&mut label, "\"{phrase}\" : split words").unwrap();
    }
    for &id in synonyms.iter() {
        let phrase = ctx.phrase_interner.get(id).description(&ctx.word_interner);
        writeln!(&mut label, "\"{phrase}\" : synonym").unwrap();
    }
    if let Some(id) = *use_prefix_db {
        writeln!(&mut label, "\"{w}\" : use prefix db", w = ctx.word_interner.get(id)).unwrap();
    }

    Ok(label)
}
/// Collect the interned words used by a typo edge condition: the single
/// words of the edge's query term, excluding the prefix-db word.
fn words_used_by_edge_condition<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    edge: &Self::EdgeCondition,
) -> Result<HashSet<Interned<String>>> {
    let query_term = ctx.term_interner.get(edge.term);
    let mut words: HashSet<Interned<String>> = HashSet::new();
    words.extend(query_term.all_single_words_except_prefix_db());
    Ok(words)
}
/// Collect the interned phrases used by a typo edge condition: all the
/// phrases of the edge's query term.
fn phrases_used_by_edge_condition<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    edge: &Self::EdgeCondition,
) -> Result<HashSet<Interned<Phrase>>> {
    let query_term = ctx.term_interner.get(edge.term);
    let mut phrases: HashSet<Interned<Phrase>> = HashSet::new();
    phrases.extend(query_term.all_phrases());
    Ok(phrases)
}
}