Make bucket candidates optionals

This commit is contained in:
many
2021-05-05 20:46:56 +02:00
parent c620626515
commit e923d51b8f
9 changed files with 276 additions and 346 deletions

View File

@@ -94,7 +94,6 @@ impl<'t> Criterion for AscDesc<'t> {
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
self.query_tree = query_tree;
let candidates = match (&self.query_tree, candidates) {
(_, Some(mut candidates)) => {
@@ -103,7 +102,7 @@ impl<'t> Criterion for AscDesc<'t> {
},
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), params.wdcache)?;
let mut candidates = resolve_query_tree(&context, qt, params.wdcache)?;
candidates -= params.excluded_candidates;
candidates.intersect_with(&self.faceted_candidates);
candidates
@@ -111,15 +110,9 @@ impl<'t> Criterion for AscDesc<'t> {
(None, None) => take(&mut self.faceted_candidates),
};
// If our parent returns candidates it means that the bucket
// candidates were already computed before and we can use them.
//
// If not, we must use the just computed candidates as our bucket
// candidates.
if candidates_is_some {
self.bucket_candidates.union_with(&bucket_candidates);
} else {
self.bucket_candidates.union_with(&candidates);
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
if candidates.is_empty() {
@@ -143,7 +136,7 @@ impl<'t> Criterion for AscDesc<'t> {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
candidates: Some(candidates),
bucket_candidates: take(&mut self.bucket_candidates),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
}

View File

@@ -24,13 +24,12 @@ const LEVEL_EXPONENTIATION_BASE: u32 = 4;
/// the system to choose between one algorithm or another.
const CANDIDATES_THRESHOLD: u64 = 1000;
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
pub struct Attribute<'t> {
ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
bucket_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>,
flattened_query_tree: Option<Vec<Vec<Vec<Query>>>>,
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
}
@@ -38,11 +37,9 @@ impl<'t> Attribute<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
Attribute {
ctx,
query_tree: None,
candidates: None,
state: None,
bucket_candidates: RoaringBitmap::new(),
parent,
flattened_query_tree: None,
current_buckets: None,
}
}
@@ -52,29 +49,25 @@ impl<'t> Criterion for Attribute<'t> {
#[logging_timer::time("Attribute::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop.
if let Some(candidates) = self.candidates.as_mut() {
*candidates -= params.excluded_candidates;
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
*allowed_candidates -= params.excluded_candidates;
}
loop {
match (&self.query_tree, &mut self.candidates) {
(_, Some(candidates)) if candidates.is_empty() => {
match self.state.take() {
Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(),
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
(Some(qt), Some(candidates)) => {
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| {
flatten_query_tree(&qt)
});
let found_candidates = if candidates.len() < CANDIDATES_THRESHOLD {
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
let current_buckets = match self.current_buckets.as_mut() {
Some(current_buckets) => current_buckets,
None => {
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?;
let new_buckets = linear_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates)?;
self.current_buckets.get_or_insert(new_buckets.into_iter())
},
};
@@ -83,62 +76,60 @@ impl<'t> Criterion for Attribute<'t> {
Some((_score, candidates)) => candidates,
None => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(),
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
}
} else {
match set_compute_candidates(self.ctx, flattened_query_tree, candidates, params.wdcache)? {
match set_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates, params.wdcache)? {
Some(candidates) => candidates,
None => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(),
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
}
};
candidates.difference_with(&found_candidates);
allowed_candidates -= &found_candidates;
self.state = Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
query_tree: Some(query_tree),
candidates: Some(found_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
(Some(qt), None) => {
let mut query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), params.wdcache)?;
query_tree_candidates -= params.excluded_candidates;
self.bucket_candidates |= &query_tree_candidates;
self.candidates = Some(query_tree_candidates);
},
(None, Some(_)) => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(),
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(None, None) => {
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
let candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
};
let flattened_query_tree = flatten_query_tree(&query_tree);
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.state = Some((query_tree, flattened_query_tree, candidates));
self.current_buckets = None;
},
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
candidates,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree;
self.candidates = candidates;
self.bucket_candidates |= bucket_candidates;
self.flattened_query_tree = None;
self.current_buckets = None;
},
None => return Ok(None),
}
},
@@ -467,7 +458,7 @@ impl<'t, 'q> Eq for Branch<'t, 'q> {}
fn initialize_query_level_iterators<'t, 'q>(
ctx: &'t dyn Context<'t>,
branches: &'q Vec<Vec<Vec<Query>>>,
branches: &'q FlattenedQueryTree,
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
@@ -517,7 +508,7 @@ fn initialize_query_level_iterators<'t, 'q>(
fn set_compute_candidates<'t>(
ctx: &'t dyn Context<'t>,
branches: &Vec<Vec<Vec<Query>>>,
branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Option<RoaringBitmap>>
@@ -570,11 +561,11 @@ fn set_compute_candidates<'t>(
fn linear_compute_candidates(
ctx: &dyn Context,
branches: &Vec<Vec<Vec<Query>>>,
branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap,
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>>
{
fn compute_candidate_rank(branches: &Vec<Vec<Vec<Query>>>, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
let mut min_rank = u64::max_value();
for branch in branches {
@@ -659,10 +650,10 @@ fn linear_compute_candidates(
}
// TODO can we keep refs of Query
fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
use crate::search::criteria::Operation::{And, Or, Consecutive};
fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec<Vec<Vec<Query>>> {
fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
match tail.split_first() {
Some((thead, tail)) => {
let tail = and_recurse(thead, tail);
@@ -680,7 +671,7 @@ fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
}
}
fn recurse(op: &Operation) -> Vec<Vec<Vec<Query>>> {
fn recurse(op: &Operation) -> FlattenedQueryTree {
match op {
And(ops) | Consecutive(ops) => {
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))

View File

@@ -1,4 +1,4 @@
use std::{collections::HashMap, mem};
use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
@@ -60,13 +60,13 @@ impl<'t> Criterion for Exactness<'t> {
self.query_tree = None;
},
Some(state) => {
let (candidates, state) = resolve_state(self.ctx, mem::take(state), &self.query)?;
let (candidates, state) = resolve_state(self.ctx, take(state), &self.query)?;
self.state = state;
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
candidates: Some(candidates),
bucket_candidates: mem::take(&mut self.bucket_candidates),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => {
@@ -74,11 +74,16 @@ impl<'t> Criterion for Exactness<'t> {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
let candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, &mut HashMap::new(), params.wdcache)?,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)?,
};
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.state = Some(State::new(candidates));
self.query_tree = Some(query_tree);
self.bucket_candidates |= bucket_candidates;
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {

View File

@@ -1,5 +1,3 @@
use std::collections::HashMap;
use log::debug;
use roaring::RoaringBitmap;
@@ -41,19 +39,15 @@ impl<'t> Final<'t> {
};
match self.parent.next(&mut criterion_parameters)? {
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => {
let candidates = match candidates {
Some(candidates) => candidates,
None => {
let candidates = match query_tree.as_ref() {
Some(qt) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
None => self.ctx.documents_ids()?,
};
bucket_candidates |= &candidates;
candidates
}
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates = match (candidates, query_tree.as_ref()) {
(Some(candidates), _) => candidates,
(None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)?,
(None, None) => self.ctx.documents_ids()?,
};
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
self.returned_candidates |= &candidates;
return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }));

View File

@@ -12,8 +12,8 @@ impl Initial {
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
let answer = CriterionResult {
query_tree,
candidates: candidates.clone(),
bucket_candidates: candidates.take().unwrap_or_default(),
candidates: candidates.take(),
bucket_candidates: None,
};
Initial { answer: Some(answer) }
}

View File

@@ -39,7 +39,7 @@ pub struct CriterionResult {
/// if None, it is up to the child to compute the candidates itself.
candidates: Option<RoaringBitmap>,
/// Candidates that comes from the current bucket of the initial criterion.
bucket_candidates: RoaringBitmap,
bucket_candidates: Option<RoaringBitmap>,
}
#[derive(Debug, PartialEq)]
@@ -57,15 +57,6 @@ enum Candidates {
Forbidden(RoaringBitmap)
}
impl Candidates {
fn into_inner(self) -> RoaringBitmap {
match self {
Self::Allowed(inner) => inner,
Self::Forbidden(inner) => inner,
}
}
}
impl Default for Candidates {
fn default() -> Self {
Self::Forbidden(RoaringBitmap::new())
@@ -236,14 +227,12 @@ impl<'t> CriteriaBuilder<'t> {
pub fn resolve_query_tree<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap>
{
fn resolve_operation<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap>
{
@@ -252,7 +241,7 @@ pub fn resolve_query_tree<'t>(
match query_tree {
And(ops) => {
let mut ops = ops.iter().map(|op| {
resolve_operation(ctx, op, cache, wdcache)
resolve_operation(ctx, op, wdcache)
}).collect::<anyhow::Result<Vec<_>>>()?;
ops.sort_unstable_by_key(|cds| cds.len());
@@ -296,7 +285,7 @@ pub fn resolve_query_tree<'t>(
Or(_, ops) => {
let mut candidates = RoaringBitmap::new();
for op in ops {
let docids = resolve_operation(ctx, op, cache, wdcache)?;
let docids = resolve_operation(ctx, op, wdcache)?;
candidates.union_with(&docids);
}
Ok(candidates)
@@ -305,7 +294,7 @@ pub fn resolve_query_tree<'t>(
}
}
resolve_operation(ctx, query_tree, cache, wdcache)
resolve_operation(ctx, query_tree, wdcache)
}

View File

@@ -30,8 +30,8 @@ const PROXIMITY_THRESHOLD: u8 = 0;
pub struct Proximity<'t> {
ctx: &'t dyn Context<'t>,
/// ((max_proximity, query_tree), allowed_candidates)
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>,
/// (max_proximity, query_tree, allowed_candidates)
state: Option<(u8, Operation, RoaringBitmap)>,
proximity: u8,
bucket_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>,
@@ -57,114 +57,90 @@ impl<'t> Criterion for Proximity<'t> {
#[logging_timer::time("Proximity::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop.
if let Some((_, candidates)) = self.state.as_mut() {
*candidates -= params.excluded_candidates;
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
*allowed_candidates -= params.excluded_candidates;
}
loop {
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
self.proximity,
self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)),
self.state.as_ref().map(|(_, cd)| cd),
self.state.as_ref().map(|(mp, _, _)| mp),
self.state.as_ref().map(|(_, _, cd)| cd),
);
match &mut self.state {
Some((_, candidates)) if candidates.is_empty() => {
Some((max_prox, _, allowed_candidates)) if allowed_candidates.is_empty() || self.proximity > *max_prox => {
self.state = None; // reset state
},
Some((Some((max_prox, query_tree)), candidates)) => {
if self.proximity as usize > *max_prox {
self.state = None; // reset state
} else {
let mut new_candidates = if candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
if let Some(cache) = self.plane_sweep_cache.as_mut() {
match cache.next() {
Some((p, candidates)) => {
self.proximity = p;
candidates
},
None => {
self.state = None; // reset state
continue
},
}
} else {
let cache = resolve_plane_sweep_candidates(
self.ctx,
query_tree,
candidates,
params.wdcache,
)?;
self.plane_sweep_cache = Some(cache.into_iter());
continue
Some((_, query_tree, allowed_candidates)) => {
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
if let Some(cache) = self.plane_sweep_cache.as_mut() {
match cache.next() {
Some((p, candidates)) => {
self.proximity = p;
candidates
},
None => {
self.state = None; // reset state
continue
},
}
} else { // use set theory based algorithm
resolve_candidates(
self.ctx,
&query_tree,
self.proximity,
&mut self.candidates_cache,
params.wdcache,
)?
};
} else {
let cache = resolve_plane_sweep_candidates(
self.ctx,
query_tree,
allowed_candidates,
params.wdcache,
)?;
self.plane_sweep_cache = Some(cache.into_iter());
new_candidates.intersect_with(&candidates);
candidates.difference_with(&new_candidates);
self.proximity += 1;
continue
}
} else { // use set theory based algorithm
resolve_candidates(
self.ctx,
&query_tree,
self.proximity,
&mut self.candidates_cache,
params.wdcache,
)?
};
new_candidates &= &*allowed_candidates;
*allowed_candidates -= &new_candidates;
self.proximity += 1;
return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
Some((None, candidates)) => {
let candidates = take(candidates);
self.state = None; // reset state
return Ok(Some(CriterionResult {
query_tree: None,
candidates: Some(candidates.clone()),
bucket_candidates: candidates,
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let candidates = resolve_query_tree(self.ctx, qt, &mut HashMap::new(), params.wdcache)?;
candidates - params.excluded_candidates
},
(None, None) => RoaringBitmap::new(),
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
let candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
};
// If our parent returns candidates it means that the bucket
// candidates were already computed before and we can use them.
//
// If not, we must use the just computed candidates as our bucket
// candidates.
if candidates_is_some {
self.bucket_candidates.union_with(&bucket_candidates);
} else {
self.bucket_candidates.union_with(&candidates);
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
self.state = Some((query_tree, candidates));
let maximum_proximity = maximum_proximity(&query_tree);
self.state = Some((maximum_proximity as u8, query_tree, candidates));
self.proximity = 0;
self.plane_sweep_cache = None;
},
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
bucket_candidates,
}));
},
None => return Ok(None),
}
},

View File

@@ -13,15 +13,19 @@ use super::{
CriterionParameters,
CriterionResult,
query_docids,
query_pair_proximity_docids
query_pair_proximity_docids,
resolve_query_tree,
};
/// Maximum number of typo for a word of any length.
const MAX_TYPOS_PER_WORD: u8 = 2;
pub struct Typo<'t> {
ctx: &'t dyn Context<'t>,
query_tree: Option<(usize, Operation)>,
number_typos: u8,
candidates: Candidates,
bucket_candidates: RoaringBitmap,
/// (max_typos, query_tree, candidates)
state: Option<(u8, Operation, Candidates)>,
typos: u8,
bucket_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
}
@@ -30,10 +34,9 @@ impl<'t> Typo<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
Typo {
ctx,
query_tree: None,
number_typos: 0,
candidates: Candidates::default(),
bucket_candidates: RoaringBitmap::new(),
state: None,
typos: 0,
bucket_candidates: None,
parent,
candidates_cache: HashMap::new(),
}
@@ -45,113 +48,101 @@ impl<'t> Criterion for Typo<'t> {
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
use Candidates::{Allowed, Forbidden};
// remove excluded candidates when next is called, instead of doing it in the loop.
match &mut self.candidates {
Allowed(candidates) => *candidates -= params.excluded_candidates,
Forbidden(candidates) => *candidates |= params.excluded_candidates,
match self.state.as_mut() {
Some((_, _, Allowed(candidates))) => *candidates -= params.excluded_candidates,
Some((_, _, Forbidden(candidates))) => *candidates |= params.excluded_candidates,
None => (),
}
loop {
debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates);
debug!("Typo at iteration {} (max typos {:?}) ({:?})",
self.typos,
self.state.as_ref().map(|(mt, _, _)| mt),
self.state.as_ref().map(|(_, _, cd)| cd),
);
match (&mut self.query_tree, &mut self.candidates) {
(_, Allowed(candidates)) if candidates.is_empty() => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take().map(|(_, qt)| qt),
candidates: Some(take(&mut self.candidates).into_inner()),
bucket_candidates: take(&mut self.bucket_candidates),
}));
match self.state.as_mut() {
Some((max_typos, _, _)) if self.typos > *max_typos => {
self.state = None; // reset state
},
(Some((max_typos, query_tree)), Allowed(candidates)) => {
if self.number_typos as usize > *max_typos {
self.query_tree = None;
self.candidates = Candidates::default();
} else {
let fst = self.ctx.words_fst();
let new_query_tree = if self.number_typos < 2 {
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
} else if self.number_typos == 2 {
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
query_tree.clone()
} else {
query_tree.clone()
Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
self.state = None; // reset state
},
Some((_, query_tree, candidates_authorization)) => {
let fst = self.ctx.words_fst();
let new_query_tree = if self.typos < MAX_TYPOS_PER_WORD {
alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?
} else if self.typos == MAX_TYPOS_PER_WORD {
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
// we keep the altered query tree
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?;
// we compute the allowed candidates
let query_tree_allowed_candidates = resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
// we assign the allowed candidates to the candidates authorization.
*candidates_authorization = match take(candidates_authorization) {
Allowed(allowed_candidates) => Allowed(query_tree_allowed_candidates & allowed_candidates),
Forbidden(forbidden_candidates) => Allowed(query_tree_allowed_candidates - forbidden_candidates),
};
let mut new_candidates = resolve_candidates(
self.ctx,
&new_query_tree,
self.number_typos,
&mut self.candidates_cache,
params.wdcache,
)?;
new_candidates.intersect_with(&candidates);
candidates.difference_with(&new_candidates);
self.number_typos += 1;
return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree),
candidates: Some(new_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
(Some((max_typos, query_tree)), Forbidden(candidates)) => {
if self.number_typos as usize > *max_typos {
self.query_tree = None;
self.candidates = Candidates::default();
query_tree.clone()
} else {
let fst = self.ctx.words_fst();
let new_query_tree = if self.number_typos < 2 {
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
} else if self.number_typos == 2 {
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
query_tree.clone()
} else {
query_tree.clone()
};
query_tree.clone()
};
let mut new_candidates = resolve_candidates(
self.ctx,
&new_query_tree,
self.number_typos,
&mut self.candidates_cache,
params.wdcache,
)?;
new_candidates.difference_with(&candidates);
candidates.union_with(&new_candidates);
self.number_typos += 1;
self.bucket_candidates.union_with(&new_candidates);
let mut candidates = resolve_candidates(
self.ctx,
&new_query_tree,
self.typos,
&mut self.candidates_cache,
params.wdcache,
)?;
return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree),
candidates: Some(new_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
(None, Allowed(_)) => {
let candidates = take(&mut self.candidates).into_inner();
return Ok(Some(CriterionResult {
query_tree: None,
candidates: Some(candidates.clone()),
bucket_candidates: candidates,
}));
},
(None, Forbidden(_)) => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
bucket_candidates,
}));
match candidates_authorization {
Allowed(allowed_candidates) => {
candidates &= &*allowed_candidates;
*allowed_candidates -= &candidates;
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
self.number_typos = 0;
self.candidates = candidates.map_or_else(|| {
Forbidden(forbidden_candidates) => {
candidates -= &*forbidden_candidates;
*forbidden_candidates |= &candidates;
},
}
let bucket_candidates = match self.bucket_candidates.as_mut() {
Some(bucket_candidates) => take(bucket_candidates),
None => candidates.clone(),
};
self.typos += 1;
return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree),
candidates: Some(candidates),
bucket_candidates: Some(bucket_candidates),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
let candidates = candidates.map_or_else(|| {
Candidates::Forbidden(params.excluded_candidates.clone())
}, Candidates::Allowed);
self.bucket_candidates.union_with(&bucket_candidates);
let maximum_typos = maximum_typo(&query_tree) as u8;
self.state = Some((maximum_typos, query_tree, candidates));
self.typos = 0;
},
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
bucket_candidates,
}));
},
None => return Ok(None),
}
@@ -185,7 +176,6 @@ fn alterate_query_tree(
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
},
Operation::Query(q) => {
// TODO may be optimized when number_typos == 0
if let QueryKind::Tolerant { typo, word } = &q.kind {
// if no typo is allowed we don't call word_derivations function,
// and directly create an Exact query
@@ -384,7 +374,7 @@ mod test {
]),
])),
candidates: Some(candidates_1.clone()),
bucket_candidates: candidates_1,
bucket_candidates: Some(candidates_1),
};
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
@@ -406,7 +396,7 @@ mod test {
]),
])),
candidates: Some(candidates_2.clone()),
bucket_candidates: candidates_2,
bucket_candidates: Some(candidates_2),
};
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
@@ -428,7 +418,7 @@ mod test {
let expected = CriterionResult {
query_tree: None,
candidates: Some(facet_candidates.clone()),
bucket_candidates: facet_candidates,
bucket_candidates: None,
};
// first iteration, returns the facet candidates
@@ -471,7 +461,7 @@ mod test {
]),
])),
candidates: Some(&candidates_1 & &facet_candidates),
bucket_candidates: facet_candidates.clone(),
bucket_candidates: Some(&candidates_1 & &facet_candidates),
};
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
@@ -493,7 +483,7 @@ mod test {
]),
])),
candidates: Some(&candidates_2 & &facet_candidates),
bucket_candidates: RoaringBitmap::new(),
bucket_candidates: Some(&candidates_2 & &facet_candidates),
};
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));

View File

@@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::mem::take;
use log::debug;
@@ -11,9 +10,9 @@ pub struct Words<'t> {
ctx: &'t dyn Context<'t>,
query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap,
bucket_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
compute_candidates: bool,
}
impl<'t> Words<'t> {
@@ -22,9 +21,9 @@ impl<'t> Words<'t> {
ctx,
query_trees: Vec::default(),
candidates: None,
bucket_candidates: RoaringBitmap::new(),
bucket_candidates: None,
parent,
candidates_cache: HashMap::default(),
compute_candidates: false,
}
}
}
@@ -40,55 +39,48 @@ impl<'t> Criterion for Words<'t> {
loop {
debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);
match (self.query_trees.pop(), &mut self.candidates) {
(query_tree, Some(candidates)) if candidates.is_empty() => {
self.query_trees = Vec::new();
return Ok(Some(CriterionResult {
query_tree,
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(Some(qt), Some(candidates)) => {
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, params.wdcache)?;
found_candidates.intersect_with(&candidates);
candidates.difference_with(&found_candidates);
match self.query_trees.pop() {
Some(query_tree) => {
let candidates = match self.candidates.as_mut() {
Some(allowed_candidates) if self.compute_candidates => {
let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
candidates &= &*allowed_candidates;
*allowed_candidates -= &candidates;
Some(candidates)
},
candidates => candidates.cloned(),
};
let bucket_candidates = match self.bucket_candidates.as_mut() {
Some(bucket_candidates) => Some(take(bucket_candidates)),
None => None,
};
return Ok(Some(CriterionResult {
query_tree: Some(qt),
candidates: Some(found_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
query_tree: Some(query_tree),
candidates,
bucket_candidates,
}));
},
(Some(qt), None) => {
return Ok(Some(CriterionResult {
query_tree: Some(qt),
candidates: None,
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(None, Some(_)) => {
let candidates = self.candidates.take();
return Ok(Some(CriterionResult {
query_tree: None,
candidates: candidates.clone(),
bucket_candidates: candidates.unwrap_or_default(),
}));
},
(None, None) => {
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates;
self.compute_candidates = bucket_candidates.is_some();
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
},
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
candidates,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
self.candidates = candidates;
self.bucket_candidates.union_with(&bucket_candidates);
},
None => return Ok(None),
}
},