Introduce an initial candidates set that distinguishes between an exhaustive count and an estimation

This commit is contained in:
ManyTheFish
2022-12-07 18:29:25 +01:00
parent 6d50ea0830
commit 55724f2412
11 changed files with 180 additions and 101 deletions

View File

@ -1,7 +1,7 @@
use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, Context};
use crate::search::criteria::{resolve_query_tree, Context, InitialCandidates};
use crate::search::query_tree::Operation;
use crate::search::Distinct;
use crate::Result;
@ -27,7 +27,7 @@ impl<'t, D> Initial<'t, D> {
query_tree,
candidates: None,
filtered_candidates,
bucket_candidates: None,
initial_candidates: None,
};
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
}
@ -41,32 +41,34 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
.map(|mut answer| {
if self.exhaustive_number_hits && answer.query_tree.is_some() {
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
// then remove the potential soft deleted documents.
let mut candidates = resolve_query_tree(
self.ctx,
answer.query_tree.as_ref().unwrap(),
params.wdcache,
)?;
)? - params.excluded_candidates;
// Apply the filters on the documents retrieved with the query tree.
if let Some(ref filtered_candidates) = answer.filtered_candidates {
candidates &= filtered_candidates;
}
// because the bucket_candidates should be an exhaustive count of the matching documents,
// because the initial_candidates should be an exhaustive count of the matching documents,
// we precompute the distinct attributes.
let bucket_candidates = match &mut self.distinct {
let initial_candidates = match &mut self.distinct {
Some(distinct) => {
let mut bucket_candidates = RoaringBitmap::new();
let mut initial_candidates = RoaringBitmap::new();
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
bucket_candidates.insert(c?);
initial_candidates.insert(c?);
}
bucket_candidates
initial_candidates
}
None => candidates.clone(),
};
answer.candidates = Some(candidates);
answer.bucket_candidates = Some(bucket_candidates);
answer.initial_candidates =
Some(InitialCandidates::Exhaustive(initial_candidates));
}
Ok(answer)
})