Compute an exact count when using distinct

This commit is contained in:
ManyTheFish
2022-07-18 16:52:45 +02:00
parent a396806343
commit d71bc1e69f
5 changed files with 72 additions and 25 deletions

View File

@ -3,32 +3,35 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, Context};
use crate::search::query_tree::Operation;
use crate::search::Distinct;
use crate::Result;
/// Criterion holding the starting `CriterionResult` of a search and, optionally,
/// a distinct rule of type `D`.
// NOTE(review): this excerpt is a diff rendering; the first header below appears to be
// the pre-change declaration and the second (generic over `D`) its replacement.
pub struct Initial<'t> {
pub struct Initial<'t, D> {
/// Search context, borrowed for `'t`.
ctx: &'t dyn Context<'t>,
/// Initial result assembled by `new`; it is `Some` right after construction.
answer: Option<CriterionResult>,
/// Flag stored exactly as passed to `new`; how it is consumed is not visible in this excerpt.
exhaustive_number_hits: bool,
/// Optional distinct rule; when present, `next` runs it over the candidates to build
/// the bucket candidates instead of reusing the raw candidate set.
distinct: Option<D>,
}
// NOTE(review): diff rendering — the non-generic `impl` header, the `-> Initial` return
// type and the three-field constructor expression appear to be the pre-change lines;
// each one is immediately followed by its replacement.
impl<'t> Initial<'t> {
impl<'t, D> Initial<'t, D> {
/// Creates the initial criterion.
///
/// Wraps `query_tree` and `filtered_candidates` in a `CriterionResult` whose
/// `candidates` and `bucket_candidates` are left as `None`, then stores that result
/// together with `ctx`, `exhaustive_number_hits` and `distinct`.
pub fn new(
ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>,
filtered_candidates: Option<RoaringBitmap>,
exhaustive_number_hits: bool,
) -> Initial {
distinct: Option<D>,
) -> Initial<D> {
// Candidates are not resolved here; both candidate fields start out empty.
let answer = CriterionResult {
query_tree,
candidates: None,
filtered_candidates,
bucket_candidates: None,
};
Initial { ctx, answer: Some(answer), exhaustive_number_hits }
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
}
}
impl Criterion for Initial<'_> {
impl<D: Distinct> Criterion for Initial<'_, D> {
#[logging_timer::time("Initial::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
self.answer
@ -41,8 +44,20 @@ impl Criterion for Initial<'_> {
&mut params.wdcache,
)?;
answer.candidates = Some(candidates.clone());
answer.bucket_candidates = Some(candidates);
let bucket_candidates = match &mut self.distinct {
// NOTE: this may be very time-consuming, since every distinct candidate is iterated to build the bucket.
Some(distinct) => {
let mut bucket_candidates = RoaringBitmap::new();
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
bucket_candidates.insert(c?);
}
bucket_candidates
}
None => candidates.clone(),
};
answer.candidates = Some(candidates);
answer.bucket_candidates = Some(bucket_candidates);
}
Ok(answer)
})