mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge pull request #173 from meilisearch/enhance-distinct-attributes
Remove excluded document in criteria iterations
This commit is contained in:
		| @@ -12,9 +12,8 @@ use crate::heed_codec::facet::FieldDocIdFacetF64Codec; | ||||
| use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; | ||||
| use crate::search::facet::FacetIter; | ||||
| use crate::search::query_tree::Operation; | ||||
| use crate::search::WordDerivationsCache; | ||||
| use crate::{FieldsIdsMap, FieldId, Index}; | ||||
| use super::{Criterion, CriterionResult}; | ||||
| use super::{Criterion, CriterionParameters, CriterionResult}; | ||||
|  | ||||
| /// Threshold on the number of candidates that will make | ||||
| /// the system choose between one algorithm or another. | ||||
| @@ -85,7 +84,7 @@ impl<'t> AscDesc<'t> { | ||||
|  | ||||
| impl<'t> Criterion for AscDesc<'t> { | ||||
|     #[logging_timer::time("AscDesc::{}")] | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         loop { | ||||
|             debug!("Facet {}({}) iteration", | ||||
|                 if self.ascending { "Asc" } else { "Desc" }, self.field_name | ||||
| @@ -93,7 +92,7 @@ impl<'t> Criterion for AscDesc<'t> { | ||||
|  | ||||
|             match self.candidates.next().transpose()? { | ||||
|                 None => { | ||||
|                     match self.parent.next(wdcache)? { | ||||
|                     match self.parent.next(params)? { | ||||
|                         Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { | ||||
|                             let candidates_is_some = candidates.is_some(); | ||||
|                             self.query_tree = query_tree; | ||||
| @@ -104,7 +103,8 @@ impl<'t> Criterion for AscDesc<'t> { | ||||
|                                 }, | ||||
|                                 (Some(qt), None) => { | ||||
|                                     let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; | ||||
|                                     let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), wdcache)?; | ||||
|                                     let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), params.wdcache)?; | ||||
|                                     candidates -= params.excluded_candidates; | ||||
|                                     candidates.intersect_with(&self.faceted_candidates); | ||||
|                                     candidates | ||||
|                                 }, | ||||
| @@ -138,7 +138,8 @@ impl<'t> Criterion for AscDesc<'t> { | ||||
|                         None => return Ok(None), | ||||
|                     } | ||||
|                 }, | ||||
|                 Some(candidates) => { | ||||
|                 Some(mut candidates) => { | ||||
|                     candidates -= params.excluded_candidates; | ||||
|                     return Ok(Some(CriterionResult { | ||||
|                         query_tree: self.query_tree.clone(), | ||||
|                         candidates: Some(candidates), | ||||
|   | ||||
| @@ -9,7 +9,7 @@ use crate::{TreeLevel, search::build_dfa}; | ||||
| use crate::search::criteria::Query; | ||||
| use crate::search::query_tree::{Operation, QueryKind}; | ||||
| use crate::search::{word_derivations, WordDerivationsCache}; | ||||
| use super::{Criterion, CriterionResult, Context, resolve_query_tree}; | ||||
| use super::{Criterion, CriterionParameters, CriterionResult, Context, resolve_query_tree}; | ||||
|  | ||||
| /// To be able to divide integers by the number of words in the query | ||||
| /// we want to find a multiplier that allow us to divide by any number between 1 and 10. | ||||
| @@ -20,6 +20,10 @@ const LCM_10_FIRST_NUMBERS: u32 = 2520; | ||||
| /// we use 4 as the exponentiation base and the level as the exponent. | ||||
| const LEVEL_EXPONENTIATION_BASE: u32 = 4; | ||||
|  | ||||
| /// Threshold on the number of candidates that will make | ||||
| /// the system choose between one algorithm or another. | ||||
| const CANDIDATES_THRESHOLD: u64 = 1000; | ||||
|  | ||||
| pub struct Attribute<'t> { | ||||
|     ctx: &'t dyn Context<'t>, | ||||
|     query_tree: Option<Operation>, | ||||
| @@ -46,7 +50,12 @@ impl<'t> Attribute<'t> { | ||||
|  | ||||
| impl<'t> Criterion for Attribute<'t> { | ||||
|     #[logging_timer::time("Attribute::{}")] | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         // remove excluded candidates when next is called, instead of doing it in the loop. | ||||
|         if let Some(candidates) = self.candidates.as_mut() { | ||||
|             *candidates -= params.excluded_candidates; | ||||
|         } | ||||
|  | ||||
|         loop { | ||||
|             match (&self.query_tree, &mut self.candidates) { | ||||
|                 (_, Some(candidates)) if candidates.is_empty() => { | ||||
| @@ -61,7 +70,7 @@ impl<'t> Criterion for Attribute<'t> { | ||||
|                         flatten_query_tree(&qt) | ||||
|                     }); | ||||
|  | ||||
|                     let found_candidates = if candidates.len() < 1000 { | ||||
|                     let found_candidates = if candidates.len() < CANDIDATES_THRESHOLD { | ||||
|                         let current_buckets = match self.current_buckets.as_mut() { | ||||
|                             Some(current_buckets) => current_buckets, | ||||
|                             None => { | ||||
| @@ -81,7 +90,7 @@ impl<'t> Criterion for Attribute<'t> { | ||||
|                             }, | ||||
|                         } | ||||
|                     } else { | ||||
|                         match set_compute_candidates(self.ctx, flattened_query_tree, candidates, wdcache)? { | ||||
|                         match set_compute_candidates(self.ctx, flattened_query_tree, candidates, params.wdcache)? { | ||||
|                             Some(candidates) => candidates, | ||||
|                             None => { | ||||
|                                 return Ok(Some(CriterionResult { | ||||
| @@ -102,7 +111,8 @@ impl<'t> Criterion for Attribute<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (Some(qt), None) => { | ||||
|                     let query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?; | ||||
|                     let mut query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), params.wdcache)?; | ||||
|                     query_tree_candidates -= params.excluded_candidates; | ||||
|                     self.bucket_candidates |= &query_tree_candidates; | ||||
|                     self.candidates = Some(query_tree_candidates); | ||||
|                 }, | ||||
| @@ -114,7 +124,7 @@ impl<'t> Criterion for Attribute<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (None, None) => { | ||||
|                     match self.parent.next(wdcache)? { | ||||
|                     match self.parent.next(params)? { | ||||
|                         Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { | ||||
|                             return Ok(Some(CriterionResult { | ||||
|                                 query_tree: None, | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::Operation; | ||||
| use crate::search::WordDerivationsCache; | ||||
| use super::{resolve_query_tree, Criterion, CriterionResult, Context}; | ||||
| use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context}; | ||||
|  | ||||
| /// The result of a call to the fetcher. | ||||
| #[derive(Debug, Clone, PartialEq)] | ||||
| @@ -22,27 +22,39 @@ pub struct Final<'t> { | ||||
|     ctx: &'t dyn Context<'t>, | ||||
|     parent: Box<dyn Criterion + 't>, | ||||
|     wdcache: WordDerivationsCache, | ||||
|     returned_candidates: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| impl<'t> Final<'t> { | ||||
|     pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Final<'t> { | ||||
|         Final { ctx, parent, wdcache: WordDerivationsCache::new() } | ||||
|         Final { ctx, parent, wdcache: WordDerivationsCache::new(), returned_candidates: RoaringBitmap::new() } | ||||
|     } | ||||
|  | ||||
|     #[logging_timer::time("Final::{}")] | ||||
|     pub fn next(&mut self) -> anyhow::Result<Option<FinalResult>> { | ||||
|     pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> { | ||||
|         loop { | ||||
|             debug!("Final iteration"); | ||||
|  | ||||
|             match self.parent.next(&mut self.wdcache)? { | ||||
|                 Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => { | ||||
|                     let candidates = match (&query_tree, candidates) { | ||||
|                         (_, Some(candidates)) => candidates, | ||||
|                         (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?, | ||||
|                         (None, None) => self.ctx.documents_ids()?, | ||||
|             let mut criterion_parameters = CriterionParameters { | ||||
|                 wdcache: &mut self.wdcache, | ||||
|                 // returned_candidates is merged with excluded_candidates to avoid duplicates | ||||
|                 excluded_candidates: &(&self.returned_candidates | excluded_candidates), | ||||
|             }; | ||||
|  | ||||
|                     bucket_candidates.union_with(&candidates); | ||||
|             match self.parent.next(&mut criterion_parameters)? { | ||||
|                 Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => { | ||||
|                     let candidates = match candidates { | ||||
|                         Some(candidates) => candidates, | ||||
|                         None => { | ||||
|                             let candidates = match query_tree.as_ref() { | ||||
|                                 Some(qt) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?, | ||||
|                                 None => self.ctx.documents_ids()?, | ||||
|                             }; | ||||
|                             bucket_candidates |= &candidates; | ||||
|                             candidates | ||||
|                         } | ||||
|                     }; | ||||
|  | ||||
|                     self.returned_candidates |= &candidates; | ||||
|  | ||||
|                     return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates })); | ||||
|                 }, | ||||
|   | ||||
| @@ -1,9 +1,8 @@ | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::Operation; | ||||
| use crate::search::WordDerivationsCache; | ||||
|  | ||||
| use super::{Criterion, CriterionResult}; | ||||
| use super::{Criterion, CriterionResult, CriterionParameters}; | ||||
|  | ||||
| pub struct Initial { | ||||
|     answer: Option<CriterionResult> | ||||
| @@ -22,7 +21,7 @@ impl Initial { | ||||
|  | ||||
| impl Criterion for Initial { | ||||
|     #[logging_timer::time("Initial::{}")] | ||||
|     fn next(&mut self, _: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, _: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         Ok(self.answer.take()) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -25,7 +25,7 @@ mod words; | ||||
| pub mod r#final; | ||||
|  | ||||
| pub trait Criterion { | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>>; | ||||
|     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>>; | ||||
| } | ||||
|  | ||||
| /// The result of a call to the parent criterion. | ||||
| @@ -40,6 +40,12 @@ pub struct CriterionResult { | ||||
|     bucket_candidates: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, PartialEq)] | ||||
| pub struct CriterionParameters<'a> { | ||||
|     wdcache: &'a mut WordDerivationsCache, | ||||
|     excluded_candidates: &'a RoaringBitmap, | ||||
| } | ||||
|  | ||||
| /// Either a set of candidates that defines the candidates | ||||
| /// that are allowed to be returned, | ||||
| /// or the candidates that must never be returned. | ||||
|   | ||||
| @@ -8,10 +8,26 @@ use log::debug; | ||||
| use crate::{DocumentId, Position, search::{query_tree::QueryKind}}; | ||||
| use crate::search::query_tree::{maximum_proximity, Operation, Query}; | ||||
| use crate::search::{build_dfa, WordDerivationsCache}; | ||||
| use super::{Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree}; | ||||
| use super::{ | ||||
|     Context, | ||||
|     Criterion, | ||||
|     CriterionParameters, | ||||
|     CriterionResult, | ||||
|     query_docids, | ||||
|     query_pair_proximity_docids, | ||||
|     resolve_query_tree, | ||||
| }; | ||||
|  | ||||
| type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>; | ||||
|  | ||||
| /// Threshold on the number of candidates that will make | ||||
| /// the system choose between one algorithm or another. | ||||
| const CANDIDATES_THRESHOLD: u64 = 1000; | ||||
|  | ||||
| /// Threshold on the proximity value that will make | ||||
| /// the system choose between one algorithm or another. | ||||
| const PROXIMITY_THRESHOLD: u8 = 0; | ||||
|  | ||||
| pub struct Proximity<'t> { | ||||
|     ctx: &'t dyn Context<'t>, | ||||
|     /// ((max_proximity, query_tree), allowed_candidates) | ||||
| @@ -39,7 +55,12 @@ impl<'t> Proximity<'t> { | ||||
|  | ||||
| impl<'t> Criterion for Proximity<'t> { | ||||
|     #[logging_timer::time("Proximity::{}")] | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         // remove excluded candidates when next is called, instead of doing it in the loop. | ||||
|         if let Some((_, candidates)) = self.state.as_mut() { | ||||
|             *candidates -= params.excluded_candidates; | ||||
|         } | ||||
|  | ||||
|         loop { | ||||
|             debug!("Proximity at iteration {} (max prox {:?}) ({:?})", | ||||
|                 self.proximity, | ||||
| @@ -55,7 +76,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                     if self.proximity as usize > *max_prox { | ||||
|                         self.state = None; // reset state | ||||
|                     } else { | ||||
|                         let mut new_candidates = if candidates.len() <= 1000 && self.proximity > 0 { | ||||
|                         let mut new_candidates = if candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD { | ||||
|                             if let Some(cache) = self.plane_sweep_cache.as_mut() { | ||||
|                                 match cache.next() { | ||||
|                                     Some((p, candidates)) => { | ||||
| @@ -72,7 +93,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                                     self.ctx, | ||||
|                                     query_tree, | ||||
|                                     candidates, | ||||
|                                     wdcache, | ||||
|                                     params.wdcache, | ||||
|                                 )?; | ||||
|                                 self.plane_sweep_cache = Some(cache.into_iter()); | ||||
|  | ||||
| @@ -84,7 +105,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                                &query_tree, | ||||
|                                self.proximity, | ||||
|                                &mut self.candidates_cache, | ||||
|                                wdcache, | ||||
|                                params.wdcache, | ||||
|                            )? | ||||
|                         }; | ||||
|  | ||||
| @@ -109,7 +130,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 None => { | ||||
|                     match self.parent.next(wdcache)? { | ||||
|                     match self.parent.next(params)? { | ||||
|                         Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { | ||||
|                             return Ok(Some(CriterionResult { | ||||
|                                 query_tree: None, | ||||
| @@ -121,7 +142,10 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                             let candidates_is_some = candidates.is_some(); | ||||
|                             let candidates = match (&query_tree, candidates) { | ||||
|                                 (_, Some(candidates)) => candidates, | ||||
|                                 (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?, | ||||
|                                 (Some(qt), None) => { | ||||
|                                     let candidates = resolve_query_tree(self.ctx, qt, &mut HashMap::new(), params.wdcache)?; | ||||
|                                     candidates - params.excluded_candidates | ||||
|                                 }, | ||||
|                                 (None, None) => RoaringBitmap::new(), | ||||
|                             }; | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,15 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; | ||||
| use crate::search::{word_derivations, WordDerivationsCache}; | ||||
| use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; | ||||
| use super::{ | ||||
|     Candidates, | ||||
|     Context, | ||||
|     Criterion, | ||||
|     CriterionParameters, | ||||
|     CriterionResult, | ||||
|     query_docids, | ||||
|     query_pair_proximity_docids | ||||
| }; | ||||
|  | ||||
| pub struct Typo<'t> { | ||||
|     ctx: &'t dyn Context<'t>, | ||||
| @@ -34,8 +42,14 @@ impl<'t> Typo<'t> { | ||||
|  | ||||
| impl<'t> Criterion for Typo<'t> { | ||||
|     #[logging_timer::time("Typo::{}")] | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         use Candidates::{Allowed, Forbidden}; | ||||
|         // remove excluded candidates when next is called, instead of doing it in the loop. | ||||
|         match &mut self.candidates { | ||||
|             Allowed(candidates) => *candidates -= params.excluded_candidates, | ||||
|             Forbidden(candidates) => *candidates |= params.excluded_candidates, | ||||
|         } | ||||
|  | ||||
|         loop { | ||||
|             debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates); | ||||
|  | ||||
| @@ -54,9 +68,9 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                     } else { | ||||
|                         let fst = self.ctx.words_fst(); | ||||
|                         let new_query_tree = if self.number_typos < 2 { | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)? | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)? | ||||
|                         } else if self.number_typos == 2 { | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)?; | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?; | ||||
|                             query_tree.clone() | ||||
|                         } else { | ||||
|                             query_tree.clone() | ||||
| @@ -67,7 +81,7 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                             &new_query_tree, | ||||
|                             self.number_typos, | ||||
|                             &mut self.candidates_cache, | ||||
|                             wdcache, | ||||
|                             params.wdcache, | ||||
|                         )?; | ||||
|                         new_candidates.intersect_with(&candidates); | ||||
|                         candidates.difference_with(&new_candidates); | ||||
| @@ -87,9 +101,9 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                     } else { | ||||
|                         let fst = self.ctx.words_fst(); | ||||
|                         let new_query_tree = if self.number_typos < 2 { | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)? | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)? | ||||
|                         } else if self.number_typos == 2 { | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)?; | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?; | ||||
|                             query_tree.clone() | ||||
|                         } else { | ||||
|                             query_tree.clone() | ||||
| @@ -100,7 +114,7 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                             &new_query_tree, | ||||
|                             self.number_typos, | ||||
|                             &mut self.candidates_cache, | ||||
|                             wdcache, | ||||
|                             params.wdcache, | ||||
|                         )?; | ||||
|                         new_candidates.difference_with(&candidates); | ||||
|                         candidates.union_with(&new_candidates); | ||||
| @@ -123,7 +137,7 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (None, Forbidden(_)) => { | ||||
|                     match self.parent.next(wdcache)? { | ||||
|                     match self.parent.next(params)? { | ||||
|                         Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { | ||||
|                             return Ok(Some(CriterionResult { | ||||
|                                 query_tree: None, | ||||
| @@ -134,7 +148,9 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                         Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { | ||||
|                             self.query_tree = query_tree.map(|op| (maximum_typo(&op), op)); | ||||
|                             self.number_typos = 0; | ||||
|                             self.candidates = candidates.map_or_else(Candidates::default, Candidates::Allowed); | ||||
|                             self.candidates = candidates.map_or_else(|| { | ||||
|                                 Candidates::Forbidden(params.excluded_candidates.clone()) | ||||
|                             }, Candidates::Allowed); | ||||
|                             self.bucket_candidates.union_with(&bucket_candidates); | ||||
|                         }, | ||||
|                         None => return Ok(None), | ||||
| @@ -324,12 +340,16 @@ mod test { | ||||
|         let query_tree = None; | ||||
|         let facet_candidates = None; | ||||
|  | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criterion_parameters = CriterionParameters { | ||||
|             wdcache: &mut WordDerivationsCache::new(), | ||||
|             excluded_candidates: &RoaringBitmap::new(), | ||||
|         }; | ||||
|  | ||||
|         let parent = Initial::new(query_tree, facet_candidates); | ||||
|         let mut criteria = Typo::new(&context, Box::new(parent)); | ||||
|  | ||||
|         assert!(criteria.next(&mut wdcache).unwrap().unwrap().candidates.is_none()); | ||||
|         assert!(criteria.next(&mut wdcache).unwrap().is_none()); | ||||
|         assert!(criteria.next(&mut criterion_parameters).unwrap().unwrap().candidates.is_none()); | ||||
|         assert!(criteria.next(&mut criterion_parameters).unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -345,7 +365,10 @@ mod test { | ||||
|  | ||||
|         let facet_candidates = None; | ||||
|  | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criterion_parameters = CriterionParameters { | ||||
|             wdcache: &mut WordDerivationsCache::new(), | ||||
|             excluded_candidates: &RoaringBitmap::new(), | ||||
|         }; | ||||
|         let parent = Initial::new(Some(query_tree), facet_candidates); | ||||
|         let mut criteria = Typo::new(&context, Box::new(parent)); | ||||
|  | ||||
| @@ -364,7 +387,7 @@ mod test { | ||||
|             bucket_candidates: candidates_1, | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_1)); | ||||
|         assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); | ||||
|  | ||||
|         let candidates_2 = ( | ||||
|                 context.word_docids("split").unwrap().unwrap() | ||||
| @@ -386,7 +409,7 @@ mod test { | ||||
|             bucket_candidates: candidates_2, | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_2)); | ||||
|         assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -395,7 +418,10 @@ mod test { | ||||
|         let query_tree = None; | ||||
|         let facet_candidates = context.word_docids("earth").unwrap().unwrap(); | ||||
|  | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criterion_parameters = CriterionParameters { | ||||
|             wdcache: &mut WordDerivationsCache::new(), | ||||
|             excluded_candidates: &RoaringBitmap::new(), | ||||
|         }; | ||||
|         let parent = Initial::new(query_tree, Some(facet_candidates.clone())); | ||||
|         let mut criteria = Typo::new(&context, Box::new(parent)); | ||||
|  | ||||
| @@ -406,10 +432,10 @@ mod test { | ||||
|         }; | ||||
|  | ||||
|         // first iteration, returns the facet candidates | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected)); | ||||
|         assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected)); | ||||
|  | ||||
|         // second iteration, returns None because there is no more things to do | ||||
|         assert!(criteria.next(&mut wdcache).unwrap().is_none()); | ||||
|         assert!(criteria.next(&mut criterion_parameters).unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -425,7 +451,11 @@ mod test { | ||||
|  | ||||
|         let facet_candidates = context.word_docids("earth").unwrap().unwrap(); | ||||
|  | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|  | ||||
|         let mut criterion_parameters = CriterionParameters { | ||||
|             wdcache: &mut WordDerivationsCache::new(), | ||||
|             excluded_candidates: &RoaringBitmap::new(), | ||||
|         }; | ||||
|         let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone())); | ||||
|         let mut criteria = Typo::new(&context, Box::new(parent)); | ||||
|  | ||||
| @@ -444,7 +474,7 @@ mod test { | ||||
|             bucket_candidates: facet_candidates.clone(), | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_1)); | ||||
|         assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); | ||||
|  | ||||
|         let candidates_2 = ( | ||||
|                 context.word_docids("split").unwrap().unwrap() | ||||
| @@ -466,6 +496,6 @@ mod test { | ||||
|             bucket_candidates: RoaringBitmap::new(), | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_2)); | ||||
|         assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use log::debug; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::Operation; | ||||
| use super::{resolve_query_tree, Criterion, CriterionResult, Context, WordDerivationsCache}; | ||||
| use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree}; | ||||
|  | ||||
| pub struct Words<'t> { | ||||
|     ctx: &'t dyn Context<'t>, | ||||
| @@ -31,7 +31,12 @@ impl<'t> Words<'t> { | ||||
|  | ||||
| impl<'t> Criterion for Words<'t> { | ||||
|     #[logging_timer::time("Words::{}")] | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         // remove excluded candidates when next is called, instead of doing it in the loop. | ||||
|         if let Some(candidates) = self.candidates.as_mut() { | ||||
|             *candidates -= params.excluded_candidates; | ||||
|         } | ||||
|  | ||||
|         loop { | ||||
|             debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates); | ||||
|  | ||||
| @@ -45,7 +50,7 @@ impl<'t> Criterion for Words<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (Some(qt), Some(candidates)) => { | ||||
|                     let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, wdcache)?; | ||||
|                     let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, params.wdcache)?; | ||||
|                     found_candidates.intersect_with(&candidates); | ||||
|                     candidates.difference_with(&found_candidates); | ||||
|  | ||||
| @@ -71,7 +76,7 @@ impl<'t> Criterion for Words<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (None, None) => { | ||||
|                     match self.parent.next(wdcache)? { | ||||
|                     match self.parent.next(params)? { | ||||
|                         Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { | ||||
|                             return Ok(Some(CriterionResult { | ||||
|                                 query_tree: None, | ||||
|   | ||||
| @@ -165,13 +165,13 @@ impl<'a> Search<'a> { | ||||
|     ) -> anyhow::Result<SearchResult> { | ||||
|         let mut offset = self.offset; | ||||
|         let mut initial_candidates = RoaringBitmap::new(); | ||||
|         let mut excluded_documents = RoaringBitmap::new(); | ||||
|         let mut excluded_candidates = RoaringBitmap::new(); | ||||
|         let mut documents_ids = Vec::with_capacity(self.limit); | ||||
|  | ||||
|         while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next()? { | ||||
|         while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next(&excluded_candidates)? { | ||||
|             debug!("Number of candidates found {}", candidates.len()); | ||||
|  | ||||
|             let excluded = take(&mut excluded_documents); | ||||
|             let excluded = take(&mut excluded_candidates); | ||||
|  | ||||
|             let mut candidates = distinct.distinct(candidates, excluded); | ||||
|  | ||||
| @@ -186,7 +186,7 @@ impl<'a> Search<'a> { | ||||
|                 documents_ids.push(candidate?); | ||||
|             } | ||||
|             if documents_ids.len() == self.limit { break } | ||||
|             excluded_documents = candidates.into_excluded(); | ||||
|             excluded_candidates = candidates.into_excluded(); | ||||
|         } | ||||
|  | ||||
|         Ok(SearchResult { matching_words, candidates: initial_candidates, documents_ids }) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user