mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 15:36:28 +00:00 
			
		
		
		
	Introduce a WordDerivationsCache struct
This commit is contained in:
		
				
					committed by
					
						 Kerollmops
						Kerollmops
					
				
			
			
				
	
			
			
			
						parent
						
							2606c92ef9
						
					
				
				
					commit
					5fcaedb880
				
			| @@ -15,6 +15,7 @@ use crate::heed_codec::facet::{FieldDocIdFacetI64Codec, FieldDocIdFacetF64Codec} | ||||
| use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; | ||||
| use crate::search::facet::FacetIter; | ||||
| use crate::search::query_tree::Operation; | ||||
| use crate::search::WordDerivationsCache; | ||||
| use crate::{FieldsIdsMap, FieldId, Index}; | ||||
| use super::{Criterion, CriterionResult}; | ||||
|  | ||||
| @@ -92,7 +93,7 @@ impl<'t> AscDesc<'t> { | ||||
|         let candidates = match &query_tree { | ||||
|             Some(qt) => { | ||||
|                 let context = CriteriaBuilder::new(rtxn, index)?; | ||||
|                 let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new())?; | ||||
|                 let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), &mut WordDerivationsCache::new())?; | ||||
|                 if let Some(candidates) = candidates { | ||||
|                     qt_candidates.intersect_with(&candidates); | ||||
|                 } | ||||
| @@ -145,7 +146,7 @@ impl<'t> AscDesc<'t> { | ||||
| } | ||||
|  | ||||
| impl<'t> Criterion for AscDesc<'t> { | ||||
|     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         loop { | ||||
|             debug!("Facet {}({}) iteration", | ||||
|                 if self.ascending { "Asc" } else { "Desc" }, self.field_name | ||||
| @@ -157,7 +158,7 @@ impl<'t> Criterion for AscDesc<'t> { | ||||
|                     let bucket_candidates = take(&mut self.bucket_candidates); | ||||
|                     match self.parent.as_mut() { | ||||
|                         Some(parent) => { | ||||
|                             match parent.next()? { | ||||
|                             match parent.next(wdcache)? { | ||||
|                                 Some(CriterionResult { query_tree, mut candidates, bucket_candidates }) => { | ||||
|                                     self.query_tree = query_tree; | ||||
|                                     candidates.intersect_with(&self.faceted_candidates); | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use log::debug; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::Operation; | ||||
| use crate::search::WordDerivationsCache; | ||||
| use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context}; | ||||
|  | ||||
| pub struct Fetcher<'t> { | ||||
| @@ -47,7 +48,7 @@ impl<'t> Fetcher<'t> { | ||||
| } | ||||
|  | ||||
| impl<'t> Criterion for Fetcher<'t> { | ||||
|     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         use Candidates::{Allowed, Forbidden}; | ||||
|         loop { | ||||
|             debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})", | ||||
| @@ -60,7 +61,7 @@ impl<'t> Criterion for Fetcher<'t> { | ||||
|                     let candidates = take(&mut self.candidates).into_inner(); | ||||
|                     let candidates = match &self.query_tree { | ||||
|                         Some(qt) if should_get_documents_ids => { | ||||
|                             let mut docids = resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?; | ||||
|                             let mut docids = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?; | ||||
|                             docids.intersect_with(&candidates); | ||||
|                             docids | ||||
|                         }, | ||||
| @@ -76,11 +77,11 @@ impl<'t> Criterion for Fetcher<'t> { | ||||
|                 Forbidden(_) => { | ||||
|                     match self.parent.as_mut() { | ||||
|                         Some(parent) => { | ||||
|                             match parent.next()? { | ||||
|                             match parent.next(wdcache)? { | ||||
|                                 Some(result) => return Ok(Some(result)), | ||||
|                                 None => if should_get_documents_ids { | ||||
|                                     let candidates = match &self.query_tree { | ||||
|                                         Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?, | ||||
|                                         Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?, | ||||
|                                         None => self.ctx.documents_ids()?, | ||||
|                                     }; | ||||
|  | ||||
| @@ -94,7 +95,7 @@ impl<'t> Criterion for Fetcher<'t> { | ||||
|                         }, | ||||
|                         None => if should_get_documents_ids { | ||||
|                             let candidates = match &self.query_tree { | ||||
|                                 Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?, | ||||
|                                 Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?, | ||||
|                                 None => self.ctx.documents_ids()?, | ||||
|                             }; | ||||
|  | ||||
|   | ||||
| @@ -4,8 +4,8 @@ use std::borrow::Cow; | ||||
| use anyhow::bail; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::word_derivations; | ||||
| use crate::{DocumentId, Index}; | ||||
| use crate::search::{word_derivations, WordDerivationsCache}; | ||||
| use crate::{Index, DocumentId}; | ||||
|  | ||||
| use super::query_tree::{Operation, Query, QueryKind}; | ||||
| use self::typo::Typo; | ||||
| @@ -21,7 +21,7 @@ pub mod proximity; | ||||
| pub mod fetcher; | ||||
|  | ||||
| pub trait Criterion { | ||||
|     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>>; | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>>; | ||||
| } | ||||
|  | ||||
| /// The result of a call to the parent criterion. | ||||
| @@ -164,12 +164,14 @@ pub fn resolve_query_tree<'t>( | ||||
|     ctx: &'t dyn Context, | ||||
|     query_tree: &Operation, | ||||
|     cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     fn resolve_operation<'t>( | ||||
|         ctx: &'t dyn Context, | ||||
|         query_tree: &Operation, | ||||
|         cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<RoaringBitmap> | ||||
|     { | ||||
|         use Operation::{And, Consecutive, Or, Query}; | ||||
| @@ -177,7 +179,7 @@ pub fn resolve_query_tree<'t>( | ||||
|         match query_tree { | ||||
|             And(ops) => { | ||||
|                 let mut ops = ops.iter().map(|op| { | ||||
|                     resolve_operation(ctx, op, cache) | ||||
|                     resolve_operation(ctx, op, cache, wdcache) | ||||
|                 }).collect::<anyhow::Result<Vec<_>>>()?; | ||||
|  | ||||
|                 ops.sort_unstable_by_key(|cds| cds.len()); | ||||
| @@ -200,7 +202,7 @@ pub fn resolve_query_tree<'t>( | ||||
|                 for slice in ops.windows(2) { | ||||
|                     match (&slice[0], &slice[1]) { | ||||
|                         (Operation::Query(left), Operation::Query(right)) => { | ||||
|                             match query_pair_proximity_docids(ctx, left, right, 1)? { | ||||
|                             match query_pair_proximity_docids(ctx, left, right, 1, wdcache)? { | ||||
|                                 pair_docids if pair_docids.is_empty() => { | ||||
|                                     return Ok(RoaringBitmap::new()) | ||||
|                                 }, | ||||
| @@ -221,16 +223,16 @@ pub fn resolve_query_tree<'t>( | ||||
|             Or(_, ops) => { | ||||
|                 let mut candidates = RoaringBitmap::new(); | ||||
|                 for op in ops { | ||||
|                     let docids = resolve_operation(ctx, op, cache)?; | ||||
|                     let docids = resolve_operation(ctx, op, cache, wdcache)?; | ||||
|                     candidates.union_with(&docids); | ||||
|                 } | ||||
|                 Ok(candidates) | ||||
|             }, | ||||
|             Query(q) => Ok(query_docids(ctx, q)?), | ||||
|             Query(q) => Ok(query_docids(ctx, q, wdcache)?), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     resolve_operation(ctx, query_tree, cache) | ||||
|     resolve_operation(ctx, query_tree, cache, wdcache) | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -239,7 +241,8 @@ fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>( | ||||
|     left_words: &[(T, u8)], | ||||
|     right_words: &[(U, u8)], | ||||
|     proximity: u8 | ||||
| ) -> anyhow::Result<RoaringBitmap> { | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|     for (left, _l_typo) in left_words { | ||||
|         for (right, _r_typo) in right_words { | ||||
| @@ -250,13 +253,18 @@ fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>( | ||||
|     Ok(docids) | ||||
| } | ||||
|  | ||||
| fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result<RoaringBitmap> { | ||||
| fn query_docids( | ||||
|     ctx: &dyn Context, | ||||
|     query: &Query, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     match &query.kind { | ||||
|         QueryKind::Exact { word, .. } => { | ||||
|             if query.prefix && ctx.in_prefix_cache(&word) { | ||||
|                 Ok(ctx.word_prefix_docids(&word)?.unwrap_or_default()) | ||||
|             } else if query.prefix { | ||||
|                 let words = word_derivations(&word, true, 0, ctx.words_fst())?; | ||||
|                 let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?; | ||||
|                 let mut docids = RoaringBitmap::new(); | ||||
|                 for (word, _typo) in words { | ||||
|                     let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); | ||||
| @@ -268,7 +276,7 @@ fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result<RoaringBitma | ||||
|             } | ||||
|         }, | ||||
|         QueryKind::Tolerant { typo, word } => { | ||||
|             let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst())?; | ||||
|             let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?; | ||||
|             let mut docids = RoaringBitmap::new(); | ||||
|             for (word, _typo) in words { | ||||
|                 let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); | ||||
| @@ -279,10 +287,17 @@ fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result<RoaringBitma | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, proximity: u8) -> anyhow::Result<RoaringBitmap> { | ||||
| fn query_pair_proximity_docids( | ||||
|     ctx: &dyn Context, | ||||
|     left: &Query, | ||||
|     right: &Query, | ||||
|     proximity: u8, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     if proximity >= 8 { | ||||
|         let mut candidates = query_docids(ctx, left)?; | ||||
|         let right_candidates = query_docids(ctx, right)?; | ||||
|         let mut candidates = query_docids(ctx, left, wdcache)?; | ||||
|         let right_candidates = query_docids(ctx, right, wdcache)?; | ||||
|         candidates.intersect_with(&right_candidates); | ||||
|         return Ok(candidates); | ||||
|     } | ||||
| @@ -293,14 +308,14 @@ fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, p | ||||
|             if prefix && ctx.in_prefix_cache(&right) { | ||||
|                 Ok(ctx.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default()) | ||||
|             } else if prefix { | ||||
|                 let r_words = word_derivations(&right, true, 0, ctx.words_fst())?; | ||||
|                 let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?; | ||||
|                 all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity) | ||||
|             } else { | ||||
|                 Ok(ctx.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default()) | ||||
|             } | ||||
|         }, | ||||
|         (QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => { | ||||
|             let l_words = word_derivations(&left, false, *typo, ctx.words_fst())?; | ||||
|             let l_words = word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned(); | ||||
|             if prefix && ctx.in_prefix_cache(&right) { | ||||
|                 let mut docids = RoaringBitmap::new(); | ||||
|                 for (left, _) in l_words { | ||||
| @@ -309,19 +324,19 @@ fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, p | ||||
|                 } | ||||
|                 Ok(docids) | ||||
|             } else if prefix { | ||||
|                 let r_words = word_derivations(&right, true, 0, ctx.words_fst())?; | ||||
|                 let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?; | ||||
|                 all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity) | ||||
|             } else { | ||||
|                 all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity) | ||||
|             } | ||||
|         }, | ||||
|         (QueryKind::Exact { word: left, .. }, QueryKind::Tolerant { typo, word: right }) => { | ||||
|             let r_words = word_derivations(&right, prefix, *typo, ctx.words_fst())?; | ||||
|             let r_words = word_derivations(&right, prefix, *typo, ctx.words_fst(), wdcache)?; | ||||
|             all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity) | ||||
|         }, | ||||
|         (QueryKind::Tolerant { typo: l_typo, word: left }, QueryKind::Tolerant { typo: r_typo, word: right }) => { | ||||
|             let l_words = word_derivations(&left, false, *l_typo, ctx.words_fst())?; | ||||
|             let r_words = word_derivations(&right, prefix, *r_typo, ctx.words_fst())?; | ||||
|             let l_words = word_derivations(&left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned(); | ||||
|             let r_words = word_derivations(&right, prefix, *r_typo, ctx.words_fst(), wdcache)?; | ||||
|             all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity) | ||||
|         }, | ||||
|     } | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, HashMap, btree_map}; | ||||
| use std::mem::take; | ||||
|  | ||||
| @@ -6,6 +7,7 @@ use log::debug; | ||||
|  | ||||
| use crate::{DocumentId, Position, search::{query_tree::QueryKind, word_derivations}}; | ||||
| use crate::search::query_tree::{maximum_proximity, Operation, Query}; | ||||
| use crate::search::WordDerivationsCache; | ||||
| use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; | ||||
|  | ||||
| pub struct Proximity<'t> { | ||||
| @@ -53,7 +55,7 @@ impl<'t> Proximity<'t> { | ||||
| } | ||||
|  | ||||
| impl<'t> Criterion for Proximity<'t> { | ||||
|     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         use Candidates::{Allowed, Forbidden}; | ||||
|         loop { | ||||
|             debug!("Proximity at iteration {} (max {:?}) ({:?})", | ||||
| @@ -94,7 +96,8 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                                 let cache = resolve_plane_sweep_candidates( | ||||
|                                     self.ctx, | ||||
|                                     query_tree, | ||||
|                                     candidates | ||||
|                                     candidates, | ||||
|                                     wdcache, | ||||
|                                 )?; | ||||
|                                 self.plane_sweep_cache = Some(cache.into_iter()); | ||||
|  | ||||
| @@ -106,6 +109,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                                &query_tree, | ||||
|                                self.proximity, | ||||
|                                &mut self.candidates_cache, | ||||
|                                wdcache, | ||||
|                            )? | ||||
|                         }; | ||||
|  | ||||
| @@ -135,6 +139,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                             &query_tree, | ||||
|                             self.proximity, | ||||
|                             &mut self.candidates_cache, | ||||
|                             wdcache, | ||||
|                         )?; | ||||
|  | ||||
|                         new_candidates.difference_with(&candidates); | ||||
| @@ -164,7 +169,7 @@ impl<'t> Criterion for Proximity<'t> { | ||||
|                 (None, Forbidden(_)) => { | ||||
|                     match self.parent.as_mut() { | ||||
|                         Some(parent) => { | ||||
|                             match parent.next()? { | ||||
|                             match parent.next(wdcache)? { | ||||
|                                 Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { | ||||
|                                     self.query_tree = query_tree.map(|op| (maximum_proximity(&op), op)); | ||||
|                                     self.proximity = 0; | ||||
| @@ -188,6 +193,7 @@ fn resolve_candidates<'t>( | ||||
|     query_tree: &Operation, | ||||
|     proximity: u8, | ||||
|     cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     fn resolve_operation<'t>( | ||||
| @@ -195,27 +201,28 @@ fn resolve_candidates<'t>( | ||||
|         query_tree: &Operation, | ||||
|         proximity: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> | ||||
|     { | ||||
|         use Operation::{And, Consecutive, Or, Query}; | ||||
|  | ||||
|         let result = match query_tree { | ||||
|             And(ops) => mdfs(ctx, ops, proximity, cache)?, | ||||
|             And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?, | ||||
|             Consecutive(ops) => if proximity == 0 { | ||||
|                 mdfs(ctx, ops, 0, cache)? | ||||
|                 mdfs(ctx, ops, 0, cache, wdcache)? | ||||
|             } else { | ||||
|                 Default::default() | ||||
|             }, | ||||
|             Or(_, ops) => { | ||||
|                 let mut output = Vec::new(); | ||||
|                 for op in ops { | ||||
|                     let result = resolve_operation(ctx, op, proximity, cache)?; | ||||
|                     let result = resolve_operation(ctx, op, proximity, cache, wdcache)?; | ||||
|                     output.extend(result); | ||||
|                 } | ||||
|                 output | ||||
|             }, | ||||
|             Query(q) => if proximity == 0 { | ||||
|                 let candidates = query_docids(ctx, q)?; | ||||
|                 let candidates = query_docids(ctx, q, wdcache)?; | ||||
|                 vec![(q.clone(), q.clone(), candidates)] | ||||
|             } else { | ||||
|                 Default::default() | ||||
| @@ -231,6 +238,7 @@ fn resolve_candidates<'t>( | ||||
|         right: &Operation, | ||||
|         proximity: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> | ||||
|     { | ||||
|         fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> { | ||||
| @@ -245,13 +253,13 @@ fn resolve_candidates<'t>( | ||||
|             for (left_p, right_p) in pair_combinations(left_right_p, left_right_p) { | ||||
|                 let left_key = (left.clone(), left_p); | ||||
|                 if !cache.contains_key(&left_key) { | ||||
|                     let candidates = resolve_operation(ctx, left, left_p, cache)?; | ||||
|                     let candidates = resolve_operation(ctx, left, left_p, cache, wdcache)?; | ||||
|                     cache.insert(left_key.clone(), candidates); | ||||
|                 } | ||||
|  | ||||
|                 let right_key = (right.clone(), right_p); | ||||
|                 if !cache.contains_key(&right_key) { | ||||
|                     let candidates = resolve_operation(ctx, right, right_p, cache)?; | ||||
|                     let candidates = resolve_operation(ctx, right, right_p, cache, wdcache)?; | ||||
|                     cache.insert(right_key.clone(), candidates); | ||||
|                 } | ||||
|  | ||||
| @@ -260,7 +268,7 @@ fn resolve_candidates<'t>( | ||||
|  | ||||
|                 for (ll, lr, lcandidates) in lefts { | ||||
|                     for (rl, rr, rcandidates) in rights { | ||||
|                         let mut candidates = query_pair_proximity_docids(ctx, lr, rl, pair_p + 1)?; | ||||
|                         let mut candidates = query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?; | ||||
|                         if lcandidates.len() < rcandidates.len() { | ||||
|                             candidates.intersect_with(lcandidates); | ||||
|                             candidates.intersect_with(rcandidates); | ||||
| @@ -284,6 +292,7 @@ fn resolve_candidates<'t>( | ||||
|         branches: &[Operation], | ||||
|         proximity: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> | ||||
|     { | ||||
|         // Extract the first two elements but gives the tail | ||||
| @@ -293,13 +302,13 @@ fn resolve_candidates<'t>( | ||||
|         }); | ||||
|  | ||||
|         match next { | ||||
|             Some((head1, Some((head2, [_])))) => mdfs_pair(ctx, head1, head2, proximity, cache), | ||||
|             Some((head1, Some((head2, [_])))) => mdfs_pair(ctx, head1, head2, proximity, cache, wdcache), | ||||
|             Some((head1, Some((head2, tail)))) => { | ||||
|                 let mut output = Vec::new(); | ||||
|                 for p in 0..=proximity { | ||||
|                     for (lhead, _, head_candidates) in mdfs_pair(ctx, head1, head2, p, cache)? { | ||||
|                     for (lhead, _, head_candidates) in mdfs_pair(ctx, head1, head2, p, cache, wdcache)? { | ||||
|                         if !head_candidates.is_empty() { | ||||
|                             for (_, rtail, mut candidates) in mdfs(ctx, tail, proximity - p, cache)? { | ||||
|                             for (_, rtail, mut candidates) in mdfs(ctx, tail, proximity - p, cache, wdcache)? { | ||||
|                                 candidates.intersect_with(&head_candidates); | ||||
|                                 if !candidates.is_empty() { | ||||
|                                     output.push((lhead.clone(), rtail, candidates)); | ||||
| @@ -310,13 +319,13 @@ fn resolve_candidates<'t>( | ||||
|                 } | ||||
|                 Ok(output) | ||||
|             }, | ||||
|             Some((head1, None)) => resolve_operation(ctx, head1, proximity, cache), | ||||
|             Some((head1, None)) => resolve_operation(ctx, head1, proximity, cache, wdcache), | ||||
|             None => return Ok(Default::default()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let mut candidates = RoaringBitmap::new(); | ||||
|     for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache)? { | ||||
|     for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? { | ||||
|         candidates.union_with(&cds); | ||||
|     } | ||||
|     Ok(candidates) | ||||
| @@ -326,6 +335,7 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|     ctx: &'t dyn Context, | ||||
|     query_tree: &Operation, | ||||
|     allowed_candidates: &RoaringBitmap, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>> | ||||
| { | ||||
|     /// FIXME may be buggy with query like "new new york" | ||||
| @@ -334,8 +344,14 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|         operations: &[Operation], | ||||
|         docid: DocumentId, | ||||
|         consecutive: bool, | ||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> { | ||||
|         fn compute_groups_proximity(groups: &Vec<(usize, (Position, u8, Position))>, consecutive: bool) -> Option<(Position, u8, Position)> { | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> | ||||
|     { | ||||
|         fn compute_groups_proximity( | ||||
|             groups: &[(usize, (Position, u8, Position))], | ||||
|             consecutive: bool, | ||||
|         ) -> Option<(Position, u8, Position)> | ||||
|         { | ||||
|             // take the inner proximity of the first group as initial | ||||
|             let mut proximity = groups.first()?.1.1; | ||||
|             let left_most_pos = groups.first()?.1.0; | ||||
| @@ -360,14 +376,16 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|             // if groups should be consecutives, we will only accept groups with a proximity of 0 | ||||
|             if !consecutive || proximity == 0 { | ||||
|                 Some((left_most_pos, proximity, right_most_pos)) | ||||
|             } else { None } | ||||
|             } else { | ||||
|                 None | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let groups_len = operations.len(); | ||||
|         let mut groups_positions = Vec::with_capacity(groups_len); | ||||
|  | ||||
|         for operation in operations { | ||||
|             let positions = resolve_operation(ctx, operation, docid)?; | ||||
|             let positions = resolve_operation(ctx, operation, docid, wdcache)?; | ||||
|             groups_positions.push(positions.into_iter()); | ||||
|         } | ||||
|  | ||||
| @@ -442,16 +460,17 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|         ctx: &'t dyn Context, | ||||
|         query_tree: &Operation, | ||||
|         docid: DocumentId, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> { | ||||
|         use Operation::{And, Consecutive, Or}; | ||||
|  | ||||
|         match query_tree { | ||||
|             And(ops) => plane_sweep(ctx, ops, docid, false), | ||||
|             Consecutive(ops) => plane_sweep(ctx, ops, docid, true), | ||||
|             And(ops) => plane_sweep(ctx, ops, docid, false, wdcache), | ||||
|             Consecutive(ops) => plane_sweep(ctx, ops, docid, true, wdcache), | ||||
|             Or(_, ops) => { | ||||
|                 let mut result = Vec::new(); | ||||
|                 for op in ops { | ||||
|                     result.extend(resolve_operation(ctx, op, docid)?) | ||||
|                     result.extend(resolve_operation(ctx, op, docid, wdcache)?) | ||||
|                 } | ||||
|  | ||||
|                 result.sort_unstable(); | ||||
| @@ -462,19 +481,19 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|                 let words = match kind { | ||||
|                     QueryKind::Exact { word, .. } => { | ||||
|                         if *prefix { | ||||
|                             word_derivations(word, true, 0, fst)? | ||||
|                             Cow::Borrowed(word_derivations(word, true, 0, fst, wdcache)?) | ||||
|                         } else { | ||||
|                             vec![(word.to_string(), 0)] | ||||
|                             Cow::Owned(vec![(word.to_string(), 0)]) | ||||
|                         } | ||||
|                     }, | ||||
|                     QueryKind::Tolerant { typo, word } => { | ||||
|                         word_derivations(word, *prefix, *typo, fst)? | ||||
|                         Cow::Borrowed(word_derivations(word, *prefix, *typo, fst, wdcache)?) | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 let mut result = Vec::new(); | ||||
|                 for (word, _) in words { | ||||
|                     if let Some(positions) = ctx.docid_word_positions(docid, &word)? { | ||||
|                 for (word, _) in words.as_ref() { | ||||
|                     if let Some(positions) = ctx.docid_word_positions(docid, word)? { | ||||
|                         let iter = positions.iter().map(|p| (p, 0, p)); | ||||
|                         result.extend(iter); | ||||
|                     } | ||||
| @@ -488,7 +507,7 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|  | ||||
|     let mut candidates = BTreeMap::new(); | ||||
|     for docid in allowed_candidates { | ||||
|         let positions =  resolve_operation(ctx, query_tree, docid)?; | ||||
|         let positions =  resolve_operation(ctx, query_tree, docid, wdcache)?; | ||||
|         let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity); | ||||
|         let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7); | ||||
|         candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid); | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use log::debug; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; | ||||
| use crate::search::word_derivations; | ||||
| use crate::search::{word_derivations, WordDerivationsCache}; | ||||
| use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; | ||||
|  | ||||
| pub struct Typo<'t> { | ||||
| @@ -53,7 +53,7 @@ impl<'t> Typo<'t> { | ||||
| } | ||||
|  | ||||
| impl<'t> Criterion for Typo<'t> { | ||||
|     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         use Candidates::{Allowed, Forbidden}; | ||||
|         loop { | ||||
|             debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates); | ||||
| @@ -73,15 +73,21 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                     } else { | ||||
|                         let fst = self.ctx.words_fst(); | ||||
|                         let new_query_tree = if self.number_typos < 2 { | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache)? | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache, wdcache)? | ||||
|                         } else if self.number_typos == 2 { | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache)?; | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache, wdcache)?; | ||||
|                             query_tree.clone() | ||||
|                         } else { | ||||
|                             query_tree.clone() | ||||
|                         }; | ||||
|  | ||||
|                         let mut new_candidates = resolve_candidates(self.ctx, &new_query_tree, self.number_typos, &mut self.candidates_cache)?; | ||||
|                         let mut new_candidates = resolve_candidates( | ||||
|                             self.ctx, | ||||
|                             &new_query_tree, | ||||
|                             self.number_typos, | ||||
|                             &mut self.candidates_cache, | ||||
|                             wdcache, | ||||
|                         )?; | ||||
|                         new_candidates.intersect_with(&candidates); | ||||
|                         candidates.difference_with(&new_candidates); | ||||
|                         self.number_typos += 1; | ||||
| @@ -105,15 +111,21 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                     } else { | ||||
|                         let fst = self.ctx.words_fst(); | ||||
|                         let new_query_tree = if self.number_typos < 2 { | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache)? | ||||
|                             alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache, wdcache)? | ||||
|                         } else if self.number_typos == 2 { | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache)?; | ||||
|                             *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache, wdcache)?; | ||||
|                             query_tree.clone() | ||||
|                         } else { | ||||
|                             query_tree.clone() | ||||
|                         }; | ||||
|  | ||||
|                         let mut new_candidates = resolve_candidates(self.ctx, &new_query_tree, self.number_typos, &mut self.candidates_cache)?; | ||||
|                         let mut new_candidates = resolve_candidates( | ||||
|                             self.ctx, | ||||
|                             &new_query_tree, | ||||
|                             self.number_typos, | ||||
|                             &mut self.candidates_cache, | ||||
|                             wdcache, | ||||
|                         )?; | ||||
|                         new_candidates.difference_with(&candidates); | ||||
|                         candidates.union_with(&new_candidates); | ||||
|                         self.number_typos += 1; | ||||
| @@ -141,7 +153,7 @@ impl<'t> Criterion for Typo<'t> { | ||||
|                 (None, Forbidden(_)) => { | ||||
|                     match self.parent.as_mut() { | ||||
|                         Some(parent) => { | ||||
|                             match parent.next()? { | ||||
|                             match parent.next(wdcache)? { | ||||
|                                 Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { | ||||
|                                     self.query_tree = query_tree.map(|op| (maximum_typo(&op), op)); | ||||
|                                     self.number_typos = 0; | ||||
| @@ -167,6 +179,7 @@ fn alterate_query_tree( | ||||
|     mut query_tree: Operation, | ||||
|     number_typos: u8, | ||||
|     typo_cache: &mut HashMap<(String, bool, u8), Vec<(String, u8)>>, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<Operation> | ||||
| { | ||||
|     fn recurse( | ||||
| @@ -174,13 +187,14 @@ fn alterate_query_tree( | ||||
|         operation: &mut Operation, | ||||
|         number_typos: u8, | ||||
|         typo_cache: &mut HashMap<(String, bool, u8), Vec<(String, u8)>>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<()> | ||||
|     { | ||||
|         use Operation::{And, Consecutive, Or}; | ||||
|  | ||||
|         match operation { | ||||
|             And(ops) | Consecutive(ops) | Or(_, ops) => { | ||||
|                 ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, typo_cache)) | ||||
|                 ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, typo_cache, wdcache)) | ||||
|             }, | ||||
|             Operation::Query(q) => { | ||||
|                 // TODO may be optimized when number_typos == 0 | ||||
| @@ -198,7 +212,7 @@ fn alterate_query_tree( | ||||
|                         let words = if let Some(derivations) = typo_cache.get(&cache_key) { | ||||
|                             derivations.clone() | ||||
|                         } else { | ||||
|                             let derivations = word_derivations(word, q.prefix, typo, words_fst)?; | ||||
|                             let derivations = word_derivations(word, q.prefix, typo, words_fst, wdcache)?.to_vec(); | ||||
|                             typo_cache.insert(cache_key, derivations.clone()); | ||||
|                             derivations | ||||
|                         }; | ||||
| @@ -219,7 +233,7 @@ fn alterate_query_tree( | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     recurse(words_fst, &mut query_tree, number_typos, typo_cache)?; | ||||
|     recurse(words_fst, &mut query_tree, number_typos, typo_cache, wdcache)?; | ||||
|     Ok(query_tree) | ||||
| } | ||||
|  | ||||
| @@ -228,6 +242,7 @@ fn resolve_candidates<'t>( | ||||
|     query_tree: &Operation, | ||||
|     number_typos: u8, | ||||
|     cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     fn resolve_operation<'t>( | ||||
| @@ -235,13 +250,14 @@ fn resolve_candidates<'t>( | ||||
|         query_tree: &Operation, | ||||
|         number_typos: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<RoaringBitmap> | ||||
|     { | ||||
|         use Operation::{And, Consecutive, Or, Query}; | ||||
|  | ||||
|         match query_tree { | ||||
|             And(ops) => { | ||||
|                 mdfs(ctx, ops, number_typos, cache) | ||||
|                 mdfs(ctx, ops, number_typos, cache, wdcache) | ||||
|             }, | ||||
|             Consecutive(ops) => { | ||||
|                 let mut candidates = RoaringBitmap::new(); | ||||
| @@ -249,7 +265,7 @@ fn resolve_candidates<'t>( | ||||
|                 for slice in ops.windows(2) { | ||||
|                     match (&slice[0], &slice[1]) { | ||||
|                         (Operation::Query(left), Operation::Query(right)) => { | ||||
|                             match query_pair_proximity_docids(ctx, left, right, 1)? { | ||||
|                             match query_pair_proximity_docids(ctx, left, right, 1, wdcache)? { | ||||
|                                 pair_docids if pair_docids.is_empty() => { | ||||
|                                     return Ok(RoaringBitmap::new()) | ||||
|                                 }, | ||||
| @@ -270,13 +286,13 @@ fn resolve_candidates<'t>( | ||||
|             Or(_, ops) => { | ||||
|                 let mut candidates = RoaringBitmap::new(); | ||||
|                 for op in ops { | ||||
|                     let docids = resolve_operation(ctx, op, number_typos, cache)?; | ||||
|                     let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?; | ||||
|                     candidates.union_with(&docids); | ||||
|                 } | ||||
|                 Ok(candidates) | ||||
|             }, | ||||
|             Query(q) => if q.kind.typo() == number_typos { | ||||
|                 Ok(query_docids(ctx, q)?) | ||||
|                 Ok(query_docids(ctx, q, wdcache)?) | ||||
|             } else { | ||||
|                 Ok(RoaringBitmap::new()) | ||||
|             }, | ||||
| @@ -288,6 +304,7 @@ fn resolve_candidates<'t>( | ||||
|         branches: &[Operation], | ||||
|         mana: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<RoaringBitmap> | ||||
|     { | ||||
|         match branches.split_first() { | ||||
| @@ -296,7 +313,7 @@ fn resolve_candidates<'t>( | ||||
|                 if let Some(candidates) = cache.get(&cache_key) { | ||||
|                     Ok(candidates.clone()) | ||||
|                 } else { | ||||
|                     let candidates = resolve_operation(ctx, head, mana, cache)?; | ||||
|                     let candidates = resolve_operation(ctx, head, mana, cache, wdcache)?; | ||||
|                     cache.insert(cache_key, candidates.clone()); | ||||
|                     Ok(candidates) | ||||
|                 } | ||||
| @@ -310,13 +327,13 @@ fn resolve_candidates<'t>( | ||||
|                         if let Some(candidates) = cache.get(&cache_key) { | ||||
|                             candidates.clone() | ||||
|                         } else { | ||||
|                             let candidates = resolve_operation(ctx, head, m, cache)?; | ||||
|                             let candidates = resolve_operation(ctx, head, m, cache, wdcache)?; | ||||
|                             cache.insert(cache_key, candidates.clone()); | ||||
|                             candidates | ||||
|                         } | ||||
|                     }; | ||||
|                     if !head_candidates.is_empty() { | ||||
|                         let tail_candidates = mdfs(ctx, tail, mana - m, cache)?; | ||||
|                         let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?; | ||||
|                         head_candidates.intersect_with(&tail_candidates); | ||||
|                         candidates.union_with(&head_candidates); | ||||
|                     } | ||||
| @@ -328,7 +345,7 @@ fn resolve_candidates<'t>( | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     resolve_operation(ctx, query_tree, number_typos, cache) | ||||
|     resolve_operation(ctx, query_tree, number_typos, cache, wdcache) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| @@ -343,9 +360,10 @@ mod test { | ||||
|         let query_tree = None; | ||||
|         let facet_candidates = None; | ||||
|  | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, query_tree, facet_candidates); | ||||
|  | ||||
|         assert!(criteria.next().unwrap().is_none()); | ||||
|         assert!(criteria.next(&mut wdcache).unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -361,6 +379,7 @@ mod test { | ||||
|  | ||||
|         let facet_candidates = None; | ||||
|  | ||||
| let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates); | ||||
|  | ||||
|         let candidates_1 = context.word_docids("split").unwrap().unwrap() | ||||
| @@ -378,7 +397,7 @@ mod test { | ||||
|             bucket_candidates: candidates_1, | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next().unwrap(), Some(expected_1)); | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_1)); | ||||
|  | ||||
|         let candidates_2 = ( | ||||
|                 context.word_docids("split").unwrap().unwrap() | ||||
| @@ -400,7 +419,7 @@ mod test { | ||||
|             bucket_candidates: candidates_2, | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next().unwrap(), Some(expected_2)); | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_2)); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -409,6 +428,7 @@ mod test { | ||||
|         let query_tree = None; | ||||
|         let facet_candidates = context.word_docids("earth").unwrap().unwrap(); | ||||
|  | ||||
| let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone())); | ||||
|  | ||||
|         let expected = CriterionResult { | ||||
| @@ -418,10 +438,10 @@ mod test { | ||||
|         }; | ||||
|  | ||||
|         // first iteration, returns the facet candidates | ||||
|         assert_eq!(criteria.next().unwrap(), Some(expected)); | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected)); | ||||
|  | ||||
|         // second iteration, returns None because there is no more things to do | ||||
|         assert!(criteria.next().unwrap().is_none()); | ||||
|         assert!(criteria.next(&mut wdcache).unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -437,6 +457,7 @@ mod test { | ||||
|  | ||||
|         let facet_candidates = context.word_docids("earth").unwrap().unwrap(); | ||||
|  | ||||
| let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone())); | ||||
|  | ||||
|         let candidates_1 = context.word_docids("split").unwrap().unwrap() | ||||
| @@ -454,7 +475,7 @@ mod test { | ||||
|             bucket_candidates: candidates_1 & &facet_candidates, | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next().unwrap(), Some(expected_1)); | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_1)); | ||||
|  | ||||
|         let candidates_2 = ( | ||||
|                 context.word_docids("split").unwrap().unwrap() | ||||
| @@ -476,7 +497,7 @@ mod test { | ||||
|             bucket_candidates: candidates_2 & &facet_candidates, | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(criteria.next().unwrap(), Some(expected_2)); | ||||
|         assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_2)); | ||||
|     } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use log::debug; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::Operation; | ||||
| use crate::search::WordDerivationsCache; | ||||
| use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context}; | ||||
|  | ||||
| pub struct Words<'t> { | ||||
| @@ -46,7 +47,7 @@ impl<'t> Words<'t> { | ||||
| } | ||||
|  | ||||
| impl<'t> Criterion for Words<'t> { | ||||
|     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> { | ||||
|     fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { | ||||
|         use Candidates::{Allowed, Forbidden}; | ||||
|         loop { | ||||
|             debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates); | ||||
| @@ -61,7 +62,7 @@ impl<'t> Criterion for Words<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (Some(qt), Allowed(candidates)) => { | ||||
|                     let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?; | ||||
|                     let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, wdcache)?; | ||||
|                     found_candidates.intersect_with(&candidates); | ||||
|                     candidates.difference_with(&found_candidates); | ||||
|  | ||||
| @@ -77,7 +78,7 @@ impl<'t> Criterion for Words<'t> { | ||||
|                     })); | ||||
|                 }, | ||||
|                 (Some(qt), Forbidden(candidates)) => { | ||||
|                     let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?; | ||||
|                     let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, wdcache)?; | ||||
|                     found_candidates.difference_with(&candidates); | ||||
|                     candidates.union_with(&found_candidates); | ||||
|  | ||||
| @@ -103,7 +104,7 @@ impl<'t> Criterion for Words<'t> { | ||||
|                 (None, Forbidden(_)) => { | ||||
|                     match self.parent.as_mut() { | ||||
|                         Some(parent) => { | ||||
|                             match parent.next()? { | ||||
|                             match parent.next(wdcache)? { | ||||
|                                 Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { | ||||
|                                     self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default(); | ||||
|                                     self.candidates = Candidates::Allowed(candidates); | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::hash_map::{HashMap, Entry}; | ||||
| use std::fmt; | ||||
| use std::str::Utf8Error; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use fst::{IntoStreamer, Streamer, Set}; | ||||
| @@ -97,8 +99,9 @@ impl<'a> Search<'a> { | ||||
|         let mut offset = self.offset; | ||||
|         let mut limit = self.limit; | ||||
|         let mut documents_ids = Vec::new(); | ||||
|         let mut words_derivations_cache = WordDerivationsCache::new(); | ||||
|         let mut initial_candidates = RoaringBitmap::new(); | ||||
|         while let Some(CriterionResult { candidates, bucket_candidates, .. }) = criteria.next()? { | ||||
|         while let Some(CriterionResult { candidates, bucket_candidates, .. }) = criteria.next(&mut words_derivations_cache)? { | ||||
|  | ||||
|             debug!("Number of candidates found {}", candidates.len()); | ||||
|  | ||||
| @@ -145,24 +148,32 @@ pub struct SearchResult { | ||||
|     pub documents_ids: Vec<DocumentId>, | ||||
| } | ||||
|  | ||||
| pub fn word_derivations( | ||||
| pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>; | ||||
|  | ||||
| pub fn word_derivations<'c>( | ||||
|     word: &str, | ||||
|     is_prefix: bool, | ||||
|     max_typo: u8, | ||||
|     fst: &fst::Set<Cow<[u8]>>, | ||||
| ) -> anyhow::Result<Vec<(String, u8)>> | ||||
|     cache: &'c mut WordDerivationsCache, | ||||
| ) -> Result<&'c [(String, u8)], Utf8Error> | ||||
| { | ||||
|     let mut derived_words = Vec::new(); | ||||
|     let dfa = build_dfa(word, max_typo, is_prefix); | ||||
|     let mut stream = fst.search_with_state(&dfa).into_stream(); | ||||
|     match cache.entry((word.to_string(), is_prefix, max_typo)) { | ||||
|         Entry::Occupied(entry) => Ok(entry.into_mut()), | ||||
|         Entry::Vacant(entry) => { | ||||
|             let mut derived_words = Vec::new(); | ||||
|             let dfa = build_dfa(word, max_typo, is_prefix); | ||||
|             let mut stream = fst.search_with_state(&dfa).into_stream(); | ||||
|  | ||||
|     while let Some((word, state)) = stream.next() { | ||||
|         let word = std::str::from_utf8(word)?; | ||||
|         let distance = dfa.distance(state); | ||||
|         derived_words.push((word.to_string(), distance.to_u8())); | ||||
|             while let Some((word, state)) = stream.next() { | ||||
|                 let word = std::str::from_utf8(word)?; | ||||
|                 let distance = dfa.distance(state); | ||||
|                 derived_words.push((word.to_string(), distance.to_u8())); | ||||
|             } | ||||
|  | ||||
|             Ok(entry.insert(derived_words)) | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     Ok(derived_words) | ||||
| } | ||||
|  | ||||
| pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user