mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-01 00:16:28 +00:00 
			
		
		
		
	Introduce a cache on the docid_word_positions database method
This commit is contained in:
		| @@ -1,5 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, HashMap, btree_map}; | ||||
| use std::collections::btree_map::{self, BTreeMap}; | ||||
| use std::collections::hash_map::{HashMap, Entry}; | ||||
| use std::mem::take; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -331,19 +332,21 @@ fn resolve_candidates<'t>( | ||||
|     Ok(candidates) | ||||
| } | ||||
|  | ||||
| fn resolve_plane_sweep_candidates<'t>( | ||||
|     ctx: &'t dyn Context, | ||||
| fn resolve_plane_sweep_candidates( | ||||
|     ctx: &dyn Context, | ||||
|     query_tree: &Operation, | ||||
|     allowed_candidates: &RoaringBitmap, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>> | ||||
| { | ||||
|     /// FIXME may be buggy with query like "new new york" | ||||
|     fn plane_sweep<'t>( | ||||
|         ctx: &'t dyn Context, | ||||
|         operations: &[Operation], | ||||
|     fn plane_sweep<'a>( | ||||
|         ctx: &dyn Context, | ||||
|         operations: &'a [Operation], | ||||
|         docid: DocumentId, | ||||
|         consecutive: bool, | ||||
|         rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>, | ||||
|         dwpcache: &mut HashMap<String, Option<RoaringBitmap>>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> | ||||
|     { | ||||
| @@ -385,7 +388,7 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|         let mut groups_positions = Vec::with_capacity(groups_len); | ||||
|  | ||||
|         for operation in operations { | ||||
|             let positions = resolve_operation(ctx, operation, docid, wdcache)?; | ||||
|             let positions = resolve_operation(ctx, operation, docid, rocache, dwpcache, wdcache)?; | ||||
|             groups_positions.push(positions.into_iter()); | ||||
|         } | ||||
|  | ||||
| @@ -456,25 +459,32 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|         Ok(output) | ||||
|     } | ||||
|  | ||||
|     fn resolve_operation<'t>( | ||||
|         ctx: &'t dyn Context, | ||||
|         query_tree: &Operation, | ||||
|     fn resolve_operation<'a>( | ||||
|         ctx: &dyn Context, | ||||
|         query_tree: &'a Operation, | ||||
|         docid: DocumentId, | ||||
|         rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>, | ||||
|         dwpcache: &mut HashMap<String, Option<RoaringBitmap>>, | ||||
|         wdcache: &mut WordDerivationsCache, | ||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> { | ||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> | ||||
|     { | ||||
|         use Operation::{And, Consecutive, Or}; | ||||
|  | ||||
|         match query_tree { | ||||
|             And(ops) => plane_sweep(ctx, ops, docid, false, wdcache), | ||||
|             Consecutive(ops) => plane_sweep(ctx, ops, docid, true, wdcache), | ||||
|         if let Some(result) = rocache.get(query_tree) { | ||||
|             return Ok(result.clone()); | ||||
|         } | ||||
|  | ||||
|         let result = match query_tree { | ||||
|             And(ops) => plane_sweep(ctx, ops, docid, false, rocache, dwpcache, wdcache)?, | ||||
|             Consecutive(ops) => plane_sweep(ctx, ops, docid, true, rocache, dwpcache, wdcache)?, | ||||
|             Or(_, ops) => { | ||||
|                 let mut result = Vec::new(); | ||||
|                 for op in ops { | ||||
|                     result.extend(resolve_operation(ctx, op, docid, wdcache)?) | ||||
|                     result.extend(resolve_operation(ctx, op, docid, rocache, dwpcache, wdcache)?) | ||||
|                 } | ||||
|  | ||||
|                 result.sort_unstable(); | ||||
|                 Ok(result) | ||||
|                 result | ||||
|             }, | ||||
|             Operation::Query(Query {prefix, kind}) => { | ||||
|                 let fst = ctx.words_fst(); | ||||
| @@ -493,21 +503,43 @@ fn resolve_plane_sweep_candidates<'t>( | ||||
|  | ||||
|                 let mut result = Vec::new(); | ||||
|                 for (word, _) in words.as_ref() { | ||||
|                     if let Some(positions) = ctx.docid_word_positions(docid, word)? { | ||||
|                     let positions = match dwpcache.entry(word.to_string()) { | ||||
|                         Entry::Occupied(entry) => entry.into_mut(), | ||||
|                         Entry::Vacant(entry) => { | ||||
|                             let positions = ctx.docid_word_positions(docid, word)?; | ||||
|                             entry.insert(positions) | ||||
|                         } | ||||
|                     }; | ||||
|  | ||||
|                     if let Some(positions) = positions { | ||||
|                         let iter = positions.iter().map(|p| (p, 0, p)); | ||||
|                         result.extend(iter); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 result.sort_unstable(); | ||||
|                 Ok(result) | ||||
|                 result | ||||
|             } | ||||
|         } | ||||
|         }; | ||||
|  | ||||
|         rocache.insert(query_tree, result.clone()); | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     let mut word_positions_cache = HashMap::new(); | ||||
|     let mut resolve_operation_cache = HashMap::new(); | ||||
|     let mut candidates = BTreeMap::new(); | ||||
|     for docid in allowed_candidates { | ||||
|         let positions =  resolve_operation(ctx, query_tree, docid, wdcache)?; | ||||
|         word_positions_cache.clear(); | ||||
|         resolve_operation_cache.clear(); | ||||
|         let positions =  resolve_operation( | ||||
|             ctx, | ||||
|             query_tree, | ||||
|             docid, | ||||
|             &mut resolve_operation_cache, | ||||
|             &mut word_positions_cache, | ||||
|             wdcache, | ||||
|         )?; | ||||
|         let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity); | ||||
|         let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7); | ||||
|         candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid); | ||||
|   | ||||
| @@ -379,7 +379,7 @@ mod test { | ||||
|  | ||||
|         let facet_candidates = None; | ||||
|  | ||||
| let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates); | ||||
|  | ||||
|         let candidates_1 = context.word_docids("split").unwrap().unwrap() | ||||
| @@ -428,7 +428,7 @@ let mut wdcache = WordDerivationsCache::new(); | ||||
|         let query_tree = None; | ||||
|         let facet_candidates = context.word_docids("earth").unwrap().unwrap(); | ||||
|  | ||||
| let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone())); | ||||
|  | ||||
|         let expected = CriterionResult { | ||||
| @@ -457,7 +457,7 @@ let mut wdcache = WordDerivationsCache::new(); | ||||
|  | ||||
|         let facet_candidates = context.word_docids("earth").unwrap().unwrap(); | ||||
|  | ||||
| let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut wdcache = WordDerivationsCache::new(); | ||||
|         let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone())); | ||||
|  | ||||
|         let candidates_1 = context.word_docids("split").unwrap().unwrap() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user