Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-24 20:46:27 +00:00)
			
		
		
		
	Update resolve_graph module to work with lazy query terms
This commit is contained in:
		| @@ -28,12 +28,13 @@ pub use logger::{DefaultSearchLogger, SearchLogger}; | ||||
| use query_graph::{QueryGraph, QueryNode, QueryNodeData}; | ||||
| use query_term::{located_query_terms_from_string, Phrase, QueryTerm}; | ||||
| use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; | ||||
| use resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache}; | ||||
| use resolve_query_graph::PhraseDocIdsCache; | ||||
| use roaring::RoaringBitmap; | ||||
| use words::Words; | ||||
|  | ||||
| use self::interner::Interner; | ||||
| use self::ranking_rules::{BoxRankingRule, RankingRule}; | ||||
| use self::resolve_query_graph::compute_query_graph_docids; | ||||
| use self::sort::Sort; | ||||
| use crate::{ | ||||
|     AscDesc, Filter, Index, MatchingWords, Member, Result, SearchResult, TermsMatchingStrategy, | ||||
| @@ -48,8 +49,7 @@ pub struct SearchContext<'ctx> { | ||||
|     pub word_interner: DedupInterner<String>, | ||||
|     pub phrase_interner: DedupInterner<Phrase>, | ||||
|     pub term_interner: Interner<QueryTerm>, | ||||
|     // think about memory usage of that field (roaring bitmaps in a hashmap) | ||||
|     pub term_docids: QueryTermDocIdsCache, | ||||
|     pub phrase_docids: PhraseDocIdsCache, | ||||
| } | ||||
| impl<'ctx> SearchContext<'ctx> { | ||||
|     pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self { | ||||
| @@ -60,7 +60,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             word_interner: <_>::default(), | ||||
|             phrase_interner: <_>::default(), | ||||
|             term_interner: <_>::default(), | ||||
|             term_docids: <_>::default(), | ||||
|             phrase_docids: <_>::default(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -103,7 +103,7 @@ fn resolve_maximally_reduced_query_graph( | ||||
|         } | ||||
|     } | ||||
|     logger.query_for_universe(&graph); | ||||
|     let docids = resolve_query_graph(ctx, &graph, universe)?; | ||||
|     let docids = compute_query_graph_docids(ctx, &graph, universe)?; | ||||
|  | ||||
|     Ok(docids) | ||||
| } | ||||
| @@ -319,7 +319,7 @@ pub fn execute_search( | ||||
|         let tokens = tokenizer.tokenize(query); | ||||
|  | ||||
|         let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?; | ||||
|         let graph = QueryGraph::from_query(ctx, query_terms)?; | ||||
|         let graph = QueryGraph::from_query(ctx, &query_terms)?; | ||||
|  | ||||
|         check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||
|  | ||||
|   | ||||
| @@ -3,106 +3,63 @@ | ||||
| use std::collections::VecDeque; | ||||
|  | ||||
| use fxhash::FxHashMap; | ||||
| use heed::{BytesDecode, RoTxn}; | ||||
| use heed::BytesDecode; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::db_cache::DatabaseCache; | ||||
| use super::interner::{DedupInterner, Interned}; | ||||
| use super::interner::Interned; | ||||
| use super::query_graph::QueryNodeData; | ||||
| use super::query_term::{Phrase, QueryTerm}; | ||||
| use super::query_term::{Phrase, QueryTermSubset}; | ||||
| use super::small_bitmap::SmallBitmap; | ||||
| use super::{QueryGraph, SearchContext}; | ||||
| use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec}; | ||||
| use crate::search::new::query_term::LocatedQueryTermSubset; | ||||
| use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; | ||||
|  | ||||
| #[derive(Default)] | ||||
| pub struct QueryTermDocIdsCache { | ||||
|     pub phrases: FxHashMap<Interned<Phrase>, RoaringBitmap>, | ||||
|     pub terms: FxHashMap<Interned<QueryTerm>, RoaringBitmap>, | ||||
| pub struct PhraseDocIdsCache { | ||||
|     pub cache: FxHashMap<Interned<Phrase>, RoaringBitmap>, | ||||
| } | ||||
| impl QueryTermDocIdsCache { | ||||
| impl<'ctx> SearchContext<'ctx> { | ||||
|     /// Get the document ids associated with the given phrase | ||||
|     pub fn get_phrase_docids<'s, 'ctx>( | ||||
|         &'s mut self, | ||||
|         index: &Index, | ||||
|         txn: &'ctx RoTxn, | ||||
|         db_cache: &mut DatabaseCache<'ctx>, | ||||
|         word_interner: &DedupInterner<String>, | ||||
|         phrase_interner: &DedupInterner<Phrase>, | ||||
|         phrase: Interned<Phrase>, | ||||
|     ) -> Result<&'s RoaringBitmap> { | ||||
|         if self.phrases.contains_key(&phrase) { | ||||
|             return Ok(&self.phrases[&phrase]); | ||||
|     pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> { | ||||
|         if self.phrase_docids.cache.contains_key(&phrase) { | ||||
|             return Ok(&self.phrase_docids.cache[&phrase]); | ||||
|         }; | ||||
|         let docids = resolve_phrase(index, txn, db_cache, word_interner, phrase_interner, phrase)?; | ||||
|         let _ = self.phrases.insert(phrase, docids); | ||||
|         let docids = &self.phrases[&phrase]; | ||||
|         let docids = compute_phrase_docids(self, phrase)?; | ||||
|         let _ = self.phrase_docids.cache.insert(phrase, docids); | ||||
|         let docids = &self.phrase_docids.cache[&phrase]; | ||||
|         Ok(docids) | ||||
|     } | ||||
|     /// Get the document ids associated with the given term | ||||
|     pub fn get_query_term_docids<'s, 'ctx>( | ||||
|         &'s mut self, | ||||
|         index: &Index, | ||||
|         txn: &'ctx RoTxn, | ||||
|         db_cache: &mut DatabaseCache<'ctx>, | ||||
|         word_interner: &DedupInterner<String>, | ||||
|         term_interner: &DedupInterner<QueryTerm>, | ||||
|         phrase_interner: &DedupInterner<Phrase>, | ||||
|         term_interned: Interned<QueryTerm>, | ||||
|     ) -> Result<&'s RoaringBitmap> { | ||||
|         if self.terms.contains_key(&term_interned) { | ||||
|             return Ok(&self.terms[&term_interned]); | ||||
|         }; | ||||
| } | ||||
| pub fn compute_query_term_subset_docids( | ||||
|     ctx: &mut SearchContext, | ||||
|     term: &QueryTermSubset, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|         // TODO: use a MultiOps? | ||||
|         let term = term_interner.get(term_interned); | ||||
|         for word in term.all_single_words_except_prefix_db() { | ||||
|             if let Some(word_docids) = db_cache.get_word_docids(index, txn, word_interner, word)? { | ||||
|                 docids |= | ||||
|                     RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?; | ||||
|     for word in term.all_single_words_except_prefix_db(ctx)? { | ||||
|         if let Some(word_docids) = ctx.get_db_word_docids(word)? { | ||||
|             docids |= RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?; | ||||
|         } | ||||
|     } | ||||
|         for phrase in term.all_phrases() { | ||||
|             docids |= self.get_phrase_docids( | ||||
|                 index, | ||||
|                 txn, | ||||
|                 db_cache, | ||||
|                 word_interner, | ||||
|                 phrase_interner, | ||||
|                 phrase, | ||||
|             )?; | ||||
|     for phrase in term.all_phrases(ctx)? { | ||||
|         docids |= ctx.get_phrase_docids(phrase)?; | ||||
|     } | ||||
|  | ||||
|         if let Some(prefix) = term.use_prefix_db { | ||||
|             if let Some(prefix_docids) = | ||||
|                 db_cache.get_word_prefix_docids(index, txn, word_interner, prefix)? | ||||
|             { | ||||
|     if let Some(prefix) = term.use_prefix_db(ctx) { | ||||
|         if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? { | ||||
|             docids |= | ||||
|                 RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|         let _ = self.terms.insert(term_interned, docids); | ||||
|         let docids = &self.terms[&term_interned]; | ||||
|     Ok(docids) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn resolve_query_graph( | ||||
| pub fn compute_query_graph_docids( | ||||
|     ctx: &mut SearchContext, | ||||
|     q: &QueryGraph, | ||||
|     universe: &RoaringBitmap, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let SearchContext { | ||||
|         index, | ||||
|         txn, | ||||
|         db_cache, | ||||
|         word_interner, | ||||
|         phrase_interner, | ||||
|         term_interner, | ||||
|         term_docids: query_term_docids, | ||||
|         .. | ||||
|     } = ctx; | ||||
|     // TODO: there is a faster way to compute this big | ||||
|     // TODO: there must be a faster way to compute this big | ||||
|     // roaring bitmap expression | ||||
|  | ||||
|     let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes); | ||||
| @@ -125,17 +82,13 @@ pub fn resolve_query_graph( | ||||
|         } | ||||
|  | ||||
|         let node_docids = match &node.data { | ||||
|             QueryNodeData::Term(located_term) => { | ||||
|                 let term_docids = query_term_docids.get_query_term_docids( | ||||
|                     index, | ||||
|                     txn, | ||||
|                     db_cache, | ||||
|                     word_interner, | ||||
|                     term_interner, | ||||
|                     phrase_interner, | ||||
|                     located_term.value, | ||||
|                 )?; | ||||
|                 predecessors_docids & term_docids | ||||
|             QueryNodeData::Term(LocatedQueryTermSubset { | ||||
|                 term_subset, | ||||
|                 positions: _, | ||||
|                 term_ids: _, | ||||
|             }) => { | ||||
|                 let phrase_docids = compute_query_term_subset_docids(ctx, term_subset)?; | ||||
|                 predecessors_docids & phrase_docids | ||||
|             } | ||||
|             QueryNodeData::Deleted => { | ||||
|                 panic!() | ||||
| @@ -163,15 +116,11 @@ pub fn resolve_query_graph( | ||||
|     panic!() | ||||
| } | ||||
|  | ||||
| pub fn resolve_phrase<'ctx>( | ||||
|     index: &Index, | ||||
|     txn: &'ctx RoTxn, | ||||
|     db_cache: &mut DatabaseCache<'ctx>, | ||||
|     word_interner: &DedupInterner<String>, | ||||
|     phrase_interner: &DedupInterner<Phrase>, | ||||
| pub fn compute_phrase_docids( | ||||
|     ctx: &mut SearchContext, | ||||
|     phrase: Interned<Phrase>, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let Phrase { words } = phrase_interner.get(phrase).clone(); | ||||
|     let Phrase { words } = ctx.phrase_interner.get(phrase).clone(); | ||||
|     let mut candidates = RoaringBitmap::new(); | ||||
|     let mut first_iter = true; | ||||
|     let winsize = words.len().min(3); | ||||
| @@ -195,14 +144,7 @@ pub fn resolve_phrase<'ctx>( | ||||
|                 .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) | ||||
|             { | ||||
|                 if dist == 0 { | ||||
|                     match db_cache.get_word_pair_proximity_docids( | ||||
|                         index, | ||||
|                         txn, | ||||
|                         word_interner, | ||||
|                         s1, | ||||
|                         s2, | ||||
|                         1, | ||||
|                     )? { | ||||
|                     match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? { | ||||
|                         Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), | ||||
|                         // If there are no documents for this pair, there will be no | ||||
|                         // results for the phrase query. | ||||
| @@ -211,14 +153,9 @@ pub fn resolve_phrase<'ctx>( | ||||
|                 } else { | ||||
|                     let mut bitmap = RoaringBitmap::new(); | ||||
|                     for dist in 0..=dist { | ||||
|                         if let Some(m) = db_cache.get_word_pair_proximity_docids( | ||||
|                             index, | ||||
|                             txn, | ||||
|                             word_interner, | ||||
|                             s1, | ||||
|                             s2, | ||||
|                             dist as u8 + 1, | ||||
|                         )? { | ||||
|                         if let Some(m) = | ||||
|                             ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)? | ||||
|                         { | ||||
|                             bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; | ||||
|                         } | ||||
|                     } | ||||
|   | ||||
| @@ -4,7 +4,7 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::logger::SearchLogger; | ||||
| use super::query_graph::QueryNodeData; | ||||
| use super::resolve_query_graph::resolve_query_graph; | ||||
| use super::resolve_query_graph::compute_query_graph_docids; | ||||
| use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | ||||
| use crate::{Result, TermsMatchingStrategy}; | ||||
|  | ||||
| @@ -80,7 +80,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { | ||||
|  | ||||
|         logger.log_words_state(query_graph); | ||||
|  | ||||
|         let this_bucket = resolve_query_graph(ctx, query_graph, universe)?; | ||||
|         let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?; | ||||
|  | ||||
|         let child_query_graph = query_graph.clone(); | ||||
|         loop { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user