Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-25 04:56:28 +00:00)
			
		
		
		
	Update resolve_graph module to work with lazy query terms
This commit is contained in:
		| @@ -28,12 +28,13 @@ pub use logger::{DefaultSearchLogger, SearchLogger}; | |||||||
| use query_graph::{QueryGraph, QueryNode, QueryNodeData}; | use query_graph::{QueryGraph, QueryNode, QueryNodeData}; | ||||||
| use query_term::{located_query_terms_from_string, Phrase, QueryTerm}; | use query_term::{located_query_terms_from_string, Phrase, QueryTerm}; | ||||||
| use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; | use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; | ||||||
| use resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache}; | use resolve_query_graph::PhraseDocIdsCache; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use words::Words; | use words::Words; | ||||||
|  |  | ||||||
| use self::interner::Interner; | use self::interner::Interner; | ||||||
| use self::ranking_rules::{BoxRankingRule, RankingRule}; | use self::ranking_rules::{BoxRankingRule, RankingRule}; | ||||||
|  | use self::resolve_query_graph::compute_query_graph_docids; | ||||||
| use self::sort::Sort; | use self::sort::Sort; | ||||||
| use crate::{ | use crate::{ | ||||||
|     AscDesc, Filter, Index, MatchingWords, Member, Result, SearchResult, TermsMatchingStrategy, |     AscDesc, Filter, Index, MatchingWords, Member, Result, SearchResult, TermsMatchingStrategy, | ||||||
| @@ -48,8 +49,7 @@ pub struct SearchContext<'ctx> { | |||||||
|     pub word_interner: DedupInterner<String>, |     pub word_interner: DedupInterner<String>, | ||||||
|     pub phrase_interner: DedupInterner<Phrase>, |     pub phrase_interner: DedupInterner<Phrase>, | ||||||
|     pub term_interner: Interner<QueryTerm>, |     pub term_interner: Interner<QueryTerm>, | ||||||
|     // think about memory usage of that field (roaring bitmaps in a hashmap) |     pub phrase_docids: PhraseDocIdsCache, | ||||||
|     pub term_docids: QueryTermDocIdsCache, |  | ||||||
| } | } | ||||||
| impl<'ctx> SearchContext<'ctx> { | impl<'ctx> SearchContext<'ctx> { | ||||||
|     pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self { |     pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self { | ||||||
| @@ -60,7 +60,7 @@ impl<'ctx> SearchContext<'ctx> { | |||||||
|             word_interner: <_>::default(), |             word_interner: <_>::default(), | ||||||
|             phrase_interner: <_>::default(), |             phrase_interner: <_>::default(), | ||||||
|             term_interner: <_>::default(), |             term_interner: <_>::default(), | ||||||
|             term_docids: <_>::default(), |             phrase_docids: <_>::default(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -103,7 +103,7 @@ fn resolve_maximally_reduced_query_graph( | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     logger.query_for_universe(&graph); |     logger.query_for_universe(&graph); | ||||||
|     let docids = resolve_query_graph(ctx, &graph, universe)?; |     let docids = compute_query_graph_docids(ctx, &graph, universe)?; | ||||||
|  |  | ||||||
|     Ok(docids) |     Ok(docids) | ||||||
| } | } | ||||||
| @@ -319,7 +319,7 @@ pub fn execute_search( | |||||||
|         let tokens = tokenizer.tokenize(query); |         let tokens = tokenizer.tokenize(query); | ||||||
|  |  | ||||||
|         let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?; |         let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?; | ||||||
|         let graph = QueryGraph::from_query(ctx, query_terms)?; |         let graph = QueryGraph::from_query(ctx, &query_terms)?; | ||||||
|  |  | ||||||
|         check_sort_criteria(ctx, sort_criteria.as_ref())?; |         check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -3,106 +3,63 @@ | |||||||
| use std::collections::VecDeque; | use std::collections::VecDeque; | ||||||
|  |  | ||||||
| use fxhash::FxHashMap; | use fxhash::FxHashMap; | ||||||
| use heed::{BytesDecode, RoTxn}; | use heed::BytesDecode; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; | use super::interner::Interned; | ||||||
| use super::interner::{DedupInterner, Interned}; |  | ||||||
| use super::query_graph::QueryNodeData; | use super::query_graph::QueryNodeData; | ||||||
| use super::query_term::{Phrase, QueryTerm}; | use super::query_term::{Phrase, QueryTermSubset}; | ||||||
| use super::small_bitmap::SmallBitmap; | use super::small_bitmap::SmallBitmap; | ||||||
| use super::{QueryGraph, SearchContext}; | use super::{QueryGraph, SearchContext}; | ||||||
| use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec}; | use crate::search::new::query_term::LocatedQueryTermSubset; | ||||||
|  | use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; | ||||||
|  |  | ||||||
| #[derive(Default)] | #[derive(Default)] | ||||||
| pub struct QueryTermDocIdsCache { | pub struct PhraseDocIdsCache { | ||||||
|     pub phrases: FxHashMap<Interned<Phrase>, RoaringBitmap>, |     pub cache: FxHashMap<Interned<Phrase>, RoaringBitmap>, | ||||||
|     pub terms: FxHashMap<Interned<QueryTerm>, RoaringBitmap>, |  | ||||||
| } | } | ||||||
| impl QueryTermDocIdsCache { | impl<'ctx> SearchContext<'ctx> { | ||||||
|     /// Get the document ids associated with the given phrase |     /// Get the document ids associated with the given phrase | ||||||
|     pub fn get_phrase_docids<'s, 'ctx>( |     pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> { | ||||||
|         &'s mut self, |         if self.phrase_docids.cache.contains_key(&phrase) { | ||||||
|         index: &Index, |             return Ok(&self.phrase_docids.cache[&phrase]); | ||||||
|         txn: &'ctx RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'ctx>, |  | ||||||
|         word_interner: &DedupInterner<String>, |  | ||||||
|         phrase_interner: &DedupInterner<Phrase>, |  | ||||||
|         phrase: Interned<Phrase>, |  | ||||||
|     ) -> Result<&'s RoaringBitmap> { |  | ||||||
|         if self.phrases.contains_key(&phrase) { |  | ||||||
|             return Ok(&self.phrases[&phrase]); |  | ||||||
|         }; |         }; | ||||||
|         let docids = resolve_phrase(index, txn, db_cache, word_interner, phrase_interner, phrase)?; |         let docids = compute_phrase_docids(self, phrase)?; | ||||||
|         let _ = self.phrases.insert(phrase, docids); |         let _ = self.phrase_docids.cache.insert(phrase, docids); | ||||||
|         let docids = &self.phrases[&phrase]; |         let docids = &self.phrase_docids.cache[&phrase]; | ||||||
|         Ok(docids) |         Ok(docids) | ||||||
|     } |     } | ||||||
|     /// Get the document ids associated with the given term | } | ||||||
|     pub fn get_query_term_docids<'s, 'ctx>( | pub fn compute_query_term_subset_docids( | ||||||
|         &'s mut self, |     ctx: &mut SearchContext, | ||||||
|         index: &Index, |     term: &QueryTermSubset, | ||||||
|         txn: &'ctx RoTxn, | ) -> Result<RoaringBitmap> { | ||||||
|         db_cache: &mut DatabaseCache<'ctx>, |  | ||||||
|         word_interner: &DedupInterner<String>, |  | ||||||
|         term_interner: &DedupInterner<QueryTerm>, |  | ||||||
|         phrase_interner: &DedupInterner<Phrase>, |  | ||||||
|         term_interned: Interned<QueryTerm>, |  | ||||||
|     ) -> Result<&'s RoaringBitmap> { |  | ||||||
|         if self.terms.contains_key(&term_interned) { |  | ||||||
|             return Ok(&self.terms[&term_interned]); |  | ||||||
|         }; |  | ||||||
|     let mut docids = RoaringBitmap::new(); |     let mut docids = RoaringBitmap::new(); | ||||||
|         // TODO: use a MultiOps? |     for word in term.all_single_words_except_prefix_db(ctx)? { | ||||||
|         let term = term_interner.get(term_interned); |         if let Some(word_docids) = ctx.get_db_word_docids(word)? { | ||||||
|         for word in term.all_single_words_except_prefix_db() { |             docids |= RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?; | ||||||
|             if let Some(word_docids) = db_cache.get_word_docids(index, txn, word_interner, word)? { |  | ||||||
|                 docids |= |  | ||||||
|                     RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?; |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|         for phrase in term.all_phrases() { |     for phrase in term.all_phrases(ctx)? { | ||||||
|             docids |= self.get_phrase_docids( |         docids |= ctx.get_phrase_docids(phrase)?; | ||||||
|                 index, |  | ||||||
|                 txn, |  | ||||||
|                 db_cache, |  | ||||||
|                 word_interner, |  | ||||||
|                 phrase_interner, |  | ||||||
|                 phrase, |  | ||||||
|             )?; |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|         if let Some(prefix) = term.use_prefix_db { |     if let Some(prefix) = term.use_prefix_db(ctx) { | ||||||
|             if let Some(prefix_docids) = |         if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? { | ||||||
|                 db_cache.get_word_prefix_docids(index, txn, word_interner, prefix)? |  | ||||||
|             { |  | ||||||
|             docids |= |             docids |= | ||||||
|                 RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?; |                 RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|         let _ = self.terms.insert(term_interned, docids); |  | ||||||
|         let docids = &self.terms[&term_interned]; |  | ||||||
|     Ok(docids) |     Ok(docids) | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn resolve_query_graph( | pub fn compute_query_graph_docids( | ||||||
|     ctx: &mut SearchContext, |     ctx: &mut SearchContext, | ||||||
|     q: &QueryGraph, |     q: &QueryGraph, | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
| ) -> Result<RoaringBitmap> { | ) -> Result<RoaringBitmap> { | ||||||
|     let SearchContext { |     // TODO: there must be a faster way to compute this big | ||||||
|         index, |  | ||||||
|         txn, |  | ||||||
|         db_cache, |  | ||||||
|         word_interner, |  | ||||||
|         phrase_interner, |  | ||||||
|         term_interner, |  | ||||||
|         term_docids: query_term_docids, |  | ||||||
|         .. |  | ||||||
|     } = ctx; |  | ||||||
|     // TODO: there is a faster way to compute this big |  | ||||||
|     // roaring bitmap expression |     // roaring bitmap expression | ||||||
|  |  | ||||||
|     let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes); |     let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes); | ||||||
| @@ -125,17 +82,13 @@ pub fn resolve_query_graph( | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         let node_docids = match &node.data { |         let node_docids = match &node.data { | ||||||
|             QueryNodeData::Term(located_term) => { |             QueryNodeData::Term(LocatedQueryTermSubset { | ||||||
|                 let term_docids = query_term_docids.get_query_term_docids( |                 term_subset, | ||||||
|                     index, |                 positions: _, | ||||||
|                     txn, |                 term_ids: _, | ||||||
|                     db_cache, |             }) => { | ||||||
|                     word_interner, |                 let phrase_docids = compute_query_term_subset_docids(ctx, term_subset)?; | ||||||
|                     term_interner, |                 predecessors_docids & phrase_docids | ||||||
|                     phrase_interner, |  | ||||||
|                     located_term.value, |  | ||||||
|                 )?; |  | ||||||
|                 predecessors_docids & term_docids |  | ||||||
|             } |             } | ||||||
|             QueryNodeData::Deleted => { |             QueryNodeData::Deleted => { | ||||||
|                 panic!() |                 panic!() | ||||||
| @@ -163,15 +116,11 @@ pub fn resolve_query_graph( | |||||||
|     panic!() |     panic!() | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn resolve_phrase<'ctx>( | pub fn compute_phrase_docids( | ||||||
|     index: &Index, |     ctx: &mut SearchContext, | ||||||
|     txn: &'ctx RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'ctx>, |  | ||||||
|     word_interner: &DedupInterner<String>, |  | ||||||
|     phrase_interner: &DedupInterner<Phrase>, |  | ||||||
|     phrase: Interned<Phrase>, |     phrase: Interned<Phrase>, | ||||||
| ) -> Result<RoaringBitmap> { | ) -> Result<RoaringBitmap> { | ||||||
|     let Phrase { words } = phrase_interner.get(phrase).clone(); |     let Phrase { words } = ctx.phrase_interner.get(phrase).clone(); | ||||||
|     let mut candidates = RoaringBitmap::new(); |     let mut candidates = RoaringBitmap::new(); | ||||||
|     let mut first_iter = true; |     let mut first_iter = true; | ||||||
|     let winsize = words.len().min(3); |     let winsize = words.len().min(3); | ||||||
| @@ -195,14 +144,7 @@ pub fn resolve_phrase<'ctx>( | |||||||
|                 .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) |                 .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) | ||||||
|             { |             { | ||||||
|                 if dist == 0 { |                 if dist == 0 { | ||||||
|                     match db_cache.get_word_pair_proximity_docids( |                     match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? { | ||||||
|                         index, |  | ||||||
|                         txn, |  | ||||||
|                         word_interner, |  | ||||||
|                         s1, |  | ||||||
|                         s2, |  | ||||||
|                         1, |  | ||||||
|                     )? { |  | ||||||
|                         Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), |                         Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), | ||||||
|                         // If there are no documents for this pair, there will be no |                         // If there are no documents for this pair, there will be no | ||||||
|                         // results for the phrase query. |                         // results for the phrase query. | ||||||
| @@ -211,14 +153,9 @@ pub fn resolve_phrase<'ctx>( | |||||||
|                 } else { |                 } else { | ||||||
|                     let mut bitmap = RoaringBitmap::new(); |                     let mut bitmap = RoaringBitmap::new(); | ||||||
|                     for dist in 0..=dist { |                     for dist in 0..=dist { | ||||||
|                         if let Some(m) = db_cache.get_word_pair_proximity_docids( |                         if let Some(m) = | ||||||
|                             index, |                             ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)? | ||||||
|                             txn, |                         { | ||||||
|                             word_interner, |  | ||||||
|                             s1, |  | ||||||
|                             s2, |  | ||||||
|                             dist as u8 + 1, |  | ||||||
|                         )? { |  | ||||||
|                             bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; |                             bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
| use super::query_graph::QueryNodeData; | use super::query_graph::QueryNodeData; | ||||||
| use super::resolve_query_graph::resolve_query_graph; | use super::resolve_query_graph::compute_query_graph_docids; | ||||||
| use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | ||||||
| use crate::{Result, TermsMatchingStrategy}; | use crate::{Result, TermsMatchingStrategy}; | ||||||
|  |  | ||||||
| @@ -80,7 +80,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { | |||||||
|  |  | ||||||
|         logger.log_words_state(query_graph); |         logger.log_words_state(query_graph); | ||||||
|  |  | ||||||
|         let this_bucket = resolve_query_graph(ctx, query_graph, universe)?; |         let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?; | ||||||
|  |  | ||||||
|         let child_query_graph = query_graph.clone(); |         let child_query_graph = query_graph.clone(); | ||||||
|         loop { |         loop { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user