mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Always do the intersections with the universe
This commit is contained in:
		| @@ -46,36 +46,70 @@ pub struct DatabaseCache<'ctx> { | ||||
|     pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>, | ||||
| } | ||||
| impl<'ctx> DatabaseCache<'ctx> { | ||||
|     fn get_value<'v, K1, KC, DC>( | ||||
|     fn get_value<'v, K1, KC>( | ||||
|         txn: &'ctx RoTxn<'_>, | ||||
|         cache_key: K1, | ||||
|         db_key: &'v KC::EItem, | ||||
|         cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         db: Database<KC, Bytes>, | ||||
|     ) -> Result<Option<DC::DItem>> | ||||
|     ) -> Result<Option<RoaringBitmap>> | ||||
|     where | ||||
|         K1: Copy + Eq + Hash, | ||||
|         KC: BytesEncode<'v>, | ||||
|         DC: BytesDecodeOwned, | ||||
|     { | ||||
|         if let Entry::Vacant(entry) = cache.entry(cache_key) { | ||||
|             let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); | ||||
|             entry.insert(bitmap_ptr); | ||||
|         } | ||||
|  | ||||
|         match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) | ||||
|         let bitmap_bytes = match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => bytes, | ||||
|             Some(Cow::Owned(bytes)) => bytes.as_slice(), | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         match (bitmap_bytes, universe) { | ||||
|             (bytes, Some(universe)) => { | ||||
|                 CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe) | ||||
|                     .map(Some) | ||||
|                     .map_err(Into::into) | ||||
|             } | ||||
|             (bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes) | ||||
|                 .map(Some) | ||||
|                 .map_err(heed::Error::Decoding) | ||||
|                 .map_err(Into::into), | ||||
|             Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) | ||||
|                 .map(Some) | ||||
|                 .map_err(heed::Error::Decoding) | ||||
|                 .map_err(Into::into), | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn get_value_length<'v, K1, KC>( | ||||
|         txn: &'ctx RoTxn<'_>, | ||||
|         cache_key: K1, | ||||
|         db_key: &'v KC::EItem, | ||||
|         cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>, | ||||
|         db: Database<KC, Bytes>, | ||||
|     ) -> Result<Option<u64>> | ||||
|     where | ||||
|         K1: Copy + Eq + Hash, | ||||
|         KC: BytesEncode<'v>, | ||||
|     { | ||||
|         if let Entry::Vacant(entry) = cache.entry(cache_key) { | ||||
|             let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); | ||||
|             entry.insert(bitmap_ptr); | ||||
|         } | ||||
|  | ||||
|         let bitmap_bytes = match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => bytes, | ||||
|             Some(Cow::Owned(bytes)) => bytes.as_slice(), | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         CboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes) | ||||
|             .map(Some) | ||||
|             .map_err(heed::Error::Decoding) | ||||
|             .map_err(Into::into) | ||||
|     } | ||||
|  | ||||
|     fn get_value_from_keys<'v, K1, KC, DC>( | ||||
|         txn: &'ctx RoTxn<'_>, | ||||
|         cache_key: K1, | ||||
| @@ -137,11 +171,15 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn word_docids(&mut self, word: Word) -> Result<Option<RoaringBitmap>> { | ||||
|     pub fn word_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word: Word, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         match word { | ||||
|             Word::Original(word) => { | ||||
|                 let exact = self.get_db_exact_word_docids(word)?; | ||||
|                 let tolerant = self.get_db_word_docids(word)?; | ||||
|                 let exact = self.get_db_exact_word_docids(universe, word)?; | ||||
|                 let tolerant = self.get_db_word_docids(universe, word)?; | ||||
|                 Ok(match (exact, tolerant) { | ||||
|                     (None, None) => None, | ||||
|                     (None, Some(tolerant)) => Some(tolerant), | ||||
| @@ -153,12 +191,16 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     } | ||||
|                 }) | ||||
|             } | ||||
|             Word::Derived(word) => self.get_db_word_docids(word), | ||||
|             Word::Derived(word) => self.get_db_word_docids(universe, word), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Retrieve or insert the given value in the `word_docids` database. | ||||
|     fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> { | ||||
|     fn get_db_word_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         match &self.restricted_fids { | ||||
|             Some(restricted_fids) => { | ||||
|                 let interned = self.word_interner.get(word).as_str(); | ||||
| @@ -174,11 +216,12 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     merge_cbo_roaring_bitmaps, | ||||
|                 ) | ||||
|             } | ||||
|             None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             None => DatabaseCache::get_value::<_, _>( | ||||
|                 self.txn, | ||||
|                 word, | ||||
|                 self.word_interner.get(word).as_str(), | ||||
|                 &mut self.db_cache.word_docids, | ||||
|                 universe, | ||||
|                 self.index.word_docids.remap_data_type::<Bytes>(), | ||||
|             ), | ||||
|         } | ||||
| @@ -186,6 +229,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|     fn get_db_exact_word_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         match &self.restricted_fids { | ||||
| @@ -203,21 +247,26 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     merge_cbo_roaring_bitmaps, | ||||
|                 ) | ||||
|             } | ||||
|             None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             None => DatabaseCache::get_value::<_, _>( | ||||
|                 self.txn, | ||||
|                 word, | ||||
|                 self.word_interner.get(word).as_str(), | ||||
|                 &mut self.db_cache.exact_word_docids, | ||||
|                 universe, | ||||
|                 self.index.exact_word_docids.remap_data_type::<Bytes>(), | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> { | ||||
|     pub fn word_prefix_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         prefix: Word, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         match prefix { | ||||
|             Word::Original(prefix) => { | ||||
|                 let exact = self.get_db_exact_word_prefix_docids(prefix)?; | ||||
|                 let tolerant = self.get_db_word_prefix_docids(prefix)?; | ||||
|                 let exact = self.get_db_exact_word_prefix_docids(universe, prefix)?; | ||||
|                 let tolerant = self.get_db_word_prefix_docids(universe, prefix)?; | ||||
|                 Ok(match (exact, tolerant) { | ||||
|                     (None, None) => None, | ||||
|                     (None, Some(tolerant)) => Some(tolerant), | ||||
| @@ -229,13 +278,14 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     } | ||||
|                 }) | ||||
|             } | ||||
|             Word::Derived(prefix) => self.get_db_word_prefix_docids(prefix), | ||||
|             Word::Derived(prefix) => self.get_db_word_prefix_docids(universe, prefix), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Retrieve or insert the given value in the `word_prefix_docids` database. | ||||
|     fn get_db_word_prefix_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         prefix: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         match &self.restricted_fids { | ||||
| @@ -253,11 +303,12 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     merge_cbo_roaring_bitmaps, | ||||
|                 ) | ||||
|             } | ||||
|             None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             None => DatabaseCache::get_value::<_, _>( | ||||
|                 self.txn, | ||||
|                 prefix, | ||||
|                 self.word_interner.get(prefix).as_str(), | ||||
|                 &mut self.db_cache.word_prefix_docids, | ||||
|                 universe, | ||||
|                 self.index.word_prefix_docids.remap_data_type::<Bytes>(), | ||||
|             ), | ||||
|         } | ||||
| @@ -265,6 +316,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|     fn get_db_exact_word_prefix_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         prefix: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         match &self.restricted_fids { | ||||
| @@ -282,11 +334,12 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     merge_cbo_roaring_bitmaps, | ||||
|                 ) | ||||
|             } | ||||
|             None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             None => DatabaseCache::get_value::<_, _>( | ||||
|                 self.txn, | ||||
|                 prefix, | ||||
|                 self.word_interner.get(prefix).as_str(), | ||||
|                 &mut self.db_cache.exact_word_prefix_docids, | ||||
|                 universe, | ||||
|                 self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(), | ||||
|             ), | ||||
|         } | ||||
| @@ -294,6 +347,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|     pub fn get_db_word_pair_proximity_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word1: Interned<String>, | ||||
|         word2: Interned<String>, | ||||
|         proximity: u8, | ||||
| @@ -320,8 +374,8 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     for fid in fids { | ||||
|                         // for each field, intersect left word bitmap and right word bitmap, | ||||
|                         // then merge the result in a global bitmap before storing it in the cache. | ||||
|                         let word1_docids = self.get_db_word_fid_docids(word1, fid)?; | ||||
|                         let word2_docids = self.get_db_word_fid_docids(word2, fid)?; | ||||
|                         let word1_docids = self.get_db_word_fid_docids(universe, word1, fid)?; | ||||
|                         let word2_docids = self.get_db_word_fid_docids(universe, word2, fid)?; | ||||
|                         if let (Some(word1_docids), Some(word2_docids)) = | ||||
|                             (word1_docids, word2_docids) | ||||
|                         { | ||||
| @@ -341,7 +395,33 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|                 Ok(docids) | ||||
|             } | ||||
|             ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _>( | ||||
|                 self.txn, | ||||
|                 (proximity, word1, word2), | ||||
|                 &( | ||||
|                     proximity, | ||||
|                     self.word_interner.get(word1).as_str(), | ||||
|                     self.word_interner.get(word2).as_str(), | ||||
|                 ), | ||||
|                 &mut self.db_cache.word_pair_proximity_docids, | ||||
|                 universe, | ||||
|                 self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(), | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_pair_proximity_docids_len( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word1: Interned<String>, | ||||
|         word2: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<u64>> { | ||||
|         match self.index.proximity_precision(self.txn)?.unwrap_or_default() { | ||||
|             ProximityPrecision::ByAttribute => Ok(self | ||||
|                 .get_db_word_pair_proximity_docids(universe, word1, word2, proximity)? | ||||
|                 .map(|d| d.len())), | ||||
|             ProximityPrecision::ByWord => DatabaseCache::get_value_length::<_, _>( | ||||
|                 self.txn, | ||||
|                 (proximity, word1, word2), | ||||
|                 &( | ||||
| @@ -355,34 +435,9 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_pair_proximity_docids_len( | ||||
|         &mut self, | ||||
|         word1: Interned<String>, | ||||
|         word2: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<u64>> { | ||||
|         match self.index.proximity_precision(self.txn)?.unwrap_or_default() { | ||||
|             ProximityPrecision::ByAttribute => Ok(self | ||||
|                 .get_db_word_pair_proximity_docids(word1, word2, proximity)? | ||||
|                 .map(|d| d.len())), | ||||
|             ProximityPrecision::ByWord => { | ||||
|                 DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( | ||||
|                     self.txn, | ||||
|                     (proximity, word1, word2), | ||||
|                     &( | ||||
|                         proximity, | ||||
|                         self.word_interner.get(word1).as_str(), | ||||
|                         self.word_interner.get(word2).as_str(), | ||||
|                     ), | ||||
|                     &mut self.db_cache.word_pair_proximity_docids, | ||||
|                     self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(), | ||||
|                 ) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_prefix_pair_proximity_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word1: Interned<String>, | ||||
|         prefix2: Interned<String>, | ||||
|         mut proximity: u8, | ||||
| @@ -409,8 +464,9 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     // for each field, intersect left word bitmap and right word bitmap, | ||||
|                     // then merge the result in a global bitmap before storing it in the cache. | ||||
|                     for fid in fids { | ||||
|                         let word1_docids = self.get_db_word_fid_docids(word1, fid)?; | ||||
|                         let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?; | ||||
|                         let word1_docids = self.get_db_word_fid_docids(universe, word1, fid)?; | ||||
|                         let prefix2_docids = | ||||
|                             self.get_db_word_prefix_fid_docids(universe, prefix2, fid)?; | ||||
|                         if let (Some(word1_docids), Some(prefix2_docids)) = | ||||
|                             (word1_docids, prefix2_docids) | ||||
|                         { | ||||
| @@ -452,16 +508,18 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|     pub fn get_db_prefix_word_pair_proximity_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         left_prefix: Interned<String>, | ||||
|         right: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         // only accept exact matches on reverted positions | ||||
|         self.get_db_word_pair_proximity_docids(left_prefix, right, proximity) | ||||
|         self.get_db_word_pair_proximity_docids(universe, left_prefix, right, proximity) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_fid_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word: Interned<String>, | ||||
|         fid: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
| @@ -470,17 +528,19 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|         DatabaseCache::get_value::<_, _>( | ||||
|             self.txn, | ||||
|             (word, fid), | ||||
|             &(self.word_interner.get(word).as_str(), fid), | ||||
|             &mut self.db_cache.word_fid_docids, | ||||
|             universe, | ||||
|             self.index.word_fid_docids.remap_data_type::<Bytes>(), | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_prefix_fid_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word_prefix: Interned<String>, | ||||
|         fid: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
| @@ -489,11 +549,12 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|         DatabaseCache::get_value::<_, _>( | ||||
|             self.txn, | ||||
|             (word_prefix, fid), | ||||
|             &(self.word_interner.get(word_prefix).as_str(), fid), | ||||
|             &mut self.db_cache.word_prefix_fid_docids, | ||||
|             universe, | ||||
|             self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(), | ||||
|         ) | ||||
|     } | ||||
| @@ -554,28 +615,32 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|     pub fn get_db_word_position_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word: Interned<String>, | ||||
|         position: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|         DatabaseCache::get_value::<_, _>( | ||||
|             self.txn, | ||||
|             (word, position), | ||||
|             &(self.word_interner.get(word).as_str(), position), | ||||
|             &mut self.db_cache.word_position_docids, | ||||
|             universe, | ||||
|             self.index.word_position_docids.remap_data_type::<Bytes>(), | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_prefix_position_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         word_prefix: Interned<String>, | ||||
|         position: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|         DatabaseCache::get_value::<_, _>( | ||||
|             self.txn, | ||||
|             (word_prefix, position), | ||||
|             &(self.word_interner.get(word_prefix).as_str(), position), | ||||
|             &mut self.db_cache.word_prefix_position_docids, | ||||
|             universe, | ||||
|             self.index.word_prefix_position_docids.remap_data_type::<Bytes>(), | ||||
|         ) | ||||
|     } | ||||
|   | ||||
| @@ -171,9 +171,10 @@ impl State { | ||||
|                 // Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of | ||||
|                 // longer phrases we'll be losing on precision here. | ||||
|                 let bucketed_position = crate::bucketed_position(position + offset); | ||||
|                 let word_position_docids = | ||||
|                     ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default() | ||||
|                         & universe; | ||||
|                 let word_position_docids = ctx | ||||
|                     .get_db_word_position_docids(Some(universe), *word, bucketed_position)? | ||||
|                     .unwrap_or_default() | ||||
|                     & universe; | ||||
|                 candidates &= word_position_docids; | ||||
|                 if candidates.is_empty() { | ||||
|                     return Ok(State::Empty(query_graph.clone())); | ||||
| @@ -199,7 +200,9 @@ impl State { | ||||
|                     // ignore stop words in phrases | ||||
|                     .flatten() | ||||
|                     .map(|word| -> Result<_> { | ||||
|                         Ok(ctx.get_db_word_fid_docids(*word, fid)?.unwrap_or_default()) | ||||
|                         Ok(ctx | ||||
|                             .get_db_word_fid_docids(Some(universe), *word, fid)? | ||||
|                             .unwrap_or_default()) | ||||
|                     }), | ||||
|             )?; | ||||
|             intersection &= &candidates; | ||||
|   | ||||
| @@ -232,11 +232,12 @@ fn resolve_universe( | ||||
| #[tracing::instrument(level = "trace", skip_all, target = "search::query")] | ||||
| fn resolve_negative_words( | ||||
|     ctx: &mut SearchContext<'_>, | ||||
|     universe: Option<&RoaringBitmap>, | ||||
|     negative_words: &[Word], | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let mut negative_bitmap = RoaringBitmap::new(); | ||||
|     for &word in negative_words { | ||||
|         if let Some(bitmap) = ctx.word_docids(word)? { | ||||
|         if let Some(bitmap) = ctx.word_docids(universe, word)? { | ||||
|             negative_bitmap |= bitmap; | ||||
|         } | ||||
|     } | ||||
| @@ -246,13 +247,14 @@ fn resolve_negative_words( | ||||
| #[tracing::instrument(level = "trace", skip_all, target = "search::query")] | ||||
| fn resolve_negative_phrases( | ||||
|     ctx: &mut SearchContext<'_>, | ||||
|     universe: Option<&RoaringBitmap>, | ||||
|     negative_phrases: &[LocatedQueryTerm], | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let mut negative_bitmap = RoaringBitmap::new(); | ||||
|     for term in negative_phrases { | ||||
|         let query_term = ctx.term_interner.get(term.value); | ||||
|         if let Some(phrase) = query_term.original_phrase() { | ||||
|             negative_bitmap |= ctx.get_phrase_docids(phrase)?; | ||||
|             negative_bitmap |= ctx.get_phrase_docids(universe, phrase)?; | ||||
|         } | ||||
|     } | ||||
|     Ok(negative_bitmap) | ||||
| @@ -686,8 +688,8 @@ pub fn execute_search( | ||||
|             located_query_terms_from_tokens(ctx, tokens, words_limit)?; | ||||
|         used_negative_operator = !negative_words.is_empty() || !negative_phrases.is_empty(); | ||||
|  | ||||
|         let ignored_documents = resolve_negative_words(ctx, &negative_words)?; | ||||
|         let ignored_phrases = resolve_negative_phrases(ctx, &negative_phrases)?; | ||||
|         let ignored_documents = resolve_negative_words(ctx, Some(&universe), &negative_words)?; | ||||
|         let ignored_phrases = resolve_negative_phrases(ctx, Some(&universe), &negative_phrases)?; | ||||
|  | ||||
|         universe -= ignored_documents; | ||||
|         universe -= ignored_phrases; | ||||
|   | ||||
| @@ -417,7 +417,7 @@ fn split_best_frequency( | ||||
|         let left = ctx.word_interner.insert(left.to_owned()); | ||||
|         let right = ctx.word_interner.insert(right.to_owned()); | ||||
|  | ||||
|         if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? { | ||||
|         if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(None, left, right, 1)? { | ||||
|             if best.map_or(true, |(old, _, _)| frequency > old) { | ||||
|                 best = Some((frequency, left, right)); | ||||
|             } | ||||
|   | ||||
| @@ -26,18 +26,15 @@ fn compute_docids( | ||||
|     } else { | ||||
|         return Ok(Default::default()); | ||||
|     }; | ||||
|     let mut candidates = match exact_term { | ||||
|         ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(), | ||||
|  | ||||
|     let candidates = match exact_term { | ||||
|         // TODO I move the intersection here | ||||
|         ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(Some(universe), phrase)? & universe, | ||||
|         ExactTerm::Word(word) => { | ||||
|             if let Some(word_candidates) = ctx.word_docids(Word::Original(word))? { | ||||
|                 word_candidates | ||||
|             } else { | ||||
|                 return Ok(Default::default()); | ||||
|             } | ||||
|             ctx.word_docids(Some(universe), Word::Original(word))?.unwrap_or_default() | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     candidates &= universe; | ||||
|     Ok(candidates) | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -30,8 +30,12 @@ impl RankingRuleGraphTrait for FidGraph { | ||||
|  | ||||
|         let docids = if let Some(fid) = condition.fid { | ||||
|             // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument | ||||
|             let docids = | ||||
|                 compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?; | ||||
|             let docids = compute_query_term_subset_docids_within_field_id( | ||||
|                 ctx, | ||||
|                 Some(universe), | ||||
|                 &term.term_subset, | ||||
|                 fid, | ||||
|             )?; | ||||
|             docids & universe | ||||
|         } else { | ||||
|             RoaringBitmap::new() | ||||
|   | ||||
| @@ -33,6 +33,7 @@ impl RankingRuleGraphTrait for PositionGraph { | ||||
|             docids |= universe | ||||
|                 & compute_query_term_subset_docids_within_position( | ||||
|                     ctx, | ||||
|                     Some(universe), | ||||
|                     &term.term_subset, | ||||
|                     *position, | ||||
|                 )?; | ||||
|   | ||||
| @@ -74,10 +74,10 @@ pub fn compute_docids( | ||||
|         if right_derivs.len() > 1 { | ||||
|             let universe = &universe; | ||||
|             if let Some(left_phrase) = left_phrase { | ||||
|                 if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) { | ||||
|                 if universe.is_disjoint(ctx.get_phrase_docids(Some(universe), left_phrase)?) { | ||||
|                     continue; | ||||
|                 } | ||||
|             } else if let Some(left_word_docids) = ctx.word_docids(left_word)? { | ||||
|             } else if let Some(left_word_docids) = ctx.word_docids(Some(universe), left_word)? { | ||||
|                 if universe.is_disjoint(&left_word_docids) { | ||||
|                     continue; | ||||
|                 } | ||||
| @@ -123,7 +123,10 @@ fn compute_prefix_edges( | ||||
|  | ||||
|     let mut universe = universe.clone(); | ||||
|     if let Some(phrase) = left_phrase { | ||||
|         let phrase_docids = ctx.get_phrase_docids(phrase)?; | ||||
|         // TODO we can clearly give the universe to this method | ||||
|         //      Unfortunately, it is deserializing/computing stuff and | ||||
|         //      keeping the result as a materialized bitmap. | ||||
|         let phrase_docids = ctx.get_phrase_docids(Some(&universe), phrase)?; | ||||
|         if !phrase_docids.is_empty() { | ||||
|             used_left_phrases.insert(phrase); | ||||
|         } | ||||
| @@ -133,9 +136,13 @@ fn compute_prefix_edges( | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     if let Some(new_docids) = | ||||
|         ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)? | ||||
|     { | ||||
|     // TODO check that the fact that the universe always changes is not an issue, e.g. caching stuff. | ||||
|     if let Some(new_docids) = ctx.get_db_word_prefix_pair_proximity_docids( | ||||
|         Some(&universe), | ||||
|         left_word, | ||||
|         right_prefix, | ||||
|         forward_proximity, | ||||
|     )? { | ||||
|         let new_docids = &universe & new_docids; | ||||
|         if !new_docids.is_empty() { | ||||
|             used_left_words.insert(left_word); | ||||
| @@ -147,6 +154,7 @@ fn compute_prefix_edges( | ||||
|     // No swapping when computing the proximity between a phrase and a word | ||||
|     if left_phrase.is_none() { | ||||
|         if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids( | ||||
|             Some(&universe), | ||||
|             right_prefix, | ||||
|             left_word, | ||||
|             backward_proximity, | ||||
| @@ -177,26 +185,29 @@ fn compute_non_prefix_edges( | ||||
|     let mut universe = universe.clone(); | ||||
|  | ||||
|     for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() { | ||||
|         let phrase_docids = ctx.get_phrase_docids(phrase)?; | ||||
|         // TODO do the intersection in the method, again! | ||||
|         let phrase_docids = ctx.get_phrase_docids(Some(&universe), phrase)?; | ||||
|         universe &= phrase_docids; | ||||
|         if universe.is_empty() { | ||||
|             return Ok(()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // TODO check that it is not an issue to alter the universe | ||||
|     if let Some(new_docids) = | ||||
|         ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)? | ||||
|         ctx.get_db_word_pair_proximity_docids(Some(&universe), word1, word2, forward_proximity)? | ||||
|     { | ||||
|         let new_docids = &universe & new_docids; | ||||
|         if !new_docids.is_empty() { | ||||
|             *docids |= new_docids; | ||||
|         } | ||||
|     } | ||||
|     if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() { | ||||
|         if let Some(new_docids) = | ||||
|             ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)? | ||||
|         { | ||||
|             let new_docids = &universe & new_docids; | ||||
|         if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids( | ||||
|             Some(&universe), | ||||
|             word2, | ||||
|             word1, | ||||
|             backward_proximity, | ||||
|         )? { | ||||
|             if !new_docids.is_empty() { | ||||
|                 *docids |= new_docids; | ||||
|             } | ||||
|   | ||||
| @@ -19,11 +19,16 @@ pub struct PhraseDocIdsCache { | ||||
| } | ||||
| impl<'ctx> SearchContext<'ctx> { | ||||
|     /// Get the document ids associated with the given phrase | ||||
|     pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> { | ||||
|     pub fn get_phrase_docids( | ||||
|         &mut self, | ||||
|         universe: Option<&RoaringBitmap>, | ||||
|         phrase: Interned<Phrase>, | ||||
|     ) -> Result<&RoaringBitmap> { | ||||
|         if self.phrase_docids.cache.contains_key(&phrase) { | ||||
|             return Ok(&self.phrase_docids.cache[&phrase]); | ||||
|         }; | ||||
|         let docids = compute_phrase_docids(self, phrase)?; | ||||
|         let docids = compute_phrase_docids(self, universe, phrase)?; | ||||
|         // TODO can we improve that? Because there is an issue, we keep that in cache... | ||||
|         let _ = self.phrase_docids.cache.insert(phrase, docids); | ||||
|         let docids = &self.phrase_docids.cache[&phrase]; | ||||
|         Ok(docids) | ||||
| @@ -35,17 +40,18 @@ pub fn compute_query_term_subset_docids( | ||||
|     term: &QueryTermSubset, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|     // TODO use the MultiOps trait to do large intersections | ||||
|     for word in term.all_single_words_except_prefix_db(ctx)? { | ||||
|         if let Some(word_docids) = ctx.word_docids(word)? { | ||||
|         if let Some(word_docids) = ctx.word_docids(universe, word)? { | ||||
|             docids |= word_docids; | ||||
|         } | ||||
|     } | ||||
|     for phrase in term.all_phrases(ctx)? { | ||||
|         docids |= ctx.get_phrase_docids(phrase)?; | ||||
|         docids |= ctx.get_phrase_docids(universe, phrase)?; | ||||
|     } | ||||
|  | ||||
|     if let Some(prefix) = term.use_prefix_db(ctx) { | ||||
|         if let Some(prefix_docids) = ctx.word_prefix_docids(prefix)? { | ||||
|         if let Some(prefix_docids) = ctx.word_prefix_docids(universe, prefix)? { | ||||
|             docids |= prefix_docids; | ||||
|         } | ||||
|     } | ||||
| @@ -58,12 +64,13 @@ pub fn compute_query_term_subset_docids( | ||||
|  | ||||
| pub fn compute_query_term_subset_docids_within_field_id( | ||||
|     ctx: &mut SearchContext<'_>, | ||||
|     universe: Option<&RoaringBitmap>, | ||||
|     term: &QueryTermSubset, | ||||
|     fid: u16, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|     for word in term.all_single_words_except_prefix_db(ctx)? { | ||||
|         if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(word.interned(), fid)? { | ||||
|         if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, word.interned(), fid)? { | ||||
|             docids |= word_fid_docids; | ||||
|         } | ||||
|     } | ||||
| @@ -72,15 +79,15 @@ pub fn compute_query_term_subset_docids_within_field_id( | ||||
|         // There may be false positives when resolving a phrase, so we're not | ||||
|         // guaranteed that all of its words are within a single fid. | ||||
|         if let Some(word) = phrase.words(ctx).iter().flatten().next() { | ||||
|             if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? { | ||||
|                 docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids; | ||||
|             if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, *word, fid)? { | ||||
|                 docids |= ctx.get_phrase_docids(Some(&word_fid_docids), phrase)?; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     if let Some(word_prefix) = term.use_prefix_db(ctx) { | ||||
|         if let Some(word_fid_docids) = | ||||
|             ctx.get_db_word_prefix_fid_docids(word_prefix.interned(), fid)? | ||||
|             ctx.get_db_word_prefix_fid_docids(universe, word_prefix.interned(), fid)? | ||||
|         { | ||||
|             docids |= word_fid_docids; | ||||
|         } | ||||
| @@ -91,13 +98,14 @@ pub fn compute_query_term_subset_docids_within_field_id( | ||||
|  | ||||
| pub fn compute_query_term_subset_docids_within_position( | ||||
|     ctx: &mut SearchContext<'_>, | ||||
|     universe: Option<&RoaringBitmap>, | ||||
|     term: &QueryTermSubset, | ||||
|     position: u16, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|     for word in term.all_single_words_except_prefix_db(ctx)? { | ||||
|         if let Some(word_position_docids) = | ||||
|             ctx.get_db_word_position_docids(word.interned(), position)? | ||||
|             ctx.get_db_word_position_docids(universe, word.interned(), position)? | ||||
|         { | ||||
|             docids |= word_position_docids; | ||||
|         } | ||||
| @@ -107,15 +115,17 @@ pub fn compute_query_term_subset_docids_within_position( | ||||
|         // It's difficult to know the expected position of the words in the phrase, | ||||
|         // so instead we just check the first one. | ||||
|         if let Some(word) = phrase.words(ctx).iter().flatten().next() { | ||||
|             if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? { | ||||
|                 docids |= ctx.get_phrase_docids(phrase)? & word_position_docids | ||||
|             if let Some(word_position_docids) = | ||||
|                 ctx.get_db_word_position_docids(universe, *word, position)? | ||||
|             { | ||||
|                 docids |= ctx.get_phrase_docids(Some(&word_position_docids), phrase)?; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     if let Some(word_prefix) = term.use_prefix_db(ctx) { | ||||
|         if let Some(word_position_docids) = | ||||
|             ctx.get_db_word_prefix_position_docids(word_prefix.interned(), position)? | ||||
|             ctx.get_db_word_prefix_position_docids(universe, word_prefix.interned(), position)? | ||||
|         { | ||||
|             docids |= word_position_docids; | ||||
|         } | ||||
| @@ -180,6 +190,7 @@ pub fn compute_query_graph_docids( | ||||
|  | ||||
| pub fn compute_phrase_docids( | ||||
|     ctx: &mut SearchContext<'_>, | ||||
|     universe: Option<&RoaringBitmap>, | ||||
|     phrase: Interned<Phrase>, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let Phrase { words } = ctx.phrase_interner.get(phrase).clone(); | ||||
| @@ -189,7 +200,7 @@ pub fn compute_phrase_docids( | ||||
|     } | ||||
|     let mut candidates = RoaringBitmap::new(); | ||||
|     for word in words.iter().flatten().copied() { | ||||
|         if let Some(word_docids) = ctx.word_docids(Word::Original(word))? { | ||||
|         if let Some(word_docids) = ctx.word_docids(universe, Word::Original(word))? { | ||||
|             candidates |= word_docids; | ||||
|         } else { | ||||
|             return Ok(RoaringBitmap::new()); | ||||
| @@ -213,7 +224,7 @@ pub fn compute_phrase_docids( | ||||
|                 .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) | ||||
|             { | ||||
|                 if dist == 0 { | ||||
|                     match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? { | ||||
|                     match ctx.get_db_word_pair_proximity_docids(universe, s1, s2, 1)? { | ||||
|                         Some(m) => bitmaps.push(m), | ||||
|                         // If there are no documents for this pair, there will be no | ||||
|                         // results for the phrase query. | ||||
| @@ -223,7 +234,7 @@ pub fn compute_phrase_docids( | ||||
|                     let mut bitmap = RoaringBitmap::new(); | ||||
|                     for dist in 0..=dist { | ||||
|                         if let Some(m) = | ||||
|                             ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)? | ||||
|                             ctx.get_db_word_pair_proximity_docids(universe, s1, s2, dist as u8 + 1)? | ||||
|                         { | ||||
|                             bitmap |= m; | ||||
|                         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user