mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Intern all strings and phrases in the search logic
This commit is contained in:
		| @@ -1,51 +1,48 @@ | |||||||
| use std::collections::hash_map::Entry; | use super::{interner::Interned, SearchContext}; | ||||||
|  | use crate::Result; | ||||||
| use fxhash::FxHashMap; | use fxhash::FxHashMap; | ||||||
| use heed::types::ByteSlice; | use heed::types::ByteSlice; | ||||||
| use heed::RoTxn; | use std::collections::hash_map::Entry; | ||||||
|  |  | ||||||
| use crate::{Index, Result}; |  | ||||||
|  |  | ||||||
| #[derive(Default)] | #[derive(Default)] | ||||||
| pub struct DatabaseCache<'transaction> { | pub struct DatabaseCache<'search> { | ||||||
|     pub word_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, |     // TODO: interner for all database cache keys | ||||||
|  |     pub word_pair_proximity_docids: | ||||||
|  |         FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>, | ||||||
|     pub word_prefix_pair_proximity_docids: |     pub word_prefix_pair_proximity_docids: | ||||||
|         FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, |         FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>, | ||||||
|     pub prefix_word_pair_proximity_docids: |     pub prefix_word_pair_proximity_docids: | ||||||
|         FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, |         FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>, | ||||||
|     pub word_docids: FxHashMap<String, Option<&'transaction [u8]>>, |     pub word_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>, | ||||||
|     pub exact_word_docids: FxHashMap<String, Option<&'transaction [u8]>>, |     pub exact_word_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>, | ||||||
|     pub word_prefix_docids: FxHashMap<String, Option<&'transaction [u8]>>, |     pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>, | ||||||
| } | } | ||||||
| impl<'transaction> DatabaseCache<'transaction> { | impl<'search> SearchContext<'search> { | ||||||
|     pub fn get_word_docids( |     pub fn get_word_docids(&mut self, word: Interned<String>) -> Result<Option<&'search [u8]>> { | ||||||
|         &mut self, |         let bitmap_ptr = match self.db_cache.word_docids.entry(word) { | ||||||
|         index: &Index, |  | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         word: &str, |  | ||||||
|     ) -> Result<Option<&'transaction [u8]>> { |  | ||||||
|         let bitmap_ptr = match self.word_docids.entry(word.to_owned()) { |  | ||||||
|             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), |             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), | ||||||
|             Entry::Vacant(entry) => { |             Entry::Vacant(entry) => { | ||||||
|                 let bitmap_ptr = index.word_docids.remap_data_type::<ByteSlice>().get(txn, word)?; |                 let bitmap_ptr = self | ||||||
|  |                     .index | ||||||
|  |                     .word_docids | ||||||
|  |                     .remap_data_type::<ByteSlice>() | ||||||
|  |                     .get(self.txn, self.word_interner.get(word))?; | ||||||
|                 entry.insert(bitmap_ptr); |                 entry.insert(bitmap_ptr); | ||||||
|                 bitmap_ptr |                 bitmap_ptr | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|         Ok(bitmap_ptr) |         Ok(bitmap_ptr) | ||||||
|     } |     } | ||||||
|     pub fn get_prefix_docids( |     pub fn get_prefix_docids(&mut self, prefix: Interned<String>) -> Result<Option<&'search [u8]>> { | ||||||
|         &mut self, |  | ||||||
|         index: &Index, |  | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         prefix: &str, |  | ||||||
|     ) -> Result<Option<&'transaction [u8]>> { |  | ||||||
|         // In the future, this will be a frozen roaring bitmap |         // In the future, this will be a frozen roaring bitmap | ||||||
|         let bitmap_ptr = match self.word_prefix_docids.entry(prefix.to_owned()) { |         let bitmap_ptr = match self.db_cache.word_prefix_docids.entry(prefix) { | ||||||
|             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), |             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), | ||||||
|             Entry::Vacant(entry) => { |             Entry::Vacant(entry) => { | ||||||
|                 let bitmap_ptr = |                 let bitmap_ptr = self | ||||||
|                     index.word_prefix_docids.remap_data_type::<ByteSlice>().get(txn, prefix)?; |                     .index | ||||||
|  |                     .word_prefix_docids | ||||||
|  |                     .remap_data_type::<ByteSlice>() | ||||||
|  |                     .get(self.txn, self.word_interner.get(prefix))?; | ||||||
|                 entry.insert(bitmap_ptr); |                 entry.insert(bitmap_ptr); | ||||||
|                 bitmap_ptr |                 bitmap_ptr | ||||||
|             } |             } | ||||||
| @@ -55,14 +52,12 @@ impl<'transaction> DatabaseCache<'transaction> { | |||||||
|  |  | ||||||
|     pub fn get_word_pair_proximity_docids( |     pub fn get_word_pair_proximity_docids( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         word1: Interned<String>, | ||||||
|         txn: &'transaction RoTxn, |         word2: Interned<String>, | ||||||
|         word1: &str, |  | ||||||
|         word2: &str, |  | ||||||
|         proximity: u8, |         proximity: u8, | ||||||
|     ) -> Result<Option<&'transaction [u8]>> { |     ) -> Result<Option<&'search [u8]>> { | ||||||
|         let key = (proximity, word1.to_owned(), word2.to_owned()); |         let key = (proximity, word1, word2); | ||||||
|         match self.word_pair_proximity_docids.entry(key.clone()) { |         match self.db_cache.word_pair_proximity_docids.entry(key) { | ||||||
|             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), |             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), | ||||||
|             Entry::Vacant(entry) => { |             Entry::Vacant(entry) => { | ||||||
|                 // We shouldn't greedily access this DB at all |                 // We shouldn't greedily access this DB at all | ||||||
| @@ -86,10 +81,11 @@ impl<'transaction> DatabaseCache<'transaction> { | |||||||
|                 //          output.push(word1, word2, proximities); |                 //          output.push(word1, word2, proximities); | ||||||
|                 //      } |                 //      } | ||||||
|                 //  } |                 //  } | ||||||
|                 let bitmap_ptr = index |                 let bitmap_ptr = | ||||||
|                     .word_pair_proximity_docids |                     self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>().get( | ||||||
|                     .remap_data_type::<ByteSlice>() |                         self.txn, | ||||||
|                     .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?; |                         &(key.0, self.word_interner.get(key.1), self.word_interner.get(key.2)), | ||||||
|  |                     )?; | ||||||
|                 entry.insert(bitmap_ptr); |                 entry.insert(bitmap_ptr); | ||||||
|                 Ok(bitmap_ptr) |                 Ok(bitmap_ptr) | ||||||
|             } |             } | ||||||
| @@ -98,20 +94,22 @@ impl<'transaction> DatabaseCache<'transaction> { | |||||||
|  |  | ||||||
|     pub fn get_word_prefix_pair_proximity_docids( |     pub fn get_word_prefix_pair_proximity_docids( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         word1: Interned<String>, | ||||||
|         txn: &'transaction RoTxn, |         prefix2: Interned<String>, | ||||||
|         word1: &str, |  | ||||||
|         prefix2: &str, |  | ||||||
|         proximity: u8, |         proximity: u8, | ||||||
|     ) -> Result<Option<&'transaction [u8]>> { |     ) -> Result<Option<&'search [u8]>> { | ||||||
|         let key = (proximity, word1.to_owned(), prefix2.to_owned()); |         let key = (proximity, word1, prefix2); | ||||||
|         match self.word_prefix_pair_proximity_docids.entry(key.clone()) { |         match self.db_cache.word_prefix_pair_proximity_docids.entry(key) { | ||||||
|             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), |             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), | ||||||
|             Entry::Vacant(entry) => { |             Entry::Vacant(entry) => { | ||||||
|                 let bitmap_ptr = index |                 let bitmap_ptr = self | ||||||
|  |                     .index | ||||||
|                     .word_prefix_pair_proximity_docids |                     .word_prefix_pair_proximity_docids | ||||||
|                     .remap_data_type::<ByteSlice>() |                     .remap_data_type::<ByteSlice>() | ||||||
|                     .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?; |                     .get( | ||||||
|  |                         self.txn, | ||||||
|  |                         &(key.0, self.word_interner.get(key.1), self.word_interner.get(key.2)), | ||||||
|  |                     )?; | ||||||
|                 entry.insert(bitmap_ptr); |                 entry.insert(bitmap_ptr); | ||||||
|                 Ok(bitmap_ptr) |                 Ok(bitmap_ptr) | ||||||
|             } |             } | ||||||
| @@ -119,20 +117,26 @@ impl<'transaction> DatabaseCache<'transaction> { | |||||||
|     } |     } | ||||||
|     pub fn get_prefix_word_pair_proximity_docids( |     pub fn get_prefix_word_pair_proximity_docids( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         left_prefix: Interned<String>, | ||||||
|         txn: &'transaction RoTxn, |         right: Interned<String>, | ||||||
|         left_prefix: &str, |  | ||||||
|         right: &str, |  | ||||||
|         proximity: u8, |         proximity: u8, | ||||||
|     ) -> Result<Option<&'transaction [u8]>> { |     ) -> Result<Option<&'search [u8]>> { | ||||||
|         let key = (proximity, left_prefix.to_owned(), right.to_owned()); |         let key = (proximity, left_prefix, right); | ||||||
|         match self.prefix_word_pair_proximity_docids.entry(key) { |         match self.db_cache.prefix_word_pair_proximity_docids.entry(key) { | ||||||
|             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), |             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), | ||||||
|             Entry::Vacant(entry) => { |             Entry::Vacant(entry) => { | ||||||
|                 let bitmap_ptr = index |                 let bitmap_ptr = self | ||||||
|  |                     .index | ||||||
|                     .prefix_word_pair_proximity_docids |                     .prefix_word_pair_proximity_docids | ||||||
|                     .remap_data_type::<ByteSlice>() |                     .remap_data_type::<ByteSlice>() | ||||||
|                     .get(txn, &(proximity, left_prefix, right))?; |                     .get( | ||||||
|  |                         self.txn, | ||||||
|  |                         &( | ||||||
|  |                             proximity, | ||||||
|  |                             self.word_interner.get(left_prefix), | ||||||
|  |                             self.word_interner.get(right), | ||||||
|  |                         ), | ||||||
|  |                     )?; | ||||||
|                 entry.insert(bitmap_ptr); |                 entry.insert(bitmap_ptr); | ||||||
|                 Ok(bitmap_ptr) |                 Ok(bitmap_ptr) | ||||||
|             } |             } | ||||||
|   | |||||||
| @@ -1,15 +1,11 @@ | |||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; |  | ||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
| use super::ranking_rule_graph::EdgeDocidsCache; | use super::ranking_rule_graph::EdgeDocidsCache; | ||||||
| use super::ranking_rule_graph::EmptyPathsCache; | use super::ranking_rule_graph::EmptyPathsCache; | ||||||
|  |  | ||||||
| use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait}; | use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait}; | ||||||
|  | use super::SearchContext; | ||||||
| use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput}; | use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput}; | ||||||
|  | use crate::Result; | ||||||
| use crate::{Index, Result}; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> { | pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> { | ||||||
|     id: String, |     id: String, | ||||||
| @@ -29,12 +25,10 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> { | |||||||
|     cur_distance_idx: usize, |     cur_distance_idx: usize, | ||||||
| } | } | ||||||
|  |  | ||||||
| fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>( | fn remove_empty_edges<'search, G: RankingRuleGraphTrait>( | ||||||
|  |     ctx: &mut SearchContext<'search>, | ||||||
|     graph: &mut RankingRuleGraph<G>, |     graph: &mut RankingRuleGraph<G>, | ||||||
|     edge_docids_cache: &mut EdgeDocidsCache<G>, |     edge_docids_cache: &mut EdgeDocidsCache<G>, | ||||||
|     index: &Index, |  | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
|     empty_paths_cache: &mut EmptyPathsCache, |     empty_paths_cache: &mut EmptyPathsCache, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
| @@ -42,8 +36,7 @@ fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>( | |||||||
|         if graph.all_edges[edge_index as usize].is_none() { |         if graph.all_edges[edge_index as usize].is_none() { | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|         let docids = edge_docids_cache |         let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?; | ||||||
|             .get_edge_docids(index, txn, db_cache, edge_index, &*graph, universe)?; |  | ||||||
|         match docids { |         match docids { | ||||||
|             BitmapOrAllRef::Bitmap(bitmap) => { |             BitmapOrAllRef::Bitmap(bitmap) => { | ||||||
|                 if bitmap.is_disjoint(universe) { |                 if bitmap.is_disjoint(universe) { | ||||||
| @@ -59,7 +52,7 @@ fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGraph> | impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph> | ||||||
|     for GraphBasedRankingRule<G> |     for GraphBasedRankingRule<G> | ||||||
| { | { | ||||||
|     fn id(&self) -> String { |     fn id(&self) -> String { | ||||||
| @@ -67,24 +60,20 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | |||||||
|     } |     } | ||||||
|     fn start_iteration( |     fn start_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<QueryGraph>, |         _logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         query_graph: &QueryGraph, |         query_graph: &QueryGraph, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         // TODO: update old state instead of starting from scratch |         // TODO: update old state instead of starting from scratch | ||||||
|         let mut graph = RankingRuleGraph::build(index, txn, db_cache, query_graph.clone())?; |         let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; | ||||||
|         let mut edge_docids_cache = EdgeDocidsCache::default(); |         let mut edge_docids_cache = EdgeDocidsCache::default(); | ||||||
|         let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len()); |         let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len()); | ||||||
|  |  | ||||||
|         remove_empty_edges( |         remove_empty_edges( | ||||||
|  |             ctx, | ||||||
|             &mut graph, |             &mut graph, | ||||||
|             &mut edge_docids_cache, |             &mut edge_docids_cache, | ||||||
|             index, |  | ||||||
|             txn, |  | ||||||
|             db_cache, |  | ||||||
|             universe, |             universe, | ||||||
|             &mut empty_paths_cache, |             &mut empty_paths_cache, | ||||||
|         )?; |         )?; | ||||||
| @@ -105,20 +94,16 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | |||||||
|  |  | ||||||
|     fn next_bucket( |     fn next_bucket( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         logger: &mut dyn SearchLogger<QueryGraph>, |         logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> { |     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> { | ||||||
|         assert!(universe.len() > 1); |         assert!(universe.len() > 1); | ||||||
|         let mut state = self.state.take().unwrap(); |         let mut state = self.state.take().unwrap(); | ||||||
|         remove_empty_edges( |         remove_empty_edges( | ||||||
|  |             ctx, | ||||||
|             &mut state.graph, |             &mut state.graph, | ||||||
|             &mut state.edge_docids_cache, |             &mut state.edge_docids_cache, | ||||||
|             index, |  | ||||||
|             txn, |  | ||||||
|             db_cache, |  | ||||||
|             universe, |             universe, | ||||||
|             &mut state.empty_paths_cache, |             &mut state.empty_paths_cache, | ||||||
|         )?; |         )?; | ||||||
| @@ -151,9 +136,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | |||||||
|         ); |         ); | ||||||
|  |  | ||||||
|         let bucket = state.graph.resolve_paths( |         let bucket = state.graph.resolve_paths( | ||||||
|             index, |             ctx, | ||||||
|             txn, |  | ||||||
|             db_cache, |  | ||||||
|             &mut state.edge_docids_cache, |             &mut state.edge_docids_cache, | ||||||
|             &mut state.empty_paths_cache, |             &mut state.empty_paths_cache, | ||||||
|             universe, |             universe, | ||||||
| @@ -169,9 +152,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | |||||||
|  |  | ||||||
|     fn end_iteration( |     fn end_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<QueryGraph>, |         _logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|     ) { |     ) { | ||||||
|         self.state = None; |         self.state = None; | ||||||
|   | |||||||
							
								
								
									
										78
									
								
								milli/src/search/new/interner.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								milli/src/search/new/interner.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | |||||||
|  | use fxhash::FxHashMap; | ||||||
|  | use std::hash::Hash; | ||||||
|  | use std::marker::PhantomData; | ||||||
|  |  | ||||||
|  | pub struct Interned<T> { | ||||||
|  |     idx: u32, | ||||||
|  |     _phantom: PhantomData<T>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<T> Interned<T> { | ||||||
|  |     fn new(idx: u32) -> Self { | ||||||
|  |         Self { idx, _phantom: PhantomData } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct Interner<T> { | ||||||
|  |     stable_store: Vec<T>, | ||||||
|  |     lookup: FxHashMap<T, Interned<T>>, | ||||||
|  | } | ||||||
|  | impl<T> Default for Interner<T> { | ||||||
|  |     fn default() -> Self { | ||||||
|  |         Self { stable_store: Default::default(), lookup: Default::default() } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<T> Interner<T> | ||||||
|  | where | ||||||
|  |     T: Clone + Eq + Hash, | ||||||
|  | { | ||||||
|  |     pub fn insert(&mut self, s: T) -> Interned<T> { | ||||||
|  |         if let Some(interned) = self.lookup.get(&s) { | ||||||
|  |             *interned | ||||||
|  |         } else { | ||||||
|  |             self.stable_store.push(s.clone()); | ||||||
|  |             let interned = Interned::new(self.stable_store.len() as u32 - 1); | ||||||
|  |             self.lookup.insert(s, interned); | ||||||
|  |             interned | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     pub fn get(&self, interned: Interned<T>) -> &T { | ||||||
|  |         &self.stable_store[interned.idx as usize] | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Interned<T> boilerplate implementations | ||||||
|  |  | ||||||
|  | impl<T> Hash for Interned<T> { | ||||||
|  |     fn hash<H: std::hash::Hasher>(&self, state: &mut H) { | ||||||
|  |         self.idx.hash(state); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<T: Ord> Ord for Interned<T> { | ||||||
|  |     fn cmp(&self, other: &Self) -> std::cmp::Ordering { | ||||||
|  |         self.idx.cmp(&other.idx) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<T> PartialOrd for Interned<T> { | ||||||
|  |     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { | ||||||
|  |         self.idx.partial_cmp(&other.idx) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<T> Eq for Interned<T> {} | ||||||
|  |  | ||||||
|  | impl<T> PartialEq for Interned<T> { | ||||||
|  |     fn eq(&self, other: &Self) -> bool { | ||||||
|  |         self.idx == other.idx | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<T> Clone for Interned<T> { | ||||||
|  |     fn clone(&self) -> Self { | ||||||
|  |         Self { idx: self.idx, _phantom: PhantomData } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<T> Copy for Interned<T> {} | ||||||
| @@ -6,7 +6,7 @@ use std::time::Instant; | |||||||
| use std::{io::Write, path::PathBuf}; | use std::{io::Write, path::PathBuf}; | ||||||
|  |  | ||||||
| use crate::new::ranking_rule_graph::TypoGraph; | use crate::new::ranking_rule_graph::TypoGraph; | ||||||
| use crate::new::{QueryNode, QueryGraph}; | use crate::new::{QueryNode, QueryGraph, SearchContext}; | ||||||
| use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | ||||||
| use crate::new::ranking_rule_graph::EmptyPathsCache; | use crate::new::ranking_rule_graph::EmptyPathsCache; | ||||||
| use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait}; | use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait}; | ||||||
| @@ -176,7 +176,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl DetailedSearchLogger { | impl DetailedSearchLogger { | ||||||
|     pub fn write_d2_description(&self) { |     pub fn write_d2_description(&self,ctx: &mut SearchContext,) { | ||||||
|         let mut prev_time = self.initial_query_time.unwrap(); |         let mut prev_time = self.initial_query_time.unwrap(); | ||||||
|         let mut timestamp = vec![]; |         let mut timestamp = vec![]; | ||||||
|         fn activated_id(timestamp: &[usize]) -> String { |         fn activated_id(timestamp: &[usize]) -> String { | ||||||
| @@ -193,12 +193,12 @@ impl DetailedSearchLogger { | |||||||
|         writeln!(&mut file, "direction: right").unwrap(); |         writeln!(&mut file, "direction: right").unwrap(); | ||||||
|         writeln!(&mut file, "Initial Query Graph: {{").unwrap(); |         writeln!(&mut file, "Initial Query Graph: {{").unwrap(); | ||||||
|         let initial_query_graph = self.initial_query.as_ref().unwrap(); |         let initial_query_graph = self.initial_query.as_ref().unwrap(); | ||||||
|         Self::query_graph_d2_description(initial_query_graph, &mut file); |         Self::query_graph_d2_description(ctx, initial_query_graph, &mut file); | ||||||
|         writeln!(&mut file, "}}").unwrap(); |         writeln!(&mut file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap(); |         writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap(); | ||||||
|         let query_graph_for_universe = self.query_for_universe.as_ref().unwrap(); |         let query_graph_for_universe = self.query_for_universe.as_ref().unwrap(); | ||||||
|         Self::query_graph_d2_description(query_graph_for_universe, &mut file); |         Self::query_graph_d2_description(ctx, query_graph_for_universe, &mut file); | ||||||
|         writeln!(&mut file, "}}").unwrap(); |         writeln!(&mut file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         let initial_universe = self.initial_universe.as_ref().unwrap(); |         let initial_universe = self.initial_universe.as_ref().unwrap(); | ||||||
| @@ -308,7 +308,7 @@ results.{random} {{ | |||||||
|                     let id = format!("{cur_ranking_rule}.{cur_activated_id}"); |                     let id = format!("{cur_ranking_rule}.{cur_activated_id}"); | ||||||
|                     let new_file_path = self.folder_path.join(format!("{id}.d2")); |                     let new_file_path = self.folder_path.join(format!("{id}.d2")); | ||||||
|                     let mut new_file = std::fs::File::create(new_file_path).unwrap(); |                     let mut new_file = std::fs::File::create(new_file_path).unwrap(); | ||||||
|                     Self::query_graph_d2_description(query_graph, &mut new_file); |                     Self::query_graph_d2_description(ctx, query_graph, &mut new_file); | ||||||
|                     writeln!( |                     writeln!( | ||||||
|                         &mut file, |                         &mut file, | ||||||
|                         "{id} {{ |                         "{id} {{ | ||||||
| @@ -323,7 +323,7 @@ results.{random} {{ | |||||||
|                     let id = format!("{cur_ranking_rule}.{cur_activated_id}"); |                     let id = format!("{cur_ranking_rule}.{cur_activated_id}"); | ||||||
|                     let new_file_path = self.folder_path.join(format!("{id}.d2")); |                     let new_file_path = self.folder_path.join(format!("{id}.d2")); | ||||||
|                     let mut new_file = std::fs::File::create(new_file_path).unwrap(); |                     let mut new_file = std::fs::File::create(new_file_path).unwrap(); | ||||||
|                     Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, distances.clone(), &mut new_file); |                     Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file); | ||||||
|                     writeln!( |                     writeln!( | ||||||
|                         &mut file, |                         &mut file, | ||||||
|                         "{id} {{ |                         "{id} {{ | ||||||
| @@ -339,7 +339,7 @@ results.{random} {{ | |||||||
|                     let id = format!("{cur_ranking_rule}.{cur_activated_id}"); |                     let id = format!("{cur_ranking_rule}.{cur_activated_id}"); | ||||||
|                     let new_file_path = self.folder_path.join(format!("{id}.d2")); |                     let new_file_path = self.folder_path.join(format!("{id}.d2")); | ||||||
|                     let mut new_file = std::fs::File::create(new_file_path).unwrap(); |                     let mut new_file = std::fs::File::create(new_file_path).unwrap(); | ||||||
|                     Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, distances.clone(), &mut new_file); |                     Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file); | ||||||
|                     writeln!( |                     writeln!( | ||||||
|                         &mut file, |                         &mut file, | ||||||
|                         "{id} {{ |                         "{id} {{ | ||||||
| @@ -352,31 +352,40 @@ results.{random} {{ | |||||||
|         writeln!(&mut file, "}}").unwrap(); |         writeln!(&mut file, "}}").unwrap(); | ||||||
|     } |     } | ||||||
|      |      | ||||||
|     fn query_node_d2_desc(node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) { |     fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) { | ||||||
|         match &node { |         match &node { | ||||||
|             QueryNode::Term(LocatedQueryTerm { value, .. }) => { |             QueryNode::Term(LocatedQueryTerm { value, .. }) => { | ||||||
|                 match value { |                 match value { | ||||||
|                     QueryTerm::Phrase { phrase } => { |                     QueryTerm::Phrase { phrase } => { | ||||||
|                         let phrase_str = phrase.description(); |                         let phrase = ctx.phrase_interner.get(*phrase); | ||||||
|  |                         let phrase_str =  phrase.description(&ctx.word_interner); | ||||||
|                         writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap(); |                         writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap(); | ||||||
|                     }, |                     }, | ||||||
|                     QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => { |                     QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => { | ||||||
|  |                         let original = ctx.word_interner.get(*original); | ||||||
|                         writeln!(file,"{node_idx} : \"{original}\" {{ |                         writeln!(file,"{node_idx} : \"{original}\" {{ | ||||||
| shape: class").unwrap(); | shape: class").unwrap(); | ||||||
|                         for w in zero_typo { |                         for w in zero_typo.iter().copied() { | ||||||
|  |                             let w = ctx.word_interner.get(w); | ||||||
|                             writeln!(file, "\"{w}\" : 0").unwrap(); |                             writeln!(file, "\"{w}\" : 0").unwrap(); | ||||||
|                         } |                         } | ||||||
|                         for w in one_typo { |                         for w in one_typo.iter().copied() { | ||||||
|  |                             let w = ctx.word_interner.get(w); | ||||||
|                             writeln!(file, "\"{w}\" : 1").unwrap(); |                             writeln!(file, "\"{w}\" : 1").unwrap(); | ||||||
|                         } |                         } | ||||||
|                         for w in two_typos { |                         for w in two_typos.iter().copied() { | ||||||
|  |                             let w = ctx.word_interner.get(w); | ||||||
|                             writeln!(file, "\"{w}\" : 2").unwrap(); |                             writeln!(file, "\"{w}\" : 2").unwrap(); | ||||||
|                         } |                         } | ||||||
|                         if let Some((left, right)) = split_words { |                         if let Some(split_words) = split_words { | ||||||
|                             writeln!(file, "\"{left} {right}\" : split_words").unwrap(); |                             let phrase = ctx.phrase_interner.get(*split_words); | ||||||
|  |                             let phrase_str =  phrase.description(&ctx.word_interner); | ||||||
|  |                             writeln!(file, "\"{phrase_str}\" : split_words").unwrap(); | ||||||
|                         } |                         } | ||||||
|                         for synonym in synonyms { |                         for synonym in synonyms.iter().copied() { | ||||||
|                             writeln!(file, "\"{}\" : synonym", synonym.description()).unwrap(); |                             let phrase = ctx.phrase_interner.get(synonym); | ||||||
|  |                             let phrase_str =  phrase.description(&ctx.word_interner); | ||||||
|  |                             writeln!(file, "\"{phrase_str}\" : synonym").unwrap(); | ||||||
|                         } |                         } | ||||||
|                         if *use_prefix_db { |                         if *use_prefix_db { | ||||||
|                             writeln!(file, "use prefix DB : true").unwrap(); |                             writeln!(file, "use prefix DB : true").unwrap(); | ||||||
| @@ -398,20 +407,20 @@ shape: class").unwrap(); | |||||||
|             }, |             }, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) { |     fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) { | ||||||
|         writeln!(file,"direction: right").unwrap(); |         writeln!(file,"direction: right").unwrap(); | ||||||
|         for node in 0..query_graph.nodes.len() { |         for node in 0..query_graph.nodes.len() { | ||||||
|             if matches!(query_graph.nodes[node], QueryNode::Deleted) { |             if matches!(query_graph.nodes[node], QueryNode::Deleted) { | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             Self::query_node_d2_desc(node, &query_graph.nodes[node], &[], file); |             Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file); | ||||||
|              |              | ||||||
|             for edge in query_graph.edges[node].successors.iter() { |             for edge in query_graph.edges[node].successors.iter() { | ||||||
|                 writeln!(file, "{node} -> {edge};\n").unwrap(); |                 writeln!(file, "{node} -> {edge};\n").unwrap(); | ||||||
|             } |             } | ||||||
|         }         |         }         | ||||||
|     } |     } | ||||||
|     fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) { |     fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) { | ||||||
|         writeln!(file,"direction: right").unwrap(); |         writeln!(file,"direction: right").unwrap(); | ||||||
|  |  | ||||||
|         writeln!(file, "Proximity Graph {{").unwrap(); |         writeln!(file, "Proximity Graph {{").unwrap(); | ||||||
| @@ -420,7 +429,7 @@ shape: class").unwrap(); | |||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             let distances = &distances[node_idx]; |             let distances = &distances[node_idx]; | ||||||
|             Self::query_node_d2_desc(node_idx, node, distances.as_slice(), file); |             Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file); | ||||||
|         } |         } | ||||||
|         for edge in graph.all_edges.iter().flatten() { |         for edge in graph.all_edges.iter().flatten() { | ||||||
|             let Edge { from_node, to_node, details, .. } = edge; |             let Edge { from_node, to_node, details, .. } = edge; | ||||||
| @@ -449,7 +458,7 @@ shape: class").unwrap(); | |||||||
|  |  | ||||||
|          |          | ||||||
|         writeln!(file, "Shortest Paths {{").unwrap(); |         writeln!(file, "Shortest Paths {{").unwrap(); | ||||||
|         Self::paths_d2_description(graph, paths, file); |         Self::paths_d2_description(ctx, graph, paths, file); | ||||||
|         writeln!(file, "}}").unwrap(); |         writeln!(file, "}}").unwrap(); | ||||||
|  |  | ||||||
|         // writeln!(file, "Empty Edge Couples {{").unwrap();             |         // writeln!(file, "Empty Edge Couples {{").unwrap();             | ||||||
| @@ -468,15 +477,18 @@ shape: class").unwrap(); | |||||||
|         // } |         // } | ||||||
|         // writeln!(file, "}}").unwrap(); |         // writeln!(file, "}}").unwrap(); | ||||||
|     } |     } | ||||||
|     fn edge_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) { |     fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext,graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) { | ||||||
|         let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ; |         let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ; | ||||||
|         let from_node = &graph.query_graph.nodes[*from_node as usize]; |         let from_node = &graph.query_graph.nodes[*from_node as usize]; | ||||||
|         let from_node_desc = match from_node { |         let from_node_desc = match from_node { | ||||||
|             QueryNode::Term(term) => match &term.value { |             QueryNode::Term(term) => match &term.value { | ||||||
|                 QueryTerm::Phrase { phrase } => { |                 QueryTerm::Phrase { phrase } => { | ||||||
|                     phrase.description() |                     let phrase = ctx.phrase_interner.get(*phrase); | ||||||
|  |                     phrase.description(&ctx.word_interner) | ||||||
|  |                 }, | ||||||
|  |                 QueryTerm::Word { derivations } => { | ||||||
|  |                     ctx.word_interner.get(derivations.original).to_owned() | ||||||
|                 }, |                 }, | ||||||
|                 QueryTerm::Word { derivations } => derivations.original.clone(), |  | ||||||
|             }, |             }, | ||||||
|             QueryNode::Deleted => panic!(), |             QueryNode::Deleted => panic!(), | ||||||
|             QueryNode::Start => "START".to_owned(), |             QueryNode::Start => "START".to_owned(), | ||||||
| @@ -485,8 +497,11 @@ shape: class").unwrap(); | |||||||
|         let to_node = &graph.query_graph.nodes[*to_node as usize]; |         let to_node = &graph.query_graph.nodes[*to_node as usize]; | ||||||
|         let to_node_desc = match to_node { |         let to_node_desc = match to_node { | ||||||
|             QueryNode::Term(term) => match &term.value { |             QueryNode::Term(term) => match &term.value { | ||||||
|                 QueryTerm::Phrase { phrase } => phrase.description(), |                 QueryTerm::Phrase { phrase } => { | ||||||
|                 QueryTerm::Word { derivations } => derivations.original.clone(), |                     let phrase = ctx.phrase_interner.get(*phrase); | ||||||
|  |                     phrase.description(&ctx.word_interner) | ||||||
|  |                 }, | ||||||
|  |                 QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(), | ||||||
|             }, |             }, | ||||||
|             QueryNode::Deleted => panic!(), |             QueryNode::Deleted => panic!(), | ||||||
|             QueryNode::Start => "START".to_owned(), |             QueryNode::Start => "START".to_owned(), | ||||||
| @@ -496,11 +511,11 @@ shape: class").unwrap(); | |||||||
|             shape: class |             shape: class | ||||||
|         }}").unwrap(); |         }}").unwrap(); | ||||||
|     } |     } | ||||||
|     fn paths_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {  |     fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {  | ||||||
|         for (path_idx, edge_indexes) in paths.iter().enumerate() { |         for (path_idx, edge_indexes) in paths.iter().enumerate() { | ||||||
|             writeln!(file, "{path_idx} {{").unwrap(); |             writeln!(file, "{path_idx} {{").unwrap(); | ||||||
|             for edge_idx in edge_indexes.iter() { |             for edge_idx in edge_indexes.iter() { | ||||||
|                 Self::edge_d2_description(graph, *edge_idx, file); |                 Self::edge_d2_description(ctx, graph, *edge_idx, file); | ||||||
|             } |             } | ||||||
|             for couple_edges in edge_indexes.windows(2) { |             for couple_edges in edge_indexes.windows(2) { | ||||||
|                 let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() }; |                 let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() }; | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| mod db_cache; | mod db_cache; | ||||||
| mod graph_based_ranking_rule; | mod graph_based_ranking_rule; | ||||||
|  | mod interner; | ||||||
| mod logger; | mod logger; | ||||||
| mod query_graph; | mod query_graph; | ||||||
| mod query_term; | mod query_term; | ||||||
| @@ -26,7 +27,9 @@ use query_graph::{QueryGraph, QueryNode}; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use self::{ | use self::{ | ||||||
|  |     interner::Interner, | ||||||
|     logger::SearchLogger, |     logger::SearchLogger, | ||||||
|  |     query_term::Phrase, | ||||||
|     resolve_query_graph::{resolve_query_graph, NodeDocIdsCache}, |     resolve_query_graph::{resolve_query_graph, NodeDocIdsCache}, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -35,14 +38,32 @@ pub enum BitmapOrAllRef<'s> { | |||||||
|     All, |     All, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub struct SearchContext<'search> { | ||||||
|  |     pub index: &'search Index, | ||||||
|  |     pub txn: &'search RoTxn<'search>, | ||||||
|  |     pub db_cache: DatabaseCache<'search>, | ||||||
|  |     pub word_interner: Interner<String>, | ||||||
|  |     pub phrase_interner: Interner<Phrase>, | ||||||
|  |     pub node_docids_cache: NodeDocIdsCache, | ||||||
|  | } | ||||||
|  | impl<'search> SearchContext<'search> { | ||||||
|  |     pub fn new(index: &'search Index, txn: &'search RoTxn<'search>) -> Self { | ||||||
|  |         Self { | ||||||
|  |             index, | ||||||
|  |             txn, | ||||||
|  |             db_cache: <_>::default(), | ||||||
|  |             word_interner: <_>::default(), | ||||||
|  |             phrase_interner: <_>::default(), | ||||||
|  |             node_docids_cache: <_>::default(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[allow(clippy::too_many_arguments)] | #[allow(clippy::too_many_arguments)] | ||||||
| pub fn resolve_maximally_reduced_query_graph<'transaction>( | pub fn resolve_maximally_reduced_query_graph<'search>( | ||||||
|     index: &Index, |     ctx: &mut SearchContext<'search>, | ||||||
|     txn: &'transaction heed::RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
|     query_graph: &QueryGraph, |     query_graph: &QueryGraph, | ||||||
|     node_docids_cache: &mut NodeDocIdsCache, |  | ||||||
|     matching_strategy: TermsMatchingStrategy, |     matching_strategy: TermsMatchingStrategy, | ||||||
|     logger: &mut dyn SearchLogger<QueryGraph>, |     logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
| ) -> Result<RoaringBitmap> { | ) -> Result<RoaringBitmap> { | ||||||
| @@ -73,16 +94,14 @@ pub fn resolve_maximally_reduced_query_graph<'transaction>( | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     logger.query_for_universe(&graph); |     logger.query_for_universe(&graph); | ||||||
|     let docids = resolve_query_graph(index, txn, db_cache, node_docids_cache, &graph, universe)?; |     let docids = resolve_query_graph(ctx, &graph, universe)?; | ||||||
|  |  | ||||||
|     Ok(docids) |     Ok(docids) | ||||||
| } | } | ||||||
|  |  | ||||||
| #[allow(clippy::too_many_arguments)] | #[allow(clippy::too_many_arguments)] | ||||||
| pub fn execute_search<'transaction>( | pub fn execute_search<'search>( | ||||||
|     index: &Index, |     ctx: &mut SearchContext<'search>, | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     query: &str, |     query: &str, | ||||||
|     filters: Option<Filter>, |     filters: Option<Filter>, | ||||||
|     from: usize, |     from: usize, | ||||||
| @@ -90,26 +109,21 @@ pub fn execute_search<'transaction>( | |||||||
|     logger: &mut dyn SearchLogger<QueryGraph>, |     logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
| ) -> Result<Vec<u32>> { | ) -> Result<Vec<u32>> { | ||||||
|     assert!(!query.is_empty()); |     assert!(!query.is_empty()); | ||||||
|     let query_terms = located_query_terms_from_string(index, txn, query.tokenize(), None).unwrap(); |     let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None).unwrap(); | ||||||
|     let graph = QueryGraph::from_query(index, txn, db_cache, query_terms)?; |     let graph = QueryGraph::from_query(ctx, query_terms)?; | ||||||
|  |  | ||||||
|     logger.initial_query(&graph); |     logger.initial_query(&graph); | ||||||
|  |  | ||||||
|     let universe = if let Some(filters) = filters { |     let universe = if let Some(filters) = filters { | ||||||
|         filters.evaluate(txn, index)? |         filters.evaluate(ctx.txn, ctx.index)? | ||||||
|     } else { |     } else { | ||||||
|         index.documents_ids(txn)? |         ctx.index.documents_ids(ctx.txn)? | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let mut node_docids_cache = NodeDocIdsCache::default(); |  | ||||||
|  |  | ||||||
|     let universe = resolve_maximally_reduced_query_graph( |     let universe = resolve_maximally_reduced_query_graph( | ||||||
|         index, |         ctx, | ||||||
|         txn, |  | ||||||
|         db_cache, |  | ||||||
|         &universe, |         &universe, | ||||||
|         &graph, |         &graph, | ||||||
|         &mut node_docids_cache, |  | ||||||
|         TermsMatchingStrategy::Last, |         TermsMatchingStrategy::Last, | ||||||
|         logger, |         logger, | ||||||
|     )?; |     )?; | ||||||
| @@ -117,5 +131,5 @@ pub fn execute_search<'transaction>( | |||||||
|  |  | ||||||
|     logger.initial_universe(&universe); |     logger.initial_universe(&universe); | ||||||
|  |  | ||||||
|     apply_ranking_rules(index, txn, db_cache, &graph, &universe, from, length, logger) |     apply_ranking_rules(ctx, &graph, &universe, from, length, logger) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,13 +1,10 @@ | |||||||
|  | use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations}; | ||||||
|  | use super::SearchContext; | ||||||
|  | use crate::Result; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| use std::fmt::Debug; | use std::fmt::Debug; | ||||||
|  |  | ||||||
| use heed::RoTxn; | #[derive(Clone)] | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; |  | ||||||
| use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations}; |  | ||||||
| use crate::{Index, Result}; |  | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] |  | ||||||
| pub enum QueryNode { | pub enum QueryNode { | ||||||
|     Term(LocatedQueryTerm), |     Term(LocatedQueryTerm), | ||||||
|     Deleted, |     Deleted, | ||||||
| @@ -22,7 +19,7 @@ pub struct Edges { | |||||||
|     pub successors: RoaringBitmap, |     pub successors: RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Clone)] | ||||||
| pub struct QueryGraph { | pub struct QueryGraph { | ||||||
|     pub root_node: u32, |     pub root_node: u32, | ||||||
|     pub end_node: u32, |     pub end_node: u32, | ||||||
| @@ -31,8 +28,8 @@ pub struct QueryGraph { | |||||||
| } | } | ||||||
|  |  | ||||||
| fn _assert_sizes() { | fn _assert_sizes() { | ||||||
|     // TODO: QueryNodes are too big now, 184B is an unreasonable size |     // TODO: QueryNodes are too big now, 88B is a bit too big | ||||||
|     let _: [u8; 184] = [0; std::mem::size_of::<QueryNode>()]; |     let _: [u8; 88] = [0; std::mem::size_of::<QueryNode>()]; | ||||||
|     let _: [u8; 48] = [0; std::mem::size_of::<Edges>()]; |     let _: [u8; 48] = [0; std::mem::size_of::<Edges>()]; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -72,19 +69,14 @@ impl QueryGraph { | |||||||
|  |  | ||||||
| impl QueryGraph { | impl QueryGraph { | ||||||
|     // TODO: return the list of all matching words here as well |     // TODO: return the list of all matching words here as well | ||||||
|     pub fn from_query<'transaction>( |     pub fn from_query(ctx: &mut SearchContext, terms: Vec<LocatedQueryTerm>) -> Result<QueryGraph> { | ||||||
|         index: &Index, |  | ||||||
|         txn: &RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         terms: Vec<LocatedQueryTerm>, |  | ||||||
|     ) -> Result<QueryGraph> { |  | ||||||
|         // TODO: maybe empty nodes should not be removed here, to compute |         // TODO: maybe empty nodes should not be removed here, to compute | ||||||
|         // the score of the `words` ranking rule correctly |         // the score of the `words` ranking rule correctly | ||||||
|         // it is very easy to traverse the graph and remove afterwards anyway |         // it is very easy to traverse the graph and remove afterwards anyway | ||||||
|         // Still, I'm keeping this here as a demo |         // Still, I'm keeping this here as a demo | ||||||
|         let mut empty_nodes = vec![]; |         let mut empty_nodes = vec![]; | ||||||
|  |  | ||||||
|         let word_set = index.words_fst(txn)?; |         let word_set = ctx.index.words_fst(ctx.txn)?; | ||||||
|         let mut graph = QueryGraph::default(); |         let mut graph = QueryGraph::default(); | ||||||
|  |  | ||||||
|         let (mut prev2, mut prev1, mut prev0): (Vec<u32>, Vec<u32>, Vec<u32>) = |         let (mut prev2, mut prev1, mut prev0): (Vec<u32>, Vec<u32>, Vec<u32>) = | ||||||
| @@ -105,20 +97,20 @@ impl QueryGraph { | |||||||
|  |  | ||||||
|             if !prev1.is_empty() { |             if !prev1.is_empty() { | ||||||
|                 if let Some((ngram2_str, ngram2_pos)) = |                 if let Some((ngram2_str, ngram2_pos)) = | ||||||
|                     query_term::ngram2(&query[length - 2], &query[length - 1]) |                     query_term::ngram2(ctx, &query[length - 2], &query[length - 1]) | ||||||
|                 { |                 { | ||||||
|                     if word_set.contains(ngram2_str.as_bytes()) { |                     if word_set.contains(ctx.word_interner.get(ngram2_str)) { | ||||||
|                         let ngram2 = LocatedQueryTerm { |                         let ngram2 = LocatedQueryTerm { | ||||||
|                             value: QueryTerm::Word { |                             value: QueryTerm::Word { | ||||||
|                                 derivations: WordDerivations { |                                 derivations: WordDerivations { | ||||||
|                                     original: ngram2_str.clone(), |                                     original: ngram2_str, | ||||||
|                                     // TODO: could add a typo if it's an ngram? |                                     // TODO: could add a typo if it's an ngram? | ||||||
|                                     zero_typo: vec![ngram2_str], |                                     zero_typo: Box::new([ngram2_str]), | ||||||
|                                     one_typo: vec![], |                                     one_typo: Box::new([]), | ||||||
|                                     two_typos: vec![], |                                     two_typos: Box::new([]), | ||||||
|                                     use_prefix_db: false, |                                     use_prefix_db: false, | ||||||
|                                     synonyms: vec![],  // TODO: ngram synonyms |                                     synonyms: Box::new([]), // TODO: ngram synonyms | ||||||
|                                     split_words: None, // TODO: maybe ngram split words? |                                     split_words: None,      // TODO: maybe ngram split words? | ||||||
|                                 }, |                                 }, | ||||||
|                             }, |                             }, | ||||||
|                             positions: ngram2_pos, |                             positions: ngram2_pos, | ||||||
| @@ -129,22 +121,25 @@ impl QueryGraph { | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             if !prev2.is_empty() { |             if !prev2.is_empty() { | ||||||
|                 if let Some((ngram3_str, ngram3_pos)) = |                 if let Some((ngram3_str, ngram3_pos)) = query_term::ngram3( | ||||||
|                     query_term::ngram3(&query[length - 3], &query[length - 2], &query[length - 1]) |                     ctx, | ||||||
|                 { |                     &query[length - 3], | ||||||
|                     if word_set.contains(ngram3_str.as_bytes()) { |                     &query[length - 2], | ||||||
|  |                     &query[length - 1], | ||||||
|  |                 ) { | ||||||
|  |                     if word_set.contains(ctx.word_interner.get(ngram3_str)) { | ||||||
|                         let ngram3 = LocatedQueryTerm { |                         let ngram3 = LocatedQueryTerm { | ||||||
|                             value: QueryTerm::Word { |                             value: QueryTerm::Word { | ||||||
|                                 derivations: WordDerivations { |                                 derivations: WordDerivations { | ||||||
|                                     original: ngram3_str.clone(), |                                     original: ngram3_str, | ||||||
|                                     // TODO: could add a typo if it's an ngram? |                                     // TODO: could add a typo if it's an ngram? | ||||||
|                                     zero_typo: vec![ngram3_str], |                                     zero_typo: Box::new([ngram3_str]), | ||||||
|                                     one_typo: vec![], |                                     one_typo: Box::new([]), | ||||||
|                                     two_typos: vec![], |                                     two_typos: Box::new([]), | ||||||
|                                     use_prefix_db: false, |                                     use_prefix_db: false, | ||||||
|                                     synonyms: vec![], // TODO: ngram synonyms |                                     synonyms: Box::new([]), // TODO: ngram synonyms | ||||||
|                                     split_words: None, // TODO: maybe ngram split words? |                                     split_words: None,      // TODO: maybe ngram split words? | ||||||
|                                                       // would be nice for typos like su nflower |                                                             // would be nice for typos like su nflower | ||||||
|                                 }, |                                 }, | ||||||
|                             }, |                             }, | ||||||
|                             positions: ngram3_pos, |                             positions: ngram3_pos, | ||||||
|   | |||||||
| @@ -16,30 +16,35 @@ use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union}; | |||||||
| use crate::search::{build_dfa, get_first}; | use crate::search::{build_dfa, get_first}; | ||||||
| use crate::{CboRoaringBitmapLenCodec, Index, Result}; | use crate::{CboRoaringBitmapLenCodec, Index, Result}; | ||||||
|  |  | ||||||
| #[derive(Debug, Default, Clone)] | use super::interner::{Interned, Interner}; | ||||||
|  | use super::SearchContext; | ||||||
|  |  | ||||||
|  | #[derive(Default, Clone, PartialEq, Eq, Hash)] | ||||||
| pub struct Phrase { | pub struct Phrase { | ||||||
|     pub words: Vec<Option<String>>, |     pub words: Vec<Option<Interned<String>>>, | ||||||
| } | } | ||||||
| impl Phrase { | impl Phrase { | ||||||
|     pub fn description(&self) -> String { |     pub fn description(&self, interner: &Interner<String>) -> String { | ||||||
|         self.words.iter().flatten().join(" ") |         self.words.iter().flatten().map(|w| interner.get(*w)).join(" ") | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Clone)] | ||||||
| pub struct WordDerivations { | pub struct WordDerivations { | ||||||
|     pub original: String, |     pub original: Interned<String>, | ||||||
|     // TODO: pub prefix_of: Vec<String>, |     // TODO: pub prefix_of: Vec<String>, | ||||||
|     pub synonyms: Vec<Phrase>, |     pub synonyms: Box<[Interned<Phrase>]>, | ||||||
|     pub split_words: Option<(String, String)>, |     pub split_words: Option<Interned<Phrase>>, | ||||||
|     pub zero_typo: Vec<String>, |     pub zero_typo: Box<[Interned<String>]>, | ||||||
|     pub one_typo: Vec<String>, |     pub one_typo: Box<[Interned<String>]>, | ||||||
|     pub two_typos: Vec<String>, |     pub two_typos: Box<[Interned<String>]>, | ||||||
|     pub use_prefix_db: bool, |     pub use_prefix_db: bool, | ||||||
| } | } | ||||||
| impl WordDerivations { | impl WordDerivations { | ||||||
|     pub fn all_derivations_except_prefix_db(&self) -> impl Iterator<Item = &String> + Clone { |     pub fn all_derivations_except_prefix_db( | ||||||
|         self.zero_typo.iter().chain(self.one_typo.iter()).chain(self.two_typos.iter()) |         &'_ self, | ||||||
|  |     ) -> impl Iterator<Item = Interned<String>> + Clone + '_ { | ||||||
|  |         self.zero_typo.iter().chain(self.one_typo.iter()).chain(self.two_typos.iter()).copied() | ||||||
|     } |     } | ||||||
|     fn is_empty(&self) -> bool { |     fn is_empty(&self) -> bool { | ||||||
|         self.zero_typo.is_empty() |         self.zero_typo.is_empty() | ||||||
| @@ -50,15 +55,21 @@ impl WordDerivations { | |||||||
| } | } | ||||||
|  |  | ||||||
| pub fn word_derivations( | pub fn word_derivations( | ||||||
|     index: &Index, |     ctx: &mut SearchContext, | ||||||
|     txn: &RoTxn, |  | ||||||
|     word: &str, |     word: &str, | ||||||
|     max_typo: u8, |     max_typo: u8, | ||||||
|     is_prefix: bool, |     is_prefix: bool, | ||||||
|     fst: &fst::Set<Cow<[u8]>>, |     fst: &fst::Set<Cow<[u8]>>, | ||||||
| ) -> Result<WordDerivations> { | ) -> Result<WordDerivations> { | ||||||
|  |     let word_interned = ctx.word_interner.insert(word.to_owned()); | ||||||
|  |  | ||||||
|     let use_prefix_db = is_prefix |     let use_prefix_db = is_prefix | ||||||
|         && index.word_prefix_docids.remap_data_type::<DecodeIgnore>().get(txn, word)?.is_some(); |         && ctx | ||||||
|  |             .index | ||||||
|  |             .word_prefix_docids | ||||||
|  |             .remap_data_type::<DecodeIgnore>() | ||||||
|  |             .get(ctx.txn, word)? | ||||||
|  |             .is_some(); | ||||||
|  |  | ||||||
|     let mut zero_typo = vec![]; |     let mut zero_typo = vec![]; | ||||||
|     let mut one_typo = vec![]; |     let mut one_typo = vec![]; | ||||||
| @@ -70,11 +81,12 @@ pub fn word_derivations( | |||||||
|             let mut stream = fst.search(prefix).into_stream(); |             let mut stream = fst.search(prefix).into_stream(); | ||||||
|  |  | ||||||
|             while let Some(word) = stream.next() { |             while let Some(word) = stream.next() { | ||||||
|                 let word = std::str::from_utf8(word)?; |                 let word = std::str::from_utf8(word)?.to_owned(); | ||||||
|                 zero_typo.push(word.to_string()); |                 let word_interned = ctx.word_interner.insert(word); | ||||||
|  |                 zero_typo.push(word_interned); | ||||||
|             } |             } | ||||||
|         } else if fst.contains(word) { |         } else if fst.contains(word) { | ||||||
|             zero_typo.push(word.to_string()); |             zero_typo.push(word_interned); | ||||||
|         } |         } | ||||||
|     } else if max_typo == 1 { |     } else if max_typo == 1 { | ||||||
|         let dfa = build_dfa(word, 1, is_prefix); |         let dfa = build_dfa(word, 1, is_prefix); | ||||||
| @@ -83,13 +95,14 @@ pub fn word_derivations( | |||||||
|  |  | ||||||
|         while let Some((word, state)) = stream.next() { |         while let Some((word, state)) = stream.next() { | ||||||
|             let word = std::str::from_utf8(word)?; |             let word = std::str::from_utf8(word)?; | ||||||
|  |             let word_interned = ctx.word_interner.insert(word.to_owned()); | ||||||
|             let d = dfa.distance(state.1); |             let d = dfa.distance(state.1); | ||||||
|             match d.to_u8() { |             match d.to_u8() { | ||||||
|                 0 => { |                 0 => { | ||||||
|                     zero_typo.push(word.to_string()); |                     zero_typo.push(word_interned); | ||||||
|                 } |                 } | ||||||
|                 1 => { |                 1 => { | ||||||
|                     one_typo.push(word.to_string()); |                     one_typo.push(word_interned); | ||||||
|                 } |                 } | ||||||
|                 _ => panic!(), |                 _ => panic!(), | ||||||
|             } |             } | ||||||
| @@ -105,47 +118,56 @@ pub fn word_derivations( | |||||||
|  |  | ||||||
|         while let Some((found_word, state)) = stream.next() { |         while let Some((found_word, state)) = stream.next() { | ||||||
|             let found_word = std::str::from_utf8(found_word)?; |             let found_word = std::str::from_utf8(found_word)?; | ||||||
|  |             let found_word_interned = ctx.word_interner.insert(found_word.to_owned()); | ||||||
|             // in the case the typo is on the first letter, we know the number of typo |             // in the case the typo is on the first letter, we know the number of typo | ||||||
|             // is two |             // is two | ||||||
|             if get_first(found_word) != get_first(word) { |             if get_first(found_word) != get_first(word) { | ||||||
|                 two_typos.push(found_word.to_string()); |                 two_typos.push(found_word_interned); | ||||||
|             } else { |             } else { | ||||||
|                 // Else, we know that it is the second dfa that matched and compute the |                 // Else, we know that it is the second dfa that matched and compute the | ||||||
|                 // correct distance |                 // correct distance | ||||||
|                 let d = second_dfa.distance((state.1).0); |                 let d = second_dfa.distance((state.1).0); | ||||||
|                 match d.to_u8() { |                 match d.to_u8() { | ||||||
|                     0 => { |                     0 => { | ||||||
|                         zero_typo.push(found_word.to_string()); |                         zero_typo.push(found_word_interned); | ||||||
|                     } |                     } | ||||||
|                     1 => { |                     1 => { | ||||||
|                         one_typo.push(found_word.to_string()); |                         one_typo.push(found_word_interned); | ||||||
|                     } |                     } | ||||||
|                     2 => { |                     2 => { | ||||||
|                         two_typos.push(found_word.to_string()); |                         two_typos.push(found_word_interned); | ||||||
|                     } |                     } | ||||||
|                     _ => panic!(), |                     _ => panic!(), | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     let split_words = split_best_frequency(index, txn, word)?; |     let split_words = split_best_frequency(ctx.index, ctx.txn, word)?.map(|(l, r)| { | ||||||
|  |         ctx.phrase_interner.insert(Phrase { | ||||||
|  |             words: vec![Some(ctx.word_interner.insert(l)), Some(ctx.word_interner.insert(r))], | ||||||
|  |         }) | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     let synonyms = ctx.index.synonyms(ctx.txn)?; | ||||||
|  |  | ||||||
|     let synonyms = index.synonyms(txn)?; |  | ||||||
|     let synonyms = synonyms |     let synonyms = synonyms | ||||||
|         .get(&vec![word.to_owned()]) |         .get(&vec![word.to_owned()]) | ||||||
|         .cloned() |         .cloned() | ||||||
|         .unwrap_or_default() |         .unwrap_or_default() | ||||||
|         .into_iter() |         .into_iter() | ||||||
|         .map(|words| Phrase { words: words.into_iter().map(Some).collect() }) |         .map(|words| { | ||||||
|  |             let words = words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect(); | ||||||
|  |             ctx.phrase_interner.insert(Phrase { words }) | ||||||
|  |         }) | ||||||
|         .collect(); |         .collect(); | ||||||
|  |  | ||||||
|     Ok(WordDerivations { |     Ok(WordDerivations { | ||||||
|         original: word.to_owned(), |         original: ctx.word_interner.insert(word.to_owned()), | ||||||
|         synonyms, |         synonyms, | ||||||
|         split_words, |         split_words, | ||||||
|         zero_typo, |         zero_typo: zero_typo.into_boxed_slice(), | ||||||
|         one_typo, |         one_typo: one_typo.into_boxed_slice(), | ||||||
|         two_typos, |         two_typos: two_typos.into_boxed_slice(), | ||||||
|         use_prefix_db, |         use_prefix_db, | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
| @@ -176,33 +198,36 @@ fn split_best_frequency( | |||||||
|     Ok(best.map(|(_, left, right)| (left.to_owned(), right.to_owned()))) |     Ok(best.map(|(_, left, right)| (left.to_owned(), right.to_owned()))) | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Clone)] | ||||||
| pub enum QueryTerm { | pub enum QueryTerm { | ||||||
|     // TODO: should there be SplitWord, NGram2, and NGram3 variants? |     // TODO: should there be SplitWord, NGram2, and NGram3 variants? | ||||||
|     // NGram2 can have 1 typo and synonyms |     // NGram2 can have 1 typo and synonyms | ||||||
|     // NGram3 cannot have typos but can have synonyms |     // NGram3 cannot have typos but can have synonyms | ||||||
|     // SplitWords are a phrase |     // SplitWords are a phrase | ||||||
|     // Can NGrams be prefixes? |     // Can NGrams be prefixes? | ||||||
|     Phrase { phrase: Phrase }, |     Phrase { phrase: Interned<Phrase> }, | ||||||
|     Word { derivations: WordDerivations }, |     Word { derivations: WordDerivations }, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl QueryTerm { | impl QueryTerm { | ||||||
|     pub fn original_single_word(&self) -> Option<&str> { |     pub fn original_single_word<'interner>( | ||||||
|  |         &self, | ||||||
|  |         word_interner: &'interner Interner<String>, | ||||||
|  |     ) -> Option<&'interner str> { | ||||||
|         match self { |         match self { | ||||||
|             QueryTerm::Phrase { phrase: _ } => None, |             QueryTerm::Phrase { phrase: _ } => None, | ||||||
|             QueryTerm::Word { derivations } => { |             QueryTerm::Word { derivations } => { | ||||||
|                 if derivations.is_empty() { |                 if derivations.is_empty() { | ||||||
|                     None |                     None | ||||||
|                 } else { |                 } else { | ||||||
|                     Some(derivations.original.as_str()) |                     Some(word_interner.get(derivations.original)) | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Clone)] | ||||||
| pub struct LocatedQueryTerm { | pub struct LocatedQueryTerm { | ||||||
|     pub value: QueryTerm, |     pub value: QueryTerm, | ||||||
|     pub positions: RangeInclusive<i8>, |     pub positions: RangeInclusive<i8>, | ||||||
| @@ -217,18 +242,17 @@ impl LocatedQueryTerm { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn located_query_terms_from_string<'transaction>( | pub fn located_query_terms_from_string<'search>( | ||||||
|     index: &Index, |     ctx: &mut SearchContext<'search>, | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     query: NormalizedTokenIter<Vec<u8>>, |     query: NormalizedTokenIter<Vec<u8>>, | ||||||
|     words_limit: Option<usize>, |     words_limit: Option<usize>, | ||||||
| ) -> Result<Vec<LocatedQueryTerm>> { | ) -> Result<Vec<LocatedQueryTerm>> { | ||||||
|     let authorize_typos = index.authorize_typos(txn)?; |     let authorize_typos = ctx.index.authorize_typos(ctx.txn)?; | ||||||
|     let min_len_one_typo = index.min_word_len_one_typo(txn)?; |     let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?; | ||||||
|     let min_len_two_typos = index.min_word_len_two_typos(txn)?; |     let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?; | ||||||
|  |  | ||||||
|     let exact_words = index.exact_words(txn)?; |     let exact_words = ctx.index.exact_words(ctx.txn)?; | ||||||
|     let fst = index.words_fst(txn)?; |     let fst = ctx.index.words_fst(ctx.txn)?; | ||||||
|  |  | ||||||
|     let nbr_typos = |word: &str| { |     let nbr_typos = |word: &str| { | ||||||
|         if !authorize_typos |         if !authorize_typos | ||||||
| @@ -243,10 +267,6 @@ pub fn located_query_terms_from_string<'transaction>( | |||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let derivations = |word: &str, is_prefix: bool| { |  | ||||||
|         word_derivations(index, txn, word, nbr_typos(word), is_prefix, &fst) |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let mut primitive_query = Vec::new(); |     let mut primitive_query = Vec::new(); | ||||||
|     let mut phrase = Vec::new(); |     let mut phrase = Vec::new(); | ||||||
|  |  | ||||||
| @@ -279,14 +299,17 @@ pub fn located_query_terms_from_string<'transaction>( | |||||||
|                     if let TokenKind::StopWord = token.kind { |                     if let TokenKind::StopWord = token.kind { | ||||||
|                         phrase.push(None); |                         phrase.push(None); | ||||||
|                     } else { |                     } else { | ||||||
|  |                         let word = ctx.word_interner.insert(token.lemma().to_string()); | ||||||
|                         // TODO: in a phrase, check that every word exists |                         // TODO: in a phrase, check that every word exists | ||||||
|                         // otherwise return WordDerivations::Empty |                         // otherwise return WordDerivations::Empty | ||||||
|                         phrase.push(Some(token.lemma().to_string())); |                         phrase.push(Some(word)); | ||||||
|                     } |                     } | ||||||
|                 } else if peekable.peek().is_some() { |                 } else if peekable.peek().is_some() { | ||||||
|                     match token.kind { |                     match token.kind { | ||||||
|                         TokenKind::Word => { |                         TokenKind::Word => { | ||||||
|                             let derivations = derivations(token.lemma(), false)?; |                             let word = token.lemma(); | ||||||
|  |                             let derivations = | ||||||
|  |                                 word_derivations(ctx, word, nbr_typos(word), false, &fst)?; | ||||||
|                             let located_term = LocatedQueryTerm { |                             let located_term = LocatedQueryTerm { | ||||||
|                                 value: QueryTerm::Word { derivations }, |                                 value: QueryTerm::Word { derivations }, | ||||||
|                                 positions: position..=position, |                                 positions: position..=position, | ||||||
| @@ -296,7 +319,8 @@ pub fn located_query_terms_from_string<'transaction>( | |||||||
|                         TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {} |                         TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {} | ||||||
|                     } |                     } | ||||||
|                 } else { |                 } else { | ||||||
|                     let derivations = derivations(token.lemma(), true)?; |                     let word = token.lemma(); | ||||||
|  |                     let derivations = word_derivations(ctx, word, nbr_typos(word), true, &fst)?; | ||||||
|                     let located_term = LocatedQueryTerm { |                     let located_term = LocatedQueryTerm { | ||||||
|                         value: QueryTerm::Word { derivations }, |                         value: QueryTerm::Word { derivations }, | ||||||
|                         positions: position..=position, |                         positions: position..=position, | ||||||
| @@ -323,7 +347,9 @@ pub fn located_query_terms_from_string<'transaction>( | |||||||
|                 { |                 { | ||||||
|                     let located_query_term = LocatedQueryTerm { |                     let located_query_term = LocatedQueryTerm { | ||||||
|                         value: QueryTerm::Phrase { |                         value: QueryTerm::Phrase { | ||||||
|                             phrase: Phrase { words: mem::take(&mut phrase) }, |                             phrase: ctx | ||||||
|  |                                 .phrase_interner | ||||||
|  |                                 .insert(Phrase { words: mem::take(&mut phrase) }), | ||||||
|                         }, |                         }, | ||||||
|                         positions: phrase_start..=phrase_end, |                         positions: phrase_start..=phrase_end, | ||||||
|                     }; |                     }; | ||||||
| @@ -337,7 +363,9 @@ pub fn located_query_terms_from_string<'transaction>( | |||||||
|     // If a quote is never closed, we consider all of the end of the query as a phrase. |     // If a quote is never closed, we consider all of the end of the query as a phrase. | ||||||
|     if !phrase.is_empty() { |     if !phrase.is_empty() { | ||||||
|         let located_query_term = LocatedQueryTerm { |         let located_query_term = LocatedQueryTerm { | ||||||
|             value: QueryTerm::Phrase { phrase: Phrase { words: mem::take(&mut phrase) } }, |             value: QueryTerm::Phrase { | ||||||
|  |                 phrase: ctx.phrase_interner.insert(Phrase { words: mem::take(&mut phrase) }), | ||||||
|  |             }, | ||||||
|             positions: phrase_start..=phrase_end, |             positions: phrase_start..=phrase_end, | ||||||
|         }; |         }; | ||||||
|         primitive_query.push(located_query_term); |         primitive_query.push(located_query_term); | ||||||
| @@ -347,35 +375,49 @@ pub fn located_query_terms_from_string<'transaction>( | |||||||
| } | } | ||||||
|  |  | ||||||
| // TODO: return a word derivations instead? | // TODO: return a word derivations instead? | ||||||
| pub fn ngram2(x: &LocatedQueryTerm, y: &LocatedQueryTerm) -> Option<(String, RangeInclusive<i8>)> { | pub fn ngram2( | ||||||
|  |     ctx: &mut SearchContext, | ||||||
|  |     x: &LocatedQueryTerm, | ||||||
|  |     y: &LocatedQueryTerm, | ||||||
|  | ) -> Option<(Interned<String>, RangeInclusive<i8>)> { | ||||||
|     if *x.positions.end() != y.positions.start() - 1 { |     if *x.positions.end() != y.positions.start() - 1 { | ||||||
|         return None; |         return None; | ||||||
|     } |     } | ||||||
|     match (&x.value.original_single_word(), &y.value.original_single_word()) { |     match ( | ||||||
|  |         &x.value.original_single_word(&ctx.word_interner), | ||||||
|  |         &y.value.original_single_word(&ctx.word_interner), | ||||||
|  |     ) { | ||||||
|         (Some(w1), Some(w2)) => { |         (Some(w1), Some(w2)) => { | ||||||
|             let term = (format!("{w1}{w2}"), *x.positions.start()..=*y.positions.end()); |             let term = ( | ||||||
|  |                 ctx.word_interner.insert(format!("{w1}{w2}")), | ||||||
|  |                 *x.positions.start()..=*y.positions.end(), | ||||||
|  |             ); | ||||||
|             Some(term) |             Some(term) | ||||||
|         } |         } | ||||||
|         _ => None, |         _ => None, | ||||||
|     } |     } | ||||||
| } | } | ||||||
| pub fn ngram3( | pub fn ngram3( | ||||||
|  |     ctx: &mut SearchContext, | ||||||
|     x: &LocatedQueryTerm, |     x: &LocatedQueryTerm, | ||||||
|     y: &LocatedQueryTerm, |     y: &LocatedQueryTerm, | ||||||
|     z: &LocatedQueryTerm, |     z: &LocatedQueryTerm, | ||||||
| ) -> Option<(String, RangeInclusive<i8>)> { | ) -> Option<(Interned<String>, RangeInclusive<i8>)> { | ||||||
|     if *x.positions.end() != y.positions.start() - 1 |     if *x.positions.end() != y.positions.start() - 1 | ||||||
|         || *y.positions.end() != z.positions.start() - 1 |         || *y.positions.end() != z.positions.start() - 1 | ||||||
|     { |     { | ||||||
|         return None; |         return None; | ||||||
|     } |     } | ||||||
|     match ( |     match ( | ||||||
|         &x.value.original_single_word(), |         &x.value.original_single_word(&ctx.word_interner), | ||||||
|         &y.value.original_single_word(), |         &y.value.original_single_word(&ctx.word_interner), | ||||||
|         &z.value.original_single_word(), |         &z.value.original_single_word(&ctx.word_interner), | ||||||
|     ) { |     ) { | ||||||
|         (Some(w1), Some(w2), Some(w3)) => { |         (Some(w1), Some(w2), Some(w3)) => { | ||||||
|             let term = (format!("{w1}{w2}{w3}"), *x.positions.start()..=*z.positions.end()); |             let term = ( | ||||||
|  |                 ctx.word_interner.insert(format!("{w1}{w2}{w3}")), | ||||||
|  |                 *x.positions.start()..=*z.positions.end(), | ||||||
|  |             ); | ||||||
|             Some(term) |             Some(term) | ||||||
|         } |         } | ||||||
|         _ => None, |         _ => None, | ||||||
|   | |||||||
| @@ -1,18 +1,10 @@ | |||||||
| use heed::RoTxn; | use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; | ||||||
|  | use crate::new::{QueryGraph, SearchContext}; | ||||||
|  | use crate::Result; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; |  | ||||||
| use crate::new::db_cache::DatabaseCache; |  | ||||||
| use crate::new::QueryGraph; |  | ||||||
| use crate::{Index, Result}; |  | ||||||
|  |  | ||||||
| impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||||
|     pub fn build<'db_cache, 'transaction: 'db_cache>( |     pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> { | ||||||
|         index: &Index, |  | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         query_graph: QueryGraph, |  | ||||||
|     ) -> Result<Self> { |  | ||||||
|         let mut ranking_rule_graph = |         let mut ranking_rule_graph = | ||||||
|             Self { query_graph, all_edges: vec![], node_edges: vec![], successors: vec![] }; |             Self { query_graph, all_edges: vec![], node_edges: vec![], successors: vec![] }; | ||||||
|  |  | ||||||
| @@ -22,12 +14,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|             let new_edges = ranking_rule_graph.node_edges.last_mut().unwrap(); |             let new_edges = ranking_rule_graph.node_edges.last_mut().unwrap(); | ||||||
|             let new_successors = ranking_rule_graph.successors.last_mut().unwrap(); |             let new_successors = ranking_rule_graph.successors.last_mut().unwrap(); | ||||||
|  |  | ||||||
|             let Some(from_node_data) = G::build_visit_from_node(index, txn, db_cache, node)? else { continue }; |             let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue }; | ||||||
|  |  | ||||||
|             for successor_idx in ranking_rule_graph.query_graph.edges[node_idx].successors.iter() { |             for successor_idx in ranking_rule_graph.query_graph.edges[node_idx].successors.iter() { | ||||||
|                 let to_node = &ranking_rule_graph.query_graph.nodes[successor_idx as usize]; |                 let to_node = &ranking_rule_graph.query_graph.nodes[successor_idx as usize]; | ||||||
|                 let mut edges = |                 let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?; | ||||||
|                     G::build_visit_to_node(index, txn, db_cache, to_node, &from_node_data)?; |  | ||||||
|                 if edges.is_empty() { |                 if edges.is_empty() { | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|   | |||||||
| @@ -1,13 +1,10 @@ | |||||||
| use std::marker::PhantomData; | use std::marker::PhantomData; | ||||||
|  |  | ||||||
| use fxhash::FxHashMap; |  | ||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; | use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; | ||||||
| use crate::new::db_cache::DatabaseCache; | use crate::new::{BitmapOrAllRef, SearchContext}; | ||||||
| use crate::new::BitmapOrAllRef; | use crate::Result; | ||||||
| use crate::{Index, Result}; | use fxhash::FxHashMap; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| // TODO: the cache should have a G::EdgeDetails as key | // TODO: the cache should have a G::EdgeDetails as key | ||||||
| // but then it means that we should have a quick way of | // but then it means that we should have a quick way of | ||||||
| @@ -25,11 +22,9 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
| impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> { | impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> { | ||||||
|     pub fn get_edge_docids<'s, 'transaction>( |     pub fn get_edge_docids<'s, 'search>( | ||||||
|         &'s mut self, |         &'s mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         edge_index: u32, |         edge_index: u32, | ||||||
|         graph: &RankingRuleGraph<G>, |         graph: &RankingRuleGraph<G>, | ||||||
|         // TODO: maybe universe doesn't belong here |         // TODO: maybe universe doesn't belong here | ||||||
| @@ -46,7 +41,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> { | |||||||
|                     return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index])); |                     return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index])); | ||||||
|                 } |                 } | ||||||
|                 // TODO: maybe universe doesn't belong here |                 // TODO: maybe universe doesn't belong here | ||||||
|                 let docids = universe & G::compute_docids(index, txn, db_cache, details)?; |                 let docids = universe & G::compute_docids(ctx, details)?; | ||||||
|                 let _ = self.cache.insert(edge_index, docids); |                 let _ = self.cache.insert(edge_index, docids); | ||||||
|                 let docids = &self.cache[&edge_index]; |                 let docids = &self.cache[&edge_index]; | ||||||
|                 Ok(BitmapOrAllRef::Bitmap(docids)) |                 Ok(BitmapOrAllRef::Bitmap(docids)) | ||||||
|   | |||||||
| @@ -7,20 +7,15 @@ mod proximity; | |||||||
| mod resolve_paths; | mod resolve_paths; | ||||||
| mod typo; | mod typo; | ||||||
|  |  | ||||||
|  | use super::logger::SearchLogger; | ||||||
|  | use super::{QueryGraph, QueryNode, SearchContext}; | ||||||
|  | use crate::Result; | ||||||
| pub use edge_docids_cache::EdgeDocidsCache; | pub use edge_docids_cache::EdgeDocidsCache; | ||||||
| pub use empty_paths_cache::EmptyPathsCache; | pub use empty_paths_cache::EmptyPathsCache; | ||||||
| pub use proximity::ProximityGraph; | pub use proximity::ProximityGraph; | ||||||
| pub use typo::TypoGraph; |  | ||||||
|  |  | ||||||
| use std::ops::ControlFlow; |  | ||||||
|  |  | ||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  | use std::ops::ControlFlow; | ||||||
| use super::db_cache::DatabaseCache; | pub use typo::TypoGraph; | ||||||
| use super::logger::SearchLogger; |  | ||||||
| use super::{QueryGraph, QueryNode}; |  | ||||||
| use crate::{Index, Result}; |  | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Debug, Clone)] | ||||||
| pub enum EdgeDetails<E> { | pub enum EdgeDetails<E> { | ||||||
| @@ -42,6 +37,48 @@ pub struct EdgePointer<'graph, E> { | |||||||
|     pub edge: &'graph Edge<E>, |     pub edge: &'graph Edge<E>, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // pub struct SubWordDerivations { | ||||||
|  | //     words: FxHashSet<Interned<String>>, | ||||||
|  | //     synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings | ||||||
|  | //     split_words: bool, | ||||||
|  | //     use_prefix_db: bool, | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // pub struct EdgeWordDerivations { | ||||||
|  | //     // TODO: not Option, instead: Any | All | Subset(SubWordDerivations) | ||||||
|  | //     from_words: Option<SubWordDerivations>, // ??? | ||||||
|  | //     to_words: Option<SubWordDerivations>,   // + use prefix db? | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // fn aggregate_edge_word_derivations( | ||||||
|  | //     graph: (), | ||||||
|  | //     edges: Vec<usize>, | ||||||
|  | // ) -> BTreeMap<usize, SubWordDerivations> { | ||||||
|  | //     todo!() | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // fn reduce_word_term_to_sub_word_derivations( | ||||||
|  | //     term: &mut WordDerivations, | ||||||
|  | //     derivations: &SubWordDerivations, | ||||||
|  | // ) { | ||||||
|  | //     let mut new_one_typo = vec![]; | ||||||
|  | //     for w in term.one_typo { | ||||||
|  | //         if derivations.words.contains(w) { | ||||||
|  | //             new_one_typo.push(w); | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  | //     if term.use_prefix_db && !derivations.use_prefix_db { | ||||||
|  | //         term.use_prefix_db = false; | ||||||
|  | //     } | ||||||
|  | //     // etc. | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>( | ||||||
|  | //     edge: G::EdgeDetails, | ||||||
|  | // ) -> SubWordDerivations { | ||||||
|  | //     todo!() | ||||||
|  | // } | ||||||
|  |  | ||||||
| pub trait RankingRuleGraphTrait: Sized { | pub trait RankingRuleGraphTrait: Sized { | ||||||
|     /// The details of an edge connecting two query nodes. These details |     /// The details of an edge connecting two query nodes. These details | ||||||
|     /// should be sufficient to compute the edge's cost and associated document ids |     /// should be sufficient to compute the edge's cost and associated document ids | ||||||
| @@ -55,10 +92,8 @@ pub trait RankingRuleGraphTrait: Sized { | |||||||
|     fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String; |     fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String; | ||||||
|  |  | ||||||
|     /// Compute the document ids associated with the given edge. |     /// Compute the document ids associated with the given edge. | ||||||
|     fn compute_docids<'transaction>( |     fn compute_docids<'search>( | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         edge_details: &Self::EdgeDetails, |         edge_details: &Self::EdgeDetails, | ||||||
|     ) -> Result<RoaringBitmap>; |     ) -> Result<RoaringBitmap>; | ||||||
|  |  | ||||||
| @@ -66,19 +101,15 @@ pub trait RankingRuleGraphTrait: Sized { | |||||||
|     /// |     /// | ||||||
|     /// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node), |     /// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node), | ||||||
|     /// which builds the actual edges. |     /// which builds the actual edges. | ||||||
|     fn build_visit_from_node<'transaction>( |     fn build_visit_from_node<'search>( | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         from_node: &QueryNode, |         from_node: &QueryNode, | ||||||
|     ) -> Result<Option<Self::BuildVisitedFromNode>>; |     ) -> Result<Option<Self::BuildVisitedFromNode>>; | ||||||
|  |  | ||||||
|     /// Return the cost and details of the edges going from the previously visited node |     /// Return the cost and details of the edges going from the previously visited node | ||||||
|     /// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`. |     /// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`. | ||||||
|     fn build_visit_to_node<'from_data, 'transaction: 'from_data>( |     fn build_visit_to_node<'from_data, 'search: 'from_data>( | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         to_node: &QueryNode, |         to_node: &QueryNode, | ||||||
|         from_node_data: &'from_data Self::BuildVisitedFromNode, |         from_node_data: &'from_data Self::BuildVisitedFromNode, | ||||||
|     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>; |     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>; | ||||||
|   | |||||||
| @@ -1,30 +1,30 @@ | |||||||
| use std::collections::BTreeMap; |  | ||||||
|  |  | ||||||
| use heed::RoTxn; |  | ||||||
| use itertools::Itertools; |  | ||||||
|  |  | ||||||
| use super::ProximityEdge; | use super::ProximityEdge; | ||||||
| use crate::new::db_cache::DatabaseCache; |  | ||||||
| use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | ||||||
| use crate::new::ranking_rule_graph::proximity::WordPair; | use crate::new::ranking_rule_graph::proximity::WordPair; | ||||||
| use crate::new::ranking_rule_graph::EdgeDetails; | use crate::new::ranking_rule_graph::EdgeDetails; | ||||||
| use crate::new::QueryNode; | use crate::new::{QueryNode, SearchContext}; | ||||||
| use crate::{Index, Result}; | use crate::Result; | ||||||
|  | use itertools::Itertools; | ||||||
|  | use std::collections::BTreeMap; | ||||||
|  |  | ||||||
| pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, i8)>> { | pub fn visit_from_node( | ||||||
|  |     ctx: &mut SearchContext, | ||||||
|  |     from_node: &QueryNode, | ||||||
|  | ) -> Result<Option<(WordDerivations, i8)>> { | ||||||
|     Ok(Some(match from_node { |     Ok(Some(match from_node { | ||||||
|         QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 { |         QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 { | ||||||
|             QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()), |             QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()), | ||||||
|             QueryTerm::Phrase { phrase: phrase1 } => { |             QueryTerm::Phrase { phrase: phrase1 } => { | ||||||
|                 if let Some(original) = phrase1.words.last().unwrap().as_ref() { |                 let phrase1 = ctx.phrase_interner.get(*phrase1); | ||||||
|  |                 if let Some(original) = *phrase1.words.last().unwrap() { | ||||||
|                     ( |                     ( | ||||||
|                         WordDerivations { |                         WordDerivations { | ||||||
|                             original: original.clone(), |                             original, | ||||||
|                             zero_typo: vec![original.to_owned()], |                             zero_typo: Box::new([original]), | ||||||
|                             one_typo: vec![], |                             one_typo: Box::new([]), | ||||||
|                             two_typos: vec![], |                             two_typos: Box::new([]), | ||||||
|                             use_prefix_db: false, |                             use_prefix_db: false, | ||||||
|                             synonyms: vec![], |                             synonyms: Box::new([]), | ||||||
|                             split_words: None, |                             split_words: None, | ||||||
|                         }, |                         }, | ||||||
|                         *pos1.end(), |                         *pos1.end(), | ||||||
| @@ -37,12 +37,12 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, | |||||||
|         }, |         }, | ||||||
|         QueryNode::Start => ( |         QueryNode::Start => ( | ||||||
|             WordDerivations { |             WordDerivations { | ||||||
|                 original: String::new(), |                 original: ctx.word_interner.insert(String::new()), | ||||||
|                 zero_typo: vec![], |                 zero_typo: Box::new([]), | ||||||
|                 one_typo: vec![], |                 one_typo: Box::new([]), | ||||||
|                 two_typos: vec![], |                 two_typos: Box::new([]), | ||||||
|                 use_prefix_db: false, |                 use_prefix_db: false, | ||||||
|                 synonyms: vec![], |                 synonyms: Box::new([]), | ||||||
|                 split_words: None, |                 split_words: None, | ||||||
|             }, |             }, | ||||||
|             -100, |             -100, | ||||||
| @@ -51,10 +51,8 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, | |||||||
|     })) |     })) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn visit_to_node<'transaction, 'from_data>( | pub fn visit_to_node<'search, 'from_data>( | ||||||
|     index: &Index, |     ctx: &mut SearchContext<'search>, | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     to_node: &QueryNode, |     to_node: &QueryNode, | ||||||
|     from_node_data: &'from_data (WordDerivations, i8), |     from_node_data: &'from_data (WordDerivations, i8), | ||||||
| ) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> { | ) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> { | ||||||
| @@ -69,15 +67,16 @@ pub fn visit_to_node<'transaction, 'from_data>( | |||||||
|     let (derivations2, pos2, ngram_len2) = match value2 { |     let (derivations2, pos2, ngram_len2) = match value2 { | ||||||
|         QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()), |         QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()), | ||||||
|         QueryTerm::Phrase { phrase: phrase2 } => { |         QueryTerm::Phrase { phrase: phrase2 } => { | ||||||
|             if let Some(original) = phrase2.words.first().unwrap().as_ref() { |             let phrase2 = ctx.phrase_interner.get(*phrase2); | ||||||
|  |             if let Some(original) = *phrase2.words.first().unwrap() { | ||||||
|                 ( |                 ( | ||||||
|                     WordDerivations { |                     WordDerivations { | ||||||
|                         original: original.clone(), |                         original, | ||||||
|                         zero_typo: vec![original.to_owned()], |                         zero_typo: Box::new([original]), | ||||||
|                         one_typo: vec![], |                         one_typo: Box::new([]), | ||||||
|                         two_typos: vec![], |                         two_typos: Box::new([]), | ||||||
|                         use_prefix_db: false, |                         use_prefix_db: false, | ||||||
|                         synonyms: vec![], |                         synonyms: Box::new([]), | ||||||
|                         split_words: None, |                         split_words: None, | ||||||
|                     }, |                     }, | ||||||
|                     *pos2.start(), |                     *pos2.start(), | ||||||
| @@ -106,19 +105,16 @@ pub fn visit_to_node<'transaction, 'from_data>( | |||||||
|  |  | ||||||
|     let derivations1 = derivations1.all_derivations_except_prefix_db(); |     let derivations1 = derivations1.all_derivations_except_prefix_db(); | ||||||
|     // TODO: eventually, we want to get rid of the uses from `orginal` |     // TODO: eventually, we want to get rid of the uses from `orginal` | ||||||
|     let original_word_2 = derivations2.original.clone(); |  | ||||||
|     let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new(); |     let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new(); | ||||||
|  |  | ||||||
|     if updb2 { |     if updb2 { | ||||||
|         for word1 in derivations1.clone() { |         for word1 in derivations1.clone() { | ||||||
|             for proximity in 1..=(8 - ngram_len2) { |             for proximity in 1..=(8 - ngram_len2) { | ||||||
|                 let cost = (proximity + ngram_len2 - 1) as u8; |                 let cost = (proximity + ngram_len2 - 1) as u8; | ||||||
|                 if db_cache |                 if ctx | ||||||
|                     .get_word_prefix_pair_proximity_docids( |                     .get_word_prefix_pair_proximity_docids( | ||||||
|                         index, |  | ||||||
|                         txn, |  | ||||||
|                         word1, |                         word1, | ||||||
|                         original_word_2.as_str(), |                         derivations2.original, | ||||||
|                         proximity as u8, |                         proximity as u8, | ||||||
|                     )? |                     )? | ||||||
|                     .is_some() |                     .is_some() | ||||||
| @@ -129,16 +125,14 @@ pub fn visit_to_node<'transaction, 'from_data>( | |||||||
|                         .entry(proximity as u8) |                         .entry(proximity as u8) | ||||||
|                         .or_default() |                         .or_default() | ||||||
|                         .push(WordPair::WordPrefix { |                         .push(WordPair::WordPrefix { | ||||||
|                             left: word1.to_owned(), |                             left: word1, | ||||||
|                             right_prefix: original_word_2.to_owned(), |                             right_prefix: derivations2.original, | ||||||
|                         }); |                         }); | ||||||
|                 } |                 } | ||||||
|                 if db_cache |                 if ctx | ||||||
|                     .get_prefix_word_pair_proximity_docids( |                     .get_prefix_word_pair_proximity_docids( | ||||||
|                         index, |                         derivations2.original, | ||||||
|                         txn, |                         word1, | ||||||
|                         original_word_2.as_str(), |  | ||||||
|                         word1.as_str(), |  | ||||||
|                         proximity as u8 - 1, |                         proximity as u8 - 1, | ||||||
|                     )? |                     )? | ||||||
|                     .is_some() |                     .is_some() | ||||||
| @@ -149,8 +143,8 @@ pub fn visit_to_node<'transaction, 'from_data>( | |||||||
|                         .entry(proximity as u8) |                         .entry(proximity as u8) | ||||||
|                         .or_default() |                         .or_default() | ||||||
|                         .push(WordPair::WordPrefixSwapped { |                         .push(WordPair::WordPrefixSwapped { | ||||||
|                             left_prefix: original_word_2.to_owned(), |                             left_prefix: derivations2.original, | ||||||
|                             right: word1.to_owned(), |                             right: word1, | ||||||
|                         }); |                         }); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @@ -164,28 +158,23 @@ pub fn visit_to_node<'transaction, 'from_data>( | |||||||
|     for (word1, word2) in product_derivations { |     for (word1, word2) in product_derivations { | ||||||
|         for proximity in 1..=(8 - ngram_len2) { |         for proximity in 1..=(8 - ngram_len2) { | ||||||
|             let cost = (proximity + ngram_len2 - 1) as u8; |             let cost = (proximity + ngram_len2 - 1) as u8; | ||||||
|             if db_cache |             if ctx.get_word_pair_proximity_docids(word1, word2, proximity as u8)?.is_some() { | ||||||
|                 .get_word_pair_proximity_docids(index, txn, word1, word2, proximity as u8)? |  | ||||||
|                 .is_some() |  | ||||||
|             { |  | ||||||
|                 cost_proximity_word_pairs |                 cost_proximity_word_pairs | ||||||
|                     .entry(cost) |                     .entry(cost) | ||||||
|                     .or_default() |                     .or_default() | ||||||
|                     .entry(proximity as u8) |                     .entry(proximity as u8) | ||||||
|                     .or_default() |                     .or_default() | ||||||
|                     .push(WordPair::Words { left: word1.to_owned(), right: word2.to_owned() }); |                     .push(WordPair::Words { left: word1, right: word2 }); | ||||||
|             } |             } | ||||||
|             if proximity > 1 |             if proximity > 1 | ||||||
|                 && db_cache |                 && ctx.get_word_pair_proximity_docids(word2, word1, proximity as u8 - 1)?.is_some() | ||||||
|                     .get_word_pair_proximity_docids(index, txn, word2, word1, proximity as u8 - 1)? |  | ||||||
|                     .is_some() |  | ||||||
|             { |             { | ||||||
|                 cost_proximity_word_pairs |                 cost_proximity_word_pairs | ||||||
|                     .entry(cost) |                     .entry(cost) | ||||||
|                     .or_default() |                     .or_default() | ||||||
|                     .entry(proximity as u8 - 1) |                     .entry(proximity as u8 - 1) | ||||||
|                     .or_default() |                     .or_default() | ||||||
|                     .push(WordPair::Words { left: word2.to_owned(), right: word1.to_owned() }); |                     .push(WordPair::Words { left: word2, right: word1 }); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -1,14 +1,10 @@ | |||||||
| use heed::RoTxn; | use super::{ProximityEdge, WordPair}; | ||||||
|  | use crate::new::SearchContext; | ||||||
|  | use crate::{CboRoaringBitmapCodec, Result}; | ||||||
| use roaring::{MultiOps, RoaringBitmap}; | use roaring::{MultiOps, RoaringBitmap}; | ||||||
|  |  | ||||||
| use super::{ProximityEdge, WordPair}; | pub fn compute_docids<'search>( | ||||||
| use crate::new::db_cache::DatabaseCache; |     ctx: &mut SearchContext<'search>, | ||||||
| use crate::{CboRoaringBitmapCodec, Result}; |  | ||||||
|  |  | ||||||
| pub fn compute_docids<'transaction>( |  | ||||||
|     index: &crate::Index, |  | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     edge: &ProximityEdge, |     edge: &ProximityEdge, | ||||||
| ) -> Result<RoaringBitmap> { | ) -> Result<RoaringBitmap> { | ||||||
|     let ProximityEdge { pairs, proximity } = edge; |     let ProximityEdge { pairs, proximity } = edge; | ||||||
| @@ -16,12 +12,14 @@ pub fn compute_docids<'transaction>( | |||||||
|     for pair in pairs.iter() { |     for pair in pairs.iter() { | ||||||
|         let bytes = match pair { |         let bytes = match pair { | ||||||
|             WordPair::Words { left, right } => { |             WordPair::Words { left, right } => { | ||||||
|                 db_cache.get_word_pair_proximity_docids(index, txn, left, right, *proximity) |                 ctx.get_word_pair_proximity_docids(*left, *right, *proximity) | ||||||
|  |             } | ||||||
|  |             WordPair::WordPrefix { left, right_prefix } => { | ||||||
|  |                 ctx.get_word_prefix_pair_proximity_docids(*left, *right_prefix, *proximity) | ||||||
|  |             } | ||||||
|  |             WordPair::WordPrefixSwapped { left_prefix, right } => { | ||||||
|  |                 ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity) | ||||||
|             } |             } | ||||||
|             WordPair::WordPrefix { left, right_prefix } => db_cache |  | ||||||
|                 .get_word_prefix_pair_proximity_docids(index, txn, left, right_prefix, *proximity), |  | ||||||
|             WordPair::WordPrefixSwapped { left_prefix, right } => db_cache |  | ||||||
|                 .get_prefix_word_pair_proximity_docids(index, txn, left_prefix, right, *proximity), |  | ||||||
|         }?; |         }?; | ||||||
|         let bitmap = |         let bitmap = | ||||||
|             bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default(); |             bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default(); | ||||||
|   | |||||||
| @@ -1,25 +1,22 @@ | |||||||
| pub mod build; | pub mod build; | ||||||
| pub mod compute_docids; | pub mod compute_docids; | ||||||
|  |  | ||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::empty_paths_cache::EmptyPathsCache; | use super::empty_paths_cache::EmptyPathsCache; | ||||||
|  |  | ||||||
| use super::{EdgeDetails, RankingRuleGraphTrait}; | use super::{EdgeDetails, RankingRuleGraphTrait}; | ||||||
| use crate::new::db_cache::DatabaseCache; | use crate::new::interner::Interned; | ||||||
| use crate::new::logger::SearchLogger; | use crate::new::logger::SearchLogger; | ||||||
| use crate::new::query_term::WordDerivations; | use crate::new::query_term::WordDerivations; | ||||||
| use crate::new::{QueryGraph, QueryNode}; | use crate::new::{QueryGraph, QueryNode, SearchContext}; | ||||||
| use crate::{Index, Result}; | use crate::Result; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| // TODO: intern the strings, refer to them by their pointer? | // TODO: intern the strings, refer to them by their pointer? | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Clone)] | ||||||
| pub enum WordPair { | pub enum WordPair { | ||||||
|     Words { left: String, right: String }, |     Words { left: Interned<String>, right: Interned<String> }, | ||||||
|     WordPrefix { left: String, right_prefix: String }, |     WordPrefix { left: Interned<String>, right_prefix: Interned<String> }, | ||||||
|     WordPrefixSwapped { left_prefix: String, right: String }, |     WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> }, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Clone)] | #[derive(Clone)] | ||||||
| @@ -40,32 +37,26 @@ impl RankingRuleGraphTrait for ProximityGraph { | |||||||
|         format!(", prox {proximity}, {} pairs", pairs.len()) |         format!(", prox {proximity}, {} pairs", pairs.len()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn compute_docids<'db_cache, 'transaction>( |     fn compute_docids<'search>( | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         edge: &Self::EdgeDetails, |         edge: &Self::EdgeDetails, | ||||||
|     ) -> Result<roaring::RoaringBitmap> { |     ) -> Result<roaring::RoaringBitmap> { | ||||||
|         compute_docids::compute_docids(index, txn, db_cache, edge) |         compute_docids::compute_docids(ctx, edge) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn build_visit_from_node<'transaction>( |     fn build_visit_from_node<'search>( | ||||||
|         _index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         from_node: &QueryNode, |         from_node: &QueryNode, | ||||||
|     ) -> Result<Option<Self::BuildVisitedFromNode>> { |     ) -> Result<Option<Self::BuildVisitedFromNode>> { | ||||||
|         build::visit_from_node(from_node) |         build::visit_from_node(ctx, from_node) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn build_visit_to_node<'from_data, 'transaction: 'from_data>( |     fn build_visit_to_node<'from_data, 'search: 'from_data>( | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         to_node: &QueryNode, |         to_node: &QueryNode, | ||||||
|         from_node_data: &'from_data Self::BuildVisitedFromNode, |         from_node_data: &'from_data Self::BuildVisitedFromNode, | ||||||
|     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { |     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { | ||||||
|         build::visit_to_node(index, txn, db_cache, to_node, from_node_data) |         build::visit_to_node(ctx, to_node, from_node_data) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn log_state( |     fn log_state( | ||||||
|   | |||||||
| @@ -1,23 +1,18 @@ | |||||||
| #![allow(clippy::too_many_arguments)] | #![allow(clippy::too_many_arguments)] | ||||||
|  |  | ||||||
| use heed::RoTxn; |  | ||||||
| use roaring::{MultiOps, RoaringBitmap}; |  | ||||||
|  |  | ||||||
| use super::edge_docids_cache::EdgeDocidsCache; | use super::edge_docids_cache::EdgeDocidsCache; | ||||||
| use super::empty_paths_cache::EmptyPathsCache; | use super::empty_paths_cache::EmptyPathsCache; | ||||||
|  |  | ||||||
| use super::{RankingRuleGraph, RankingRuleGraphTrait}; | use super::{RankingRuleGraph, RankingRuleGraphTrait}; | ||||||
| use crate::new::db_cache::DatabaseCache; | use crate::new::{BitmapOrAllRef, SearchContext}; | ||||||
|  | use crate::Result; | ||||||
| use crate::new::BitmapOrAllRef; | use roaring::{MultiOps, RoaringBitmap}; | ||||||
| use crate::{Index, Result}; |  | ||||||
|  |  | ||||||
| impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||||
|     pub fn resolve_paths<'transaction>( |     // TODO: reduce the universe after computing each path | ||||||
|  |     // TODO: deserialize roaring bitmap within a universe | ||||||
|  |     pub fn resolve_paths<'search>( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         edge_docids_cache: &mut EdgeDocidsCache<G>, |         edge_docids_cache: &mut EdgeDocidsCache<G>, | ||||||
|         empty_paths_cache: &mut EmptyPathsCache, |         empty_paths_cache: &mut EmptyPathsCache, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
| @@ -52,8 +47,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | |||||||
|             let mut cached_edge_docids = vec![]; |             let mut cached_edge_docids = vec![]; | ||||||
|             'edge_loop: for edge_index in edge_indexes { |             'edge_loop: for edge_index in edge_indexes { | ||||||
|                 visited_edges.push(edge_index); |                 visited_edges.push(edge_index); | ||||||
|                 let edge_docids = edge_docids_cache |                 let edge_docids = | ||||||
|                     .get_edge_docids(index, txn, db_cache, edge_index, self, universe)?; |                     edge_docids_cache.get_edge_docids(ctx, edge_index, self, universe)?; | ||||||
|                 match edge_docids { |                 match edge_docids { | ||||||
|                     BitmapOrAllRef::Bitmap(edge_docids) => { |                     BitmapOrAllRef::Bitmap(edge_docids) => { | ||||||
|                         cached_edge_docids.push((edge_index, edge_docids.clone())); |                         cached_edge_docids.push((edge_index, edge_docids.clone())); | ||||||
|   | |||||||
| @@ -1,19 +1,17 @@ | |||||||
| use heed::{BytesDecode, RoTxn}; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::empty_paths_cache::EmptyPathsCache; | use super::empty_paths_cache::EmptyPathsCache; | ||||||
|  |  | ||||||
| use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; | use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; | ||||||
| use crate::new::db_cache::DatabaseCache; | use crate::new::interner::Interned; | ||||||
| use crate::new::logger::SearchLogger; | use crate::new::logger::SearchLogger; | ||||||
| use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; | use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; | ||||||
| use crate::new::resolve_query_graph::resolve_phrase; | use crate::new::resolve_query_graph::resolve_phrase; | ||||||
| use crate::new::{QueryGraph, QueryNode}; | use crate::new::{QueryGraph, QueryNode, SearchContext}; | ||||||
| use crate::{Index, Result, RoaringBitmapCodec}; | use crate::{Result, RoaringBitmapCodec}; | ||||||
|  | use heed::BytesDecode; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| #[derive(Clone)] | #[derive(Clone)] | ||||||
| pub enum TypoEdge { | pub enum TypoEdge { | ||||||
|     Phrase { phrase: Phrase }, |     Phrase { phrase: Interned<Phrase> }, | ||||||
|     Word { derivations: WordDerivations, nbr_typos: u8 }, |     Word { derivations: WordDerivations, nbr_typos: u8 }, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -30,14 +28,12 @@ impl RankingRuleGraphTrait for TypoGraph { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn compute_docids<'db_cache, 'transaction>( |     fn compute_docids<'db_cache, 'search>( | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         edge: &Self::EdgeDetails, |         edge: &Self::EdgeDetails, | ||||||
|     ) -> Result<RoaringBitmap> { |     ) -> Result<RoaringBitmap> { | ||||||
|         match edge { |         match edge { | ||||||
|             TypoEdge::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase), |             TypoEdge::Phrase { phrase } => resolve_phrase(ctx, *phrase), | ||||||
|             TypoEdge::Word { derivations, nbr_typos } => { |             TypoEdge::Word { derivations, nbr_typos } => { | ||||||
|                 let words = match nbr_typos { |                 let words = match nbr_typos { | ||||||
|                     0 => &derivations.zero_typo, |                     0 => &derivations.zero_typo, | ||||||
| @@ -46,16 +42,14 @@ impl RankingRuleGraphTrait for TypoGraph { | |||||||
|                     _ => panic!(), |                     _ => panic!(), | ||||||
|                 }; |                 }; | ||||||
|                 let mut docids = RoaringBitmap::new(); |                 let mut docids = RoaringBitmap::new(); | ||||||
|                 for word in words.iter() { |                 for word in words.iter().copied() { | ||||||
|                     let Some(bytes) = db_cache.get_word_docids(index, txn, word)? else { continue }; |                     let Some(bytes) = ctx.get_word_docids(word)? else { continue }; | ||||||
|                     let bitmap = |                     let bitmap = | ||||||
|                         RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?; |                         RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?; | ||||||
|                     docids |= bitmap; |                     docids |= bitmap; | ||||||
|                 } |                 } | ||||||
|                 if *nbr_typos == 0 { |                 if *nbr_typos == 0 { | ||||||
|                     if let Some(bytes) = |                     if let Some(bytes) = ctx.get_prefix_docids(derivations.original)? { | ||||||
|                         db_cache.get_prefix_docids(index, txn, &derivations.original)? |  | ||||||
|                     { |  | ||||||
|                         let bitmap = |                         let bitmap = | ||||||
|                             RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?; |                             RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?; | ||||||
|                         docids |= bitmap; |                         docids |= bitmap; | ||||||
| @@ -66,26 +60,22 @@ impl RankingRuleGraphTrait for TypoGraph { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn build_visit_from_node<'transaction>( |     fn build_visit_from_node<'search>( | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _from_node: &QueryNode, |         _from_node: &QueryNode, | ||||||
|     ) -> Result<Option<Self::BuildVisitedFromNode>> { |     ) -> Result<Option<Self::BuildVisitedFromNode>> { | ||||||
|         Ok(Some(())) |         Ok(Some(())) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn build_visit_to_node<'from_data, 'transaction: 'from_data>( |     fn build_visit_to_node<'from_data, 'search: 'from_data>( | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         to_node: &QueryNode, |         to_node: &QueryNode, | ||||||
|         _from_node_data: &'from_data Self::BuildVisitedFromNode, |         _from_node_data: &'from_data Self::BuildVisitedFromNode, | ||||||
|     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { |     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { | ||||||
|         match to_node { |         match to_node { | ||||||
|             QueryNode::Term(LocatedQueryTerm { value, .. }) => match value { |             QueryNode::Term(LocatedQueryTerm { value, .. }) => match value { | ||||||
|                 QueryTerm::Phrase { phrase } => { |                 &QueryTerm::Phrase { phrase } => { | ||||||
|                     Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase: phrase.clone() }))]) |                     Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))]) | ||||||
|                 } |                 } | ||||||
|                 QueryTerm::Word { derivations } => { |                 QueryTerm::Word { derivations } => { | ||||||
|                     let mut edges = vec![]; |                     let mut edges = vec![]; | ||||||
|   | |||||||
| @@ -1,33 +1,28 @@ | |||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; |  | ||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
|  |  | ||||||
| use super::QueryGraph; | use super::QueryGraph; | ||||||
|  | use super::SearchContext; | ||||||
| use crate::new::graph_based_ranking_rule::GraphBasedRankingRule; | use crate::new::graph_based_ranking_rule::GraphBasedRankingRule; | ||||||
| use crate::new::ranking_rule_graph::ProximityGraph; | use crate::new::ranking_rule_graph::ProximityGraph; | ||||||
| use crate::new::ranking_rule_graph::TypoGraph; | use crate::new::ranking_rule_graph::TypoGraph; | ||||||
| use crate::new::words::Words; | use crate::new::words::Words; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| // use crate::search::new::sort::Sort; | // use crate::search::new::sort::Sort; | ||||||
| use crate::{Index, Result, TermsMatchingStrategy}; | use crate::{Result, TermsMatchingStrategy}; | ||||||
|  |  | ||||||
| pub trait RankingRuleOutputIter<'transaction, Query> { | pub trait RankingRuleOutputIter<'search, Query> { | ||||||
|     fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>; |     fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>; | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct RankingRuleOutputIterWrapper<'transaction, Query> { | pub struct RankingRuleOutputIterWrapper<'search, Query> { | ||||||
|     iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'transaction>, |     iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'search>, | ||||||
| } | } | ||||||
| impl<'transaction, Query> RankingRuleOutputIterWrapper<'transaction, Query> { | impl<'search, Query> RankingRuleOutputIterWrapper<'search, Query> { | ||||||
|     pub fn new( |     pub fn new(iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'search>) -> Self { | ||||||
|         iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'transaction>, |  | ||||||
|     ) -> Self { |  | ||||||
|         Self { iter } |         Self { iter } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| impl<'transaction, Query> RankingRuleOutputIter<'transaction, Query> | impl<'search, Query> RankingRuleOutputIter<'search, Query> | ||||||
|     for RankingRuleOutputIterWrapper<'transaction, Query> |     for RankingRuleOutputIterWrapper<'search, Query> | ||||||
| { | { | ||||||
|     fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>> { |     fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>> { | ||||||
|         match self.iter.next() { |         match self.iter.next() { | ||||||
| @@ -44,7 +39,7 @@ pub struct PlaceholderQuery; | |||||||
| impl RankingRuleQueryTrait for PlaceholderQuery {} | impl RankingRuleQueryTrait for PlaceholderQuery {} | ||||||
| impl RankingRuleQueryTrait for QueryGraph {} | impl RankingRuleQueryTrait for QueryGraph {} | ||||||
|  |  | ||||||
| pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> { | pub trait RankingRule<'search, Query: RankingRuleQueryTrait> { | ||||||
|     fn id(&self) -> String; |     fn id(&self) -> String; | ||||||
|  |  | ||||||
|     /// Prepare the ranking rule such that it can start iterating over its |     /// Prepare the ranking rule such that it can start iterating over its | ||||||
| @@ -53,9 +48,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> { | |||||||
|     /// The given universe is the universe that will be given to [`next_bucket`](RankingRule::next_bucket). |     /// The given universe is the universe that will be given to [`next_bucket`](RankingRule::next_bucket). | ||||||
|     fn start_iteration( |     fn start_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         logger: &mut dyn SearchLogger<Query>, |         logger: &mut dyn SearchLogger<Query>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|         query: &Query, |         query: &Query, | ||||||
| @@ -70,9 +63,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> { | |||||||
|     /// - the universe given to [`start_iteration`](RankingRule::start_iteration) |     /// - the universe given to [`start_iteration`](RankingRule::start_iteration) | ||||||
|     fn next_bucket( |     fn next_bucket( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         logger: &mut dyn SearchLogger<Query>, |         logger: &mut dyn SearchLogger<Query>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|     ) -> Result<Option<RankingRuleOutput<Query>>>; |     ) -> Result<Option<RankingRuleOutput<Query>>>; | ||||||
| @@ -81,9 +72,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> { | |||||||
|     /// The next call to this ranking rule, if any, will be [`start_iteration`](RankingRule::start_iteration). |     /// The next call to this ranking rule, if any, will be [`start_iteration`](RankingRule::start_iteration). | ||||||
|     fn end_iteration( |     fn end_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         logger: &mut dyn SearchLogger<Query>, |         logger: &mut dyn SearchLogger<Query>, | ||||||
|     ); |     ); | ||||||
| } | } | ||||||
| @@ -98,11 +87,9 @@ pub struct RankingRuleOutput<Q> { | |||||||
|  |  | ||||||
| // TODO: can make it generic over the query type (either query graph or placeholder) fairly easily | // TODO: can make it generic over the query type (either query graph or placeholder) fairly easily | ||||||
| #[allow(clippy::too_many_arguments)] | #[allow(clippy::too_many_arguments)] | ||||||
| pub fn apply_ranking_rules<'transaction>( | pub fn apply_ranking_rules<'search>( | ||||||
|     index: &Index, |     ctx: &mut SearchContext<'search>, | ||||||
|     txn: &'transaction heed::RoTxn, |  | ||||||
|     // TODO: ranking rules parameter |     // TODO: ranking rules parameter | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     query_graph: &QueryGraph, |     query_graph: &QueryGraph, | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
|     from: usize, |     from: usize, | ||||||
| @@ -115,7 +102,7 @@ pub fn apply_ranking_rules<'transaction>( | |||||||
|     let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned()); |     let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned()); | ||||||
|     let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned()); |     let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned()); | ||||||
|     // TODO: ranking rules given as argument |     // TODO: ranking rules given as argument | ||||||
|     let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> = |     let mut ranking_rules: Vec<&mut dyn RankingRule<'search, QueryGraph>> = | ||||||
|         vec![words, typo, proximity /*sort*/]; |         vec![words, typo, proximity /*sort*/]; | ||||||
|  |  | ||||||
|     logger.ranking_rules(&ranking_rules); |     logger.ranking_rules(&ranking_rules); | ||||||
| @@ -126,7 +113,7 @@ pub fn apply_ranking_rules<'transaction>( | |||||||
|  |  | ||||||
|     let ranking_rules_len = ranking_rules.len(); |     let ranking_rules_len = ranking_rules.len(); | ||||||
|     logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe); |     logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe); | ||||||
|     ranking_rules[0].start_iteration(index, txn, db_cache, logger, universe, query_graph)?; |     ranking_rules[0].start_iteration(ctx, logger, universe, query_graph)?; | ||||||
|  |  | ||||||
|     let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len]; |     let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len]; | ||||||
|     candidates[0] = universe.clone(); |     candidates[0] = universe.clone(); | ||||||
| @@ -142,7 +129,7 @@ pub fn apply_ranking_rules<'transaction>( | |||||||
|                 &candidates[cur_ranking_rule_index], |                 &candidates[cur_ranking_rule_index], | ||||||
|             ); |             ); | ||||||
|             candidates[cur_ranking_rule_index].clear(); |             candidates[cur_ranking_rule_index].clear(); | ||||||
|             ranking_rules[cur_ranking_rule_index].end_iteration(index, txn, db_cache, logger); |             ranking_rules[cur_ranking_rule_index].end_iteration(ctx, logger); | ||||||
|             if cur_ranking_rule_index == 0 { |             if cur_ranking_rule_index == 0 { | ||||||
|                 break; |                 break; | ||||||
|             } else { |             } else { | ||||||
| @@ -206,7 +193,7 @@ pub fn apply_ranking_rules<'transaction>( | |||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else { |         let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(ctx, logger, &candidates[cur_ranking_rule_index])? else { | ||||||
|             // TODO: add remaining candidates automatically here? |             // TODO: add remaining candidates automatically here? | ||||||
|             back!(); |             back!(); | ||||||
|             continue; |             continue; | ||||||
| @@ -239,9 +226,7 @@ pub fn apply_ranking_rules<'transaction>( | |||||||
|             &candidates[cur_ranking_rule_index], |             &candidates[cur_ranking_rule_index], | ||||||
|         ); |         ); | ||||||
|         ranking_rules[cur_ranking_rule_index].start_iteration( |         ranking_rules[cur_ranking_rule_index].start_iteration( | ||||||
|             index, |             ctx, | ||||||
|             txn, |  | ||||||
|             db_cache, |  | ||||||
|             logger, |             logger, | ||||||
|             &next_bucket.candidates, |             &next_bucket.candidates, | ||||||
|             &next_bucket.query, |             &next_bucket.query, | ||||||
| @@ -255,9 +240,7 @@ pub fn apply_ranking_rules<'transaction>( | |||||||
| mod tests { | mod tests { | ||||||
|     // use crate::allocator::ALLOC; |     // use crate::allocator::ALLOC; | ||||||
|     use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; |     use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; | ||||||
|     use crate::index::tests::TempIndex; |     use crate::new::{execute_search, SearchContext}; | ||||||
|     use crate::new::db_cache::DatabaseCache; |  | ||||||
|     use crate::new::execute_search; |  | ||||||
|     use big_s::S; |     use big_s::S; | ||||||
|     use heed::EnvOpenOptions; |     use heed::EnvOpenOptions; | ||||||
|     use maplit::hashset; |     use maplit::hashset; | ||||||
| @@ -269,55 +252,6 @@ mod tests { | |||||||
|     use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; |     use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; | ||||||
|     use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy}; |     use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy}; | ||||||
|  |  | ||||||
|     #[test] |  | ||||||
|     fn execute_new_search() { |  | ||||||
|         let index = TempIndex::new(); |  | ||||||
|         index |  | ||||||
|             .add_documents(documents!([ |  | ||||||
|                 { |  | ||||||
|                     "id": 7, |  | ||||||
|                     "text": "the super quick super brown fox jumps over", |  | ||||||
|                 }, |  | ||||||
|                 { |  | ||||||
|                     "id": 8, |  | ||||||
|                     "text": "the super quick brown fox jumps over", |  | ||||||
|                 }, |  | ||||||
|                 { |  | ||||||
|                     "id": 9, |  | ||||||
|                     "text": "the quick super brown fox jumps over", |  | ||||||
|                 }, |  | ||||||
|                 { |  | ||||||
|                     "id": 10, |  | ||||||
|                     "text": "the quick brown fox jumps over", |  | ||||||
|                 }, |  | ||||||
|                 { |  | ||||||
|                     "id": 11, |  | ||||||
|                     "text": "the quick brown fox jumps over the lazy dog", |  | ||||||
|                 }, |  | ||||||
|                 { |  | ||||||
|                     "id": 12, |  | ||||||
|                     "text": "the quick brown cat jumps over the lazy dog", |  | ||||||
|                 }, |  | ||||||
|             ])) |  | ||||||
|             .unwrap(); |  | ||||||
|         let txn = index.read_txn().unwrap(); |  | ||||||
|         let mut db_cache = DatabaseCache::default(); |  | ||||||
|  |  | ||||||
|         let results = execute_search( |  | ||||||
|             &index, |  | ||||||
|             &txn, |  | ||||||
|             &mut db_cache, |  | ||||||
|             "releases from poison by the government", |  | ||||||
|             None, |  | ||||||
|             0, |  | ||||||
|             50, |  | ||||||
|             &mut DefaultSearchLogger, |  | ||||||
|         ) |  | ||||||
|         .unwrap(); |  | ||||||
|  |  | ||||||
|         println!("{results:?}") |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn search_wiki_new() { |     fn search_wiki_new() { | ||||||
|         let mut options = EnvOpenOptions::new(); |         let mut options = EnvOpenOptions::new(); | ||||||
| @@ -331,24 +265,20 @@ mod tests { | |||||||
|         // loop { |         // loop { | ||||||
|         let start = Instant::now(); |         let start = Instant::now(); | ||||||
|  |  | ||||||
|         let mut db_cache = DatabaseCache::default(); |         // let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); | ||||||
|  |  | ||||||
|         let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); |  | ||||||
|  |  | ||||||
|         let results = execute_search( |         let results = execute_search( | ||||||
|             &index, |             &mut SearchContext::new(&index, &txn), | ||||||
|             &txn, |  | ||||||
|             &mut db_cache, |  | ||||||
|             "releases from poison by the government", |             "releases from poison by the government", | ||||||
|             None, |             None, | ||||||
|             0, |             0, | ||||||
|             20, |             20, | ||||||
|             // &mut DefaultSearchLogger, |             &mut DefaultSearchLogger, | ||||||
|             &mut logger, |             // &mut logger, | ||||||
|         ) |         ) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
|  |  | ||||||
|         logger.write_d2_description(); |         // logger.write_d2_description(); | ||||||
|  |  | ||||||
|         let elapsed = start.elapsed(); |         let elapsed = start.elapsed(); | ||||||
|  |  | ||||||
| @@ -425,19 +355,15 @@ mod tests { | |||||||
|         let index = Index::new(options, "data_movies").unwrap(); |         let index = Index::new(options, "data_movies").unwrap(); | ||||||
|         let txn = index.read_txn().unwrap(); |         let txn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|         let primary_key = index.primary_key(&txn).unwrap().unwrap(); |         // let primary_key = index.primary_key(&txn).unwrap().unwrap(); | ||||||
|         let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap(); |         // let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap(); | ||||||
|         // loop { |         // loop { | ||||||
|         let start = Instant::now(); |         let start = Instant::now(); | ||||||
|  |  | ||||||
|         let mut db_cache = DatabaseCache::default(); |  | ||||||
|  |  | ||||||
|         let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); |         let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); | ||||||
|  |         let mut ctx = SearchContext::new(&index, &txn); | ||||||
|         let results = execute_search( |         let results = execute_search( | ||||||
|             &index, |             &mut ctx, | ||||||
|             &txn, |  | ||||||
|             &mut db_cache, |  | ||||||
|             "releases from poison by the government", |             "releases from poison by the government", | ||||||
|             None, |             None, | ||||||
|             0, |             0, | ||||||
| @@ -447,24 +373,24 @@ mod tests { | |||||||
|         ) |         ) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
|  |  | ||||||
|         logger.write_d2_description(); |         logger.write_d2_description(&mut ctx); | ||||||
|  |  | ||||||
|         let elapsed = start.elapsed(); |         let elapsed = start.elapsed(); | ||||||
|  |  | ||||||
|         let ids = index |         // let ids = index | ||||||
|             .documents(&txn, results.iter().copied()) |         //     .documents(&txn, results.iter().copied()) | ||||||
|             .unwrap() |         //     .unwrap() | ||||||
|             .into_iter() |         //     .into_iter() | ||||||
|             .map(|x| { |         //     .map(|x| { | ||||||
|                 let obkv = &x.1; |         //         let obkv = &x.1; | ||||||
|                 let id = obkv.get(primary_key).unwrap(); |         //         let id = obkv.get(primary_key).unwrap(); | ||||||
|                 let id: serde_json::Value = serde_json::from_slice(id).unwrap(); |         //         let id: serde_json::Value = serde_json::from_slice(id).unwrap(); | ||||||
|                 id.as_str().unwrap().to_owned() |         //         id.as_str().unwrap().to_owned() | ||||||
|             }) |         //     }) | ||||||
|             .collect::<Vec<_>>(); |         //     .collect::<Vec<_>>(); | ||||||
|  |  | ||||||
|         println!("{}us: {results:?}", elapsed.as_micros()); |         println!("{}us: {results:?}", elapsed.as_micros()); | ||||||
|         println!("external ids: {ids:?}"); |         // println!("external ids: {ids:?}"); | ||||||
|         // } |         // } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,34 +1,28 @@ | |||||||
| use std::collections::VecDeque; | use super::interner::Interned; | ||||||
|  |  | ||||||
| use fxhash::FxHashMap; |  | ||||||
| use heed::{BytesDecode, RoTxn}; |  | ||||||
| use roaring::{MultiOps, RoaringBitmap}; |  | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; |  | ||||||
| use super::query_term::{Phrase, QueryTerm, WordDerivations}; | use super::query_term::{Phrase, QueryTerm, WordDerivations}; | ||||||
| use super::{QueryGraph, QueryNode}; | use super::{QueryGraph, QueryNode, SearchContext}; | ||||||
|  | use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; | ||||||
| use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec}; | use fxhash::FxHashMap; | ||||||
|  | use heed::BytesDecode; | ||||||
|  | use roaring::{MultiOps, RoaringBitmap}; | ||||||
|  | use std::collections::VecDeque; | ||||||
|  |  | ||||||
| // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. | // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. | ||||||
| #[derive(Default)] | #[derive(Default)] | ||||||
| pub struct NodeDocIdsCache { | pub struct NodeDocIdsCache { | ||||||
|     pub cache: FxHashMap<u32, RoaringBitmap>, |     pub cache: FxHashMap<u32, RoaringBitmap>, | ||||||
| } | } | ||||||
| impl NodeDocIdsCache { | impl<'search> SearchContext<'search> { | ||||||
|     fn get_docids<'cache, 'transaction>( |     fn get_node_docids<'cache>( | ||||||
|         &'cache mut self, |         &'cache mut self, | ||||||
|         index: &Index, |  | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         term: &QueryTerm, |         term: &QueryTerm, | ||||||
|         node_idx: u32, |         node_idx: u32, | ||||||
|     ) -> Result<&'cache RoaringBitmap> { |     ) -> Result<&'cache RoaringBitmap> { | ||||||
|         if self.cache.contains_key(&node_idx) { |         if self.node_docids_cache.cache.contains_key(&node_idx) { | ||||||
|             return Ok(&self.cache[&node_idx]); |             return Ok(&self.node_docids_cache.cache[&node_idx]); | ||||||
|         }; |         }; | ||||||
|         let docids = match term { |         let docids = match term { | ||||||
|             QueryTerm::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase)?, |             QueryTerm::Phrase { phrase } => resolve_phrase(self, *phrase)?, | ||||||
|             QueryTerm::Word { |             QueryTerm::Word { | ||||||
|                 derivations: |                 derivations: | ||||||
|                     WordDerivations { |                     WordDerivations { | ||||||
| @@ -42,15 +36,14 @@ impl NodeDocIdsCache { | |||||||
|                     }, |                     }, | ||||||
|             } => { |             } => { | ||||||
|                 let mut or_docids = vec![]; |                 let mut or_docids = vec![]; | ||||||
|                 for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()) { |                 for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()).copied() | ||||||
|                     if let Some(word_docids) = db_cache.get_word_docids(index, txn, word)? { |                 { | ||||||
|  |                     if let Some(word_docids) = self.get_word_docids(word)? { | ||||||
|                         or_docids.push(word_docids); |                         or_docids.push(word_docids); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 if *use_prefix_db { |                 if *use_prefix_db { | ||||||
|                     if let Some(prefix_docids) = |                     if let Some(prefix_docids) = self.get_prefix_docids(*original)? { | ||||||
|                         db_cache.get_prefix_docids(index, txn, original.as_str())? |  | ||||||
|                     { |  | ||||||
|                         or_docids.push(prefix_docids); |                         or_docids.push(prefix_docids); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
| @@ -58,32 +51,25 @@ impl NodeDocIdsCache { | |||||||
|                     .into_iter() |                     .into_iter() | ||||||
|                     .map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap()) |                     .map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap()) | ||||||
|                     .collect::<Vec<_>>(); |                     .collect::<Vec<_>>(); | ||||||
|                 for synonym in synonyms { |                 for synonym in synonyms.iter().copied() { | ||||||
|                     // TODO: cache resolve_phrase? |                     // TODO: cache resolve_phrase? | ||||||
|                     docids.push(resolve_phrase(index, txn, db_cache, synonym)?); |                     docids.push(resolve_phrase(self, synonym)?); | ||||||
|                 } |                 } | ||||||
|                 if let Some((left, right)) = split_words { |                 if let Some(split_words) = split_words { | ||||||
|                     if let Some(split_word_docids) = |                     docids.push(resolve_phrase(self, *split_words)?); | ||||||
|                         db_cache.get_word_pair_proximity_docids(index, txn, left, right, 1)? |  | ||||||
|                     { |  | ||||||
|                         docids.push(CboRoaringBitmapCodec::deserialize_from(split_word_docids)?); |  | ||||||
|                     } |  | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 MultiOps::union(docids) |                 MultiOps::union(docids) | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|         let _ = self.cache.insert(node_idx, docids); |         let _ = self.node_docids_cache.cache.insert(node_idx, docids); | ||||||
|         let docids = &self.cache[&node_idx]; |         let docids = &self.node_docids_cache.cache[&node_idx]; | ||||||
|         Ok(docids) |         Ok(docids) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn resolve_query_graph<'transaction>( | pub fn resolve_query_graph<'search>( | ||||||
|     index: &Index, |     ctx: &mut SearchContext<'search>, | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     node_docids_cache: &mut NodeDocIdsCache, |  | ||||||
|     q: &QueryGraph, |     q: &QueryGraph, | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
| ) -> Result<RoaringBitmap> { | ) -> Result<RoaringBitmap> { | ||||||
| @@ -111,8 +97,7 @@ pub fn resolve_query_graph<'transaction>( | |||||||
|         let node_docids = match n { |         let node_docids = match n { | ||||||
|             QueryNode::Term(located_term) => { |             QueryNode::Term(located_term) => { | ||||||
|                 let term = &located_term.value; |                 let term = &located_term.value; | ||||||
|                 let derivations_docids = |                 let derivations_docids = ctx.get_node_docids(term, node)?; | ||||||
|                     node_docids_cache.get_docids(index, txn, db_cache, term, node)?; |  | ||||||
|                 predecessors_docids & derivations_docids |                 predecessors_docids & derivations_docids | ||||||
|             } |             } | ||||||
|             QueryNode::Deleted => { |             QueryNode::Deleted => { | ||||||
| @@ -143,13 +128,8 @@ pub fn resolve_query_graph<'transaction>( | |||||||
|     panic!() |     panic!() | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn resolve_phrase<'transaction>( | pub fn resolve_phrase(ctx: &mut SearchContext, phrase: Interned<Phrase>) -> Result<RoaringBitmap> { | ||||||
|     index: &Index, |     let Phrase { words } = ctx.phrase_interner.get(phrase).clone(); | ||||||
|     txn: &'transaction RoTxn, |  | ||||||
|     db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|     phrase: &Phrase, |  | ||||||
| ) -> Result<RoaringBitmap> { |  | ||||||
|     let Phrase { words } = phrase; |  | ||||||
|     let mut candidates = RoaringBitmap::new(); |     let mut candidates = RoaringBitmap::new(); | ||||||
|     let mut first_iter = true; |     let mut first_iter = true; | ||||||
|     let winsize = words.len().min(3); |     let winsize = words.len().min(3); | ||||||
| @@ -161,19 +141,19 @@ pub fn resolve_phrase<'transaction>( | |||||||
|     for win in words.windows(winsize) { |     for win in words.windows(winsize) { | ||||||
|         // Get all the documents with the matching distance for each word pairs. |         // Get all the documents with the matching distance for each word pairs. | ||||||
|         let mut bitmaps = Vec::with_capacity(winsize.pow(2)); |         let mut bitmaps = Vec::with_capacity(winsize.pow(2)); | ||||||
|         for (offset, s1) in win |         for (offset, &s1) in win | ||||||
|             .iter() |             .iter() | ||||||
|             .enumerate() |             .enumerate() | ||||||
|             .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) |             .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) | ||||||
|         { |         { | ||||||
|             for (dist, s2) in win |             for (dist, &s2) in win | ||||||
|                 .iter() |                 .iter() | ||||||
|                 .skip(offset + 1) |                 .skip(offset + 1) | ||||||
|                 .enumerate() |                 .enumerate() | ||||||
|                 .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) |                 .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) | ||||||
|             { |             { | ||||||
|                 if dist == 0 { |                 if dist == 0 { | ||||||
|                     match db_cache.get_word_pair_proximity_docids(index, txn, s1, s2, 1)? { |                     match ctx.get_word_pair_proximity_docids(s1, s2, 1)? { | ||||||
|                         Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), |                         Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), | ||||||
|                         // If there are no documents for this pair, there will be no |                         // If there are no documents for this pair, there will be no | ||||||
|                         // results for the phrase query. |                         // results for the phrase query. | ||||||
| @@ -182,13 +162,9 @@ pub fn resolve_phrase<'transaction>( | |||||||
|                 } else { |                 } else { | ||||||
|                     let mut bitmap = RoaringBitmap::new(); |                     let mut bitmap = RoaringBitmap::new(); | ||||||
|                     for dist in 0..=dist { |                     for dist in 0..=dist { | ||||||
|                         if let Some(m) = db_cache.get_word_pair_proximity_docids( |                         if let Some(m) = | ||||||
|                             index, |                             ctx.get_word_pair_proximity_docids(s1, s2, dist as u8 + 1)? | ||||||
|                             txn, |                         { | ||||||
|                             s1, |  | ||||||
|                             s2, |  | ||||||
|                             dist as u8 + 1, |  | ||||||
|                         )? { |  | ||||||
|                             bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; |                             bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|   | |||||||
| @@ -1,11 +1,7 @@ | |||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; |  | ||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
| use super::{ | use super::{ | ||||||
|     RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper, |     RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper, | ||||||
|     RankingRuleQueryTrait, |     RankingRuleQueryTrait, SearchContext, | ||||||
| }; | }; | ||||||
| use crate::{ | use crate::{ | ||||||
|     // facet::FacetType, |     // facet::FacetType, | ||||||
| @@ -15,18 +11,19 @@ use crate::{ | |||||||
|     Index, |     Index, | ||||||
|     Result, |     Result, | ||||||
| }; | }; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| pub struct Sort<'transaction, Query> { | pub struct Sort<'search, Query> { | ||||||
|     field_name: String, |     field_name: String, | ||||||
|     field_id: Option<FieldId>, |     field_id: Option<FieldId>, | ||||||
|     is_ascending: bool, |     is_ascending: bool, | ||||||
|     original_query: Option<Query>, |     original_query: Option<Query>, | ||||||
|     iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>, |     iter: Option<RankingRuleOutputIterWrapper<'search, Query>>, | ||||||
| } | } | ||||||
| impl<'transaction, Query> Sort<'transaction, Query> { | impl<'search, Query> Sort<'search, Query> { | ||||||
|     pub fn new( |     pub fn _new( | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         rtxn: &'transaction heed::RoTxn, |         rtxn: &'search heed::RoTxn, | ||||||
|         field_name: String, |         field_name: String, | ||||||
|         is_ascending: bool, |         is_ascending: bool, | ||||||
|     ) -> Result<Self> { |     ) -> Result<Self> { | ||||||
| @@ -37,18 +34,14 @@ impl<'transaction, Query> Sort<'transaction, Query> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query> | impl<'search, Query: RankingRuleQueryTrait> RankingRule<'search, Query> for Sort<'search, Query> { | ||||||
|     for Sort<'transaction, Query> |  | ||||||
| { |  | ||||||
|     fn id(&self) -> String { |     fn id(&self) -> String { | ||||||
|         let Self { field_name, is_ascending, .. } = self; |         let Self { field_name, is_ascending, .. } = self; | ||||||
|         format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " }) |         format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " }) | ||||||
|     } |     } | ||||||
|     fn start_iteration( |     fn start_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<Query>, |         _logger: &mut dyn SearchLogger<Query>, | ||||||
|         parent_candidates: &RoaringBitmap, |         parent_candidates: &RoaringBitmap, | ||||||
|         parent_query_graph: &Query, |         parent_query_graph: &Query, | ||||||
| @@ -59,8 +52,8 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query | |||||||
|                     if self.is_ascending { ascending_facet_sort } else { descending_facet_sort }; |                     if self.is_ascending { ascending_facet_sort } else { descending_facet_sort }; | ||||||
|  |  | ||||||
|                 let number_iter = make_iter( |                 let number_iter = make_iter( | ||||||
|                     txn, |                     ctx.txn, | ||||||
|                     index |                     ctx.index | ||||||
|                         .facet_id_f64_docids |                         .facet_id_f64_docids | ||||||
|                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), |                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|                     field_id, |                     field_id, | ||||||
| @@ -68,8 +61,8 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query | |||||||
|                 )?; |                 )?; | ||||||
|  |  | ||||||
|                 let string_iter = make_iter( |                 let string_iter = make_iter( | ||||||
|                     txn, |                     ctx.txn, | ||||||
|                     index |                     ctx.index | ||||||
|                         .facet_id_string_docids |                         .facet_id_string_docids | ||||||
|                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), |                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|                     field_id, |                     field_id, | ||||||
| @@ -91,9 +84,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query | |||||||
|  |  | ||||||
|     fn next_bucket( |     fn next_bucket( | ||||||
|         &mut self, |         &mut self, | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<Query>, |         _logger: &mut dyn SearchLogger<Query>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|     ) -> Result<Option<RankingRuleOutput<Query>>> { |     ) -> Result<Option<RankingRuleOutput<Query>>> { | ||||||
| @@ -110,9 +101,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query | |||||||
|  |  | ||||||
|     fn end_iteration( |     fn end_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<Query>, |         _logger: &mut dyn SearchLogger<Query>, | ||||||
|     ) { |     ) { | ||||||
|         self.original_query = None; |         self.original_query = None; | ||||||
|   | |||||||
| @@ -1,13 +1,9 @@ | |||||||
| use std::collections::BTreeSet; |  | ||||||
|  |  | ||||||
| use heed::RoTxn; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::db_cache::DatabaseCache; |  | ||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
| use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache}; | use super::resolve_query_graph::resolve_query_graph; | ||||||
| use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput}; | use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext}; | ||||||
| use crate::{Index, Result, TermsMatchingStrategy}; | use crate::{Result, TermsMatchingStrategy}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use std::collections::BTreeSet; | ||||||
|  |  | ||||||
| pub struct Words { | pub struct Words { | ||||||
|     exhausted: bool, |     exhausted: bool, | ||||||
| @@ -15,7 +11,6 @@ pub struct Words { | |||||||
|     iterating: bool, |     iterating: bool, | ||||||
|     positions_to_remove: Vec<i8>, |     positions_to_remove: Vec<i8>, | ||||||
|     terms_matching_strategy: TermsMatchingStrategy, |     terms_matching_strategy: TermsMatchingStrategy, | ||||||
|     node_docids_cache: NodeDocIdsCache, |  | ||||||
| } | } | ||||||
| impl Words { | impl Words { | ||||||
|     pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self { |     pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self { | ||||||
| @@ -25,20 +20,17 @@ impl Words { | |||||||
|             iterating: false, |             iterating: false, | ||||||
|             positions_to_remove: vec![], |             positions_to_remove: vec![], | ||||||
|             terms_matching_strategy, |             terms_matching_strategy, | ||||||
|             node_docids_cache: <_>::default(), |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | impl<'search> RankingRule<'search, QueryGraph> for Words { | ||||||
|     fn id(&self) -> String { |     fn id(&self) -> String { | ||||||
|         "words".to_owned() |         "words".to_owned() | ||||||
|     } |     } | ||||||
|     fn start_iteration( |     fn start_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<QueryGraph>, |         _logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|         _parent_candidates: &RoaringBitmap, |         _parent_candidates: &RoaringBitmap, | ||||||
|         parent_query_graph: &QueryGraph, |         parent_query_graph: &QueryGraph, | ||||||
| @@ -71,9 +63,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | |||||||
|  |  | ||||||
|     fn next_bucket( |     fn next_bucket( | ||||||
|         &mut self, |         &mut self, | ||||||
|         index: &Index, |         ctx: &mut SearchContext<'search>, | ||||||
|         txn: &'transaction RoTxn, |  | ||||||
|         db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         logger: &mut dyn SearchLogger<QueryGraph>, |         logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|         universe: &RoaringBitmap, |         universe: &RoaringBitmap, | ||||||
|     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> { |     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> { | ||||||
| @@ -87,14 +77,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | |||||||
|  |  | ||||||
|         logger.log_words_state(query_graph); |         logger.log_words_state(query_graph); | ||||||
|  |  | ||||||
|         let this_bucket = resolve_query_graph( |         let this_bucket = resolve_query_graph(ctx, query_graph, universe)?; | ||||||
|             index, |  | ||||||
|             txn, |  | ||||||
|             db_cache, |  | ||||||
|             &mut self.node_docids_cache, |  | ||||||
|             query_graph, |  | ||||||
|             universe, |  | ||||||
|         )?; |  | ||||||
|  |  | ||||||
|         let child_query_graph = query_graph.clone(); |         let child_query_graph = query_graph.clone(); | ||||||
|         loop { |         loop { | ||||||
| @@ -115,9 +98,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | |||||||
|  |  | ||||||
|     fn end_iteration( |     fn end_iteration( | ||||||
|         &mut self, |         &mut self, | ||||||
|         _index: &Index, |         _ctx: &mut SearchContext<'search>, | ||||||
|         _txn: &'transaction RoTxn, |  | ||||||
|         _db_cache: &mut DatabaseCache<'transaction>, |  | ||||||
|         _logger: &mut dyn SearchLogger<QueryGraph>, |         _logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|     ) { |     ) { | ||||||
|         self.iterating = false; |         self.iterating = false; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user