mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Remove noise in codebase
This commit is contained in:
		| @@ -1,7 +1,8 @@ | ||||
| use std::collections::hash_map::Entry; | ||||
|  | ||||
| use fxhash::FxHashMap; | ||||
| use heed::{types::ByteSlice, RoTxn}; | ||||
| use heed::types::ByteSlice; | ||||
| use heed::RoTxn; | ||||
|  | ||||
| use crate::{Index, Result}; | ||||
|  | ||||
| @@ -62,10 +63,7 @@ impl<'transaction> DatabaseCache<'transaction> { | ||||
|         match self.word_pair_proximity_docids.entry(key.clone()) { | ||||
|             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), | ||||
|             Entry::Vacant(entry) => { | ||||
|                 // Note that now, we really want to do a prefix iter over (w1, w2) to get all the possible proximities | ||||
|                 // but oh well | ||||
|                 // | ||||
|                 // Actually, we shouldn'transaction greedily access this DB at all | ||||
|                 // We shouldn't greedily access this DB at all | ||||
|                 // a DB (w1, w2) -> [proximities] would be much better | ||||
|                 // We could even have a DB that is (w1) -> set of words such that (w1, w2) are in proximity | ||||
|                 // And if we worked with words encoded as integers, the set of words could be a roaring bitmap | ||||
|   | ||||
| @@ -1,20 +1,15 @@ | ||||
| use heed::RoTxn; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::{ | ||||
|     new::ranking_rule_graph::cheapest_paths::{self, Path}, | ||||
|     Index, Result, | ||||
| }; | ||||
|  | ||||
| use super::{ | ||||
|     db_cache::DatabaseCache, | ||||
|     ranking_rule_graph::{ | ||||
|         cheapest_paths::KCheapestPathsState, edge_docids_cache::EdgeDocidsCache, | ||||
|         empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, RankingRuleGraph, | ||||
|         RankingRuleGraphTrait, | ||||
|     }, | ||||
|     QueryGraph, RankingRule, RankingRuleOutput, | ||||
| }; | ||||
| use super::db_cache::DatabaseCache; | ||||
| use super::ranking_rule_graph::cheapest_paths::KCheapestPathsState; | ||||
| use super::ranking_rule_graph::edge_docids_cache::EdgeDocidsCache; | ||||
| use super::ranking_rule_graph::empty_paths_cache::EmptyPathsCache; | ||||
| use super::ranking_rule_graph::paths_map::PathsMap; | ||||
| use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait}; | ||||
| use super::{QueryGraph, RankingRule, RankingRuleOutput}; | ||||
| use crate::new::ranking_rule_graph::cheapest_paths::{self, Path}; | ||||
| use crate::{Index, Result}; | ||||
|  | ||||
| pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> { | ||||
|     state: Option<GraphBasedRankingRuleState<G>>, | ||||
| @@ -43,16 +38,8 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | ||||
|         universe: &RoaringBitmap, | ||||
|         query_graph: &QueryGraph, | ||||
|     ) -> Result<()> { | ||||
|         // if let Some(state) = &mut self.state { | ||||
|         //     // TODO: update the previous state | ||||
|         //     // TODO: update the existing graph incrementally, based on a diff | ||||
|  | ||||
|         // } else { | ||||
|         // TODO: update old state instead of starting from scratch | ||||
|         let graph = RankingRuleGraph::build(index, txn, db_cache, query_graph.clone())?; | ||||
|         // println!("Initialized Proximity Ranking Rule."); | ||||
|         // println!("GRAPH:"); | ||||
|         // let graphviz = graph.graphviz(); | ||||
|         // println!("{graphviz}"); | ||||
|  | ||||
|         let cheapest_paths_state = KCheapestPathsState::new(&graph); | ||||
|         let state = GraphBasedRankingRuleState { | ||||
| @@ -62,13 +49,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | ||||
|             empty_paths_cache: <_>::default(), | ||||
|         }; | ||||
|  | ||||
|         // let desc = state.graph.graphviz_with_path( | ||||
|         //     &state.cheapest_paths_state.as_ref().unwrap().kth_cheapest_path.clone(), | ||||
|         // ); | ||||
|         // println!("Cheapest path: {desc}"); | ||||
|  | ||||
|         self.state = Some(state); | ||||
|         // } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -86,17 +67,9 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | ||||
|         let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else { | ||||
|             return Ok(None); | ||||
|         }; | ||||
|         // println!("Proximity: Next Bucket"); | ||||
|  | ||||
|         let mut paths = PathsMap::default(); | ||||
|  | ||||
|         // let desc = state.graph.dot_description_with_path(&cheapest_paths_state.kth_cheapest_path); | ||||
|         // println!("CHeapest Path: {desc}"); | ||||
|         // TODO: when does it return None? -> when there is no cheapest path | ||||
|         // How to handle it? -> ... return all document ids from the universe? | ||||
|         // | ||||
|         // TODO: Give an empty_edge and empty_prefix argument to the | ||||
|         // compute_paths_of_next_lowest_cost function | ||||
|         if let Some(next_cheapest_paths_state) = cheapest_paths_state | ||||
|             .compute_paths_of_next_lowest_cost( | ||||
|                 &mut state.graph, | ||||
| @@ -107,31 +80,12 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | ||||
|             state.cheapest_paths_state = Some(next_cheapest_paths_state); | ||||
|         } else { | ||||
|             state.cheapest_paths_state = None; | ||||
|             // If returns None if there are no longer any paths to compute | ||||
|             // BUT! paths_map may not be empty, and we need to compute the current bucket still | ||||
|         } | ||||
|  | ||||
|         // println!("PATHS: {}", paths.graphviz(&state.graph)); | ||||
|  | ||||
|         // paths.iterate(|path, cost| { | ||||
|         //     let desc = state.graph.graphviz_with_path(&Path { edges: path.clone(), cost: *cost }); | ||||
|         //     println!("Path to resolve of cost {cost}: {desc}"); | ||||
|         // }); | ||||
|  | ||||
|         // let desc = state.graph.dot_description_with_path( | ||||
|         //     &state.cheapest_paths_state.as_ref().unwrap().kth_cheapest_path.clone(), | ||||
|         // ); | ||||
|         // println!("Cheapest path: {desc}"); | ||||
|  | ||||
|         // TODO: verify that this is correct | ||||
|         // If the paths are empty, we should probably return the universe? | ||||
|         // BUT! Is there a case where the paths are empty AND the universe is | ||||
|         // not empty? | ||||
|         if paths.is_empty() { | ||||
|             self.state = None; | ||||
|             return Ok(None); | ||||
|         } | ||||
|         // Here, log all the paths? | ||||
|  | ||||
|         let bucket = state.graph.resolve_paths( | ||||
|             index, | ||||
| @@ -142,10 +96,6 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | ||||
|             universe, | ||||
|             paths, | ||||
|         )?; | ||||
|         // The call above also updated the graph such that it doesn't contain the empty edges anymore. | ||||
|         // println!("Resolved all the paths: {bucket:?} from universe {:?}", state.universe); | ||||
|         // let graphviz = state.graph.graphviz(); | ||||
|         // println!("{graphviz}"); | ||||
|  | ||||
|         let next_query_graph = state.graph.query_graph.clone(); | ||||
|  | ||||
| @@ -160,7 +110,6 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap | ||||
|         _txn: &'transaction RoTxn, | ||||
|         _db_cache: &mut DatabaseCache<'transaction>, | ||||
|     ) { | ||||
|         // println!("PROXIMITY: end iteration"); | ||||
|         self.state = None; | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -14,10 +14,8 @@ pub use query_graph::*; | ||||
| pub use ranking_rules::*; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use self::{ | ||||
|     db_cache::DatabaseCache, | ||||
|     query_term::{word_derivations, LocatedQueryTerm}, | ||||
| }; | ||||
| use self::db_cache::DatabaseCache; | ||||
| use self::query_term::{word_derivations, LocatedQueryTerm}; | ||||
| use crate::{Index, Result}; | ||||
|  | ||||
| pub enum BitmapOrAllRef<'s> { | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::fmt; | ||||
| use std::fmt::Debug; | ||||
| use std::{collections::HashSet, fmt}; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{ | ||||
|     db_cache::DatabaseCache, | ||||
|     query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}, | ||||
| }; | ||||
| use super::db_cache::DatabaseCache; | ||||
| use super::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | ||||
| use crate::{Index, Result}; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| @@ -20,8 +19,7 @@ pub enum QueryNode { | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct Edges { | ||||
|     // TODO: use a tiny bitset instead | ||||
|     // something like a simple Vec<u8> where most queries will see a vector of one element | ||||
|     // TODO: use a tiny bitset instead, something like a simple Vec<u8> where most queries will see a vector of one element | ||||
|     pub predecessors: RoaringBitmap, | ||||
|     pub successors: RoaringBitmap, | ||||
| } | ||||
| @@ -75,7 +73,6 @@ impl QueryGraph { | ||||
|  | ||||
| impl QueryGraph { | ||||
|     // TODO: return the list of all matching words here as well | ||||
|  | ||||
|     pub fn from_query<'transaction>( | ||||
|         index: &Index, | ||||
|         txn: &RoTxn, | ||||
| @@ -94,9 +91,7 @@ impl QueryGraph { | ||||
|         let (mut prev2, mut prev1, mut prev0): (Vec<u32>, Vec<u32>, Vec<u32>) = | ||||
|             (vec![], vec![], vec![graph.root_node]); | ||||
|  | ||||
|         // TODO: add all the word derivations found in the fst | ||||
|         // and add split words / support phrases | ||||
|  | ||||
|         // TODO: split words / synonyms | ||||
|         for length in 1..=query.len() { | ||||
|             let query = &query[..length]; | ||||
|  | ||||
| @@ -279,18 +274,6 @@ impl Debug for QueryNode { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /* | ||||
| TODO: | ||||
|  | ||||
| 1. Find the minimum number of words to check to resolve the 10 query trees at once. | ||||
|     (e.g. just 0 | 01 | 012 ) | ||||
| 2. Simplify the query tree after removal of a node ✅ | ||||
| 3. Create the proximity graph ✅ | ||||
| 4. Assign different proximities for the ngrams ✅ | ||||
| 5. Walk the proximity graph, finding all the potential paths of weight N from START to END ✅ | ||||
| (without checking the bitmaps) | ||||
|  | ||||
| */ | ||||
| impl QueryGraph { | ||||
|     pub fn graphviz(&self) -> String { | ||||
|         let mut desc = String::new(); | ||||
| @@ -317,91 +300,9 @@ node [shape = "record"] | ||||
|             for edge in self.edges[node].successors.iter() { | ||||
|                 desc.push_str(&format!("{node} -> {edge};\n")); | ||||
|             } | ||||
|             // for edge in self.edges[node].incoming.iter() { | ||||
|             //     desc.push_str(&format!("{node} -> {edge} [color = grey];\n")); | ||||
|             // } | ||||
|         } | ||||
|  | ||||
|         desc.push('}'); | ||||
|         desc | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use charabia::Tokenize; | ||||
|  | ||||
|     use super::{LocatedQueryTerm, QueryGraph, QueryNode}; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::new::db_cache::DatabaseCache; | ||||
|     use crate::search::new::query_term::word_derivations; | ||||
|  | ||||
|     #[test] | ||||
|     fn build_graph() { | ||||
|         let mut index = TempIndex::new(); | ||||
|         index.index_documents_config.autogenerate_docids = true; | ||||
|         index | ||||
|             .update_settings(|s| { | ||||
|                 s.set_searchable_fields(vec!["text".to_owned()]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|         index | ||||
|             .add_documents(documents!({ | ||||
|                 "text": "0 1 2 3 4 5 6 7 01 23 234 56 79 709 7356", | ||||
|             })) | ||||
|             .unwrap(); | ||||
|  | ||||
|         // let fst = fst::Set::from_iter(["01", "23", "234", "56"]).unwrap(); | ||||
|         let txn = index.read_txn().unwrap(); | ||||
|         let mut db_cache = DatabaseCache::default(); | ||||
|  | ||||
|         let fst = index.words_fst(&txn).unwrap(); | ||||
|         let query = LocatedQueryTerm::from_query( | ||||
|             "0 no 1 2 3 4 5 6 7".tokenize(), | ||||
|             None, | ||||
|             |word, is_prefix| { | ||||
|                 word_derivations( | ||||
|                     &index, | ||||
|                     &txn, | ||||
|                     word, | ||||
|                     if word.len() < 3 { | ||||
|                         0 | ||||
|                     } else if word.len() < 6 { | ||||
|                         1 | ||||
|                     } else { | ||||
|                         2 | ||||
|                     }, | ||||
|                     is_prefix, | ||||
|                     &fst, | ||||
|                 ) | ||||
|             }, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
|         let graph = QueryGraph::from_query(&index, &txn, &mut db_cache, query).unwrap(); | ||||
|         println!("{}", graph.graphviz()); | ||||
|  | ||||
|         // let positions_to_remove = vec![3, 6, 0, 4]; | ||||
|         // for p in positions_to_remove { | ||||
|         //     graph.remove_words_at_position(p); | ||||
|         //     println!("{}", graph.graphviz()); | ||||
|         // } | ||||
|  | ||||
|         // let proximities = |w1: &str, w2: &str| -> Vec<i8> { | ||||
|         //     if matches!((w1, w2), ("56", "7")) { | ||||
|         //         vec![] | ||||
|         //     } else { | ||||
|         //         vec![1, 2] | ||||
|         //     } | ||||
|         // }; | ||||
|  | ||||
|         // let prox_graph = ProximityGraph::from_query_graph(graph, proximities); | ||||
|  | ||||
|         // println!("{}", prox_graph.graphviz()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| // fn remove_element_from_vector(v: &mut Vec<usize>, el: usize) { | ||||
| //     let position = v.iter().position(|&x| x == el).unwrap(); | ||||
| //     v.swap_remove(position); | ||||
| // } | ||||
|   | ||||
| @@ -17,10 +17,6 @@ use crate::{Index, Result}; | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct WordDerivations { | ||||
|     // TODO: should have a list for the words corresponding to the prefix as well! | ||||
|     // This is to implement the `exactness` ranking rule. | ||||
|     // However, we could also consider every term in `zero_typo` (except first one) to | ||||
|     // be words of that the original word is a prefix of | ||||
|     pub original: String, | ||||
|     pub zero_typo: Vec<String>, | ||||
|     pub one_typo: Vec<String>, | ||||
|   | ||||
| @@ -46,8 +46,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         // ranking_rule_graph.simplify(); | ||||
|  | ||||
|         Ok(ranking_rule_graph) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,10 +3,9 @@ use std::collections::{BTreeMap, HashSet}; | ||||
| use itertools::Itertools; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{ | ||||
|     empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, Edge, RankingRuleGraph, | ||||
|     RankingRuleGraphTrait, | ||||
| }; | ||||
| use super::empty_paths_cache::EmptyPathsCache; | ||||
| use super::paths_map::PathsMap; | ||||
| use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||||
| pub struct Path { | ||||
|   | ||||
| @@ -18,18 +18,12 @@ use crate::{Index, Result}; | ||||
|  | ||||
| pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> { | ||||
|     pub cache: FxHashMap<u32, RoaringBitmap>, | ||||
|  | ||||
|     // TODO: There is a big difference between `cache`, which is always valid, and | ||||
|     // `empty_path_prefixes`, which is only accurate for a particular universe | ||||
|     // ALSO, we should have a universe-specific `empty_edge` to use | ||||
|     // pub empty_path_prefixes: HashSet<Vec<u32>>, | ||||
|     _phantom: PhantomData<G>, | ||||
| } | ||||
| impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> { | ||||
|     fn default() -> Self { | ||||
|         Self { | ||||
|             cache: Default::default(), | ||||
|             // empty_path_prefixes: Default::default(), | ||||
|             _phantom: Default::default(), | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -5,7 +5,6 @@ pub mod empty_paths_cache; | ||||
| pub mod paths_map; | ||||
| pub mod proximity; | ||||
| pub mod resolve_paths; | ||||
|  | ||||
| use std::collections::{BTreeSet, HashSet}; | ||||
| use std::ops::ControlFlow; | ||||
|  | ||||
| @@ -86,22 +85,10 @@ pub struct RankingRuleGraph<G: RankingRuleGraphTrait> { | ||||
|     pub node_edges: Vec<RoaringBitmap>, | ||||
|  | ||||
|     pub successors: Vec<RoaringBitmap>, | ||||
|     // to get the edges between two nodes: | ||||
|     // TODO: to get the edges between two nodes: | ||||
|     // 1. get node_outgoing_edges[from] | ||||
|     // 2. get node_incoming_edges[to] | ||||
|     // 3. take intersection betweem the two | ||||
|  | ||||
|     // TODO: node edges could be different I guess | ||||
|     // something like: | ||||
|     // pub node_edges: Vec<BitSet> | ||||
|     // where each index is the result of: | ||||
|     // the successor index in the top 16 bits, the edge index in the bottom 16 bits | ||||
|  | ||||
|     // TODO: | ||||
|     // node_successors? | ||||
|  | ||||
|     // pub removed_edges: HashSet<u32>, | ||||
|     // pub tmp_removed_edges: HashSet<u32>, | ||||
| } | ||||
| impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|     // Visit all edges between the two given nodes in order of increasing cost. | ||||
| @@ -142,50 +129,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         } | ||||
|         self.successors[from_node as usize] = new_successors_from_node; | ||||
|     } | ||||
|     // pub fn remove_nodes(&mut self, nodes: &[usize]) { | ||||
|     //     for &node in nodes { | ||||
|     //         let edge_indices = &mut self.node_edges[node]; | ||||
|     //         for edge_index in edge_indices.iter() { | ||||
|     //             self.all_edges[*edge_index] = None; | ||||
|     //         } | ||||
|     //         edge_indices.clear(); | ||||
|  | ||||
|     //         let preds = &self.query_graph.edges[node].incoming; | ||||
|     //         for pred in preds { | ||||
|     //             let edge_indices = &mut self.node_edges[*pred]; | ||||
|     //             for edge_index in edge_indices.iter() { | ||||
|     //                 let edge_opt = &mut self.all_edges[*edge_index]; | ||||
|     //                 let Some(edge) = edge_opt else { continue; }; | ||||
|     //                 if edge.to_node == node { | ||||
|     //                     *edge_opt = None; | ||||
|     //                 } | ||||
|     //             } | ||||
|     //             panic!("remove nodes is incorrect at the moment"); | ||||
|     //             edge_indices.clear(); | ||||
|     //         } | ||||
|     //     } | ||||
|     //     self.query_graph.remove_nodes(nodes); | ||||
|     // } | ||||
|     // pub fn simplify(&mut self) { | ||||
|     //     loop { | ||||
|     //         let mut nodes_to_remove = vec![]; | ||||
|     //         for (node_idx, node) in self.query_graph.nodes.iter().enumerate() { | ||||
|     //             if !matches!(node, QueryNode::End | QueryNode::Deleted) | ||||
|     //                 && self.node_edges[node_idx].is_empty() | ||||
|     //             { | ||||
|     //                 nodes_to_remove.push(node_idx); | ||||
|     //             } | ||||
|     //         } | ||||
|     //         if nodes_to_remove.is_empty() { | ||||
|     //             break; | ||||
|     //         } else { | ||||
|     //             self.remove_nodes(&nodes_to_remove); | ||||
|     //         } | ||||
|     //     } | ||||
|     // } | ||||
|     // fn is_removed_edge(&self, edge: u32) -> bool { | ||||
|     //     self.removed_edges.contains(&edge) || self.tmp_removed_edges.contains(&edge) | ||||
|     // } | ||||
|  | ||||
|     pub fn graphviz(&self) -> String { | ||||
|         let mut desc = String::new(); | ||||
|   | ||||
| @@ -6,14 +6,13 @@ use std::hash::{Hash, Hasher}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::cheapest_paths::Path; | ||||
| use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait, Edge}; | ||||
| use super::{Edge, EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; | ||||
| use crate::new::QueryNode; | ||||
|  | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct PathsMap<V> { | ||||
|     nodes: Vec<(u32, PathsMap<V>)>, | ||||
|     value: Option<V> | ||||
|     value: Option<V>, | ||||
| } | ||||
| impl<V> Default for PathsMap<V> { | ||||
|     fn default() -> Self { | ||||
| @@ -73,7 +72,7 @@ impl<V> PathsMap<V> { | ||||
|     } | ||||
|     pub fn remove_first(&mut self) -> Option<(Vec<u32>, V)> { | ||||
|         if self.is_empty() { | ||||
|             return None | ||||
|             return None; | ||||
|         } | ||||
|  | ||||
|         let mut result = vec![]; | ||||
| @@ -173,12 +172,10 @@ impl<V> PathsMap<V> { | ||||
|  | ||||
|     pub fn contains_prefix_of_path(&self, path: &[u32]) -> bool { | ||||
|         if self.value.is_some() { | ||||
|             return true | ||||
|             return true; | ||||
|         } | ||||
|         match path { | ||||
|             [] => { | ||||
|                 false | ||||
|             } | ||||
|             [] => false, | ||||
|             [first_edge, remaining_path @ ..] => { | ||||
|                 for (edge, rest) in self.nodes.iter() { | ||||
|                     if edge == first_edge { | ||||
| @@ -197,7 +194,12 @@ impl<V> PathsMap<V> { | ||||
|         desc.push_str("\n}\n"); | ||||
|         desc | ||||
|     } | ||||
|     fn graphviz_rec<G: RankingRuleGraphTrait>(&self, desc: &mut String, path_from: Vec<u64>, graph: &RankingRuleGraph<G>) { | ||||
|     fn graphviz_rec<G: RankingRuleGraphTrait>( | ||||
|         &self, | ||||
|         desc: &mut String, | ||||
|         path_from: Vec<u64>, | ||||
|         graph: &RankingRuleGraph<G>, | ||||
|     ) { | ||||
|         let id_from = { | ||||
|             let mut h = DefaultHasher::new(); | ||||
|             path_from.hash(&mut h); | ||||
| @@ -227,7 +229,6 @@ impl<V> PathsMap<V> { | ||||
| } | ||||
|  | ||||
| impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|      | ||||
|     pub fn graphviz_with_path(&self, path: &Path) -> String { | ||||
|         let mut desc = String::new(); | ||||
|         desc.push_str("digraph G {\nrankdir = LR;\nnode [shape = \"record\"]\n"); | ||||
| @@ -248,11 +249,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         for (edge_idx, edge) in self.all_edges.iter().enumerate() { | ||||
|             let Some(edge) = edge else { continue }; | ||||
|             let Edge { from_node, to_node, cost, details } = edge; | ||||
|             let color = if path.edges.contains(&(edge_idx as u32)) { | ||||
|                 "red" | ||||
|             } else { | ||||
|                 "green" | ||||
|             }; | ||||
|             let color = if path.edges.contains(&(edge_idx as u32)) { "red" } else { "green" }; | ||||
|             match &edge.details { | ||||
|                 EdgeDetails::Unconditional => { | ||||
|                     desc.push_str(&format!( | ||||
| @@ -273,157 +270,4 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         desc.push('}'); | ||||
|         desc | ||||
|     } | ||||
|     | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::PathsMap; | ||||
|     use crate::db_snap; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::new::db_cache::DatabaseCache; | ||||
|     use crate::new::ranking_rule_graph::cheapest_paths::KCheapestPathsState; | ||||
|     use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache; | ||||
|     use crate::new::ranking_rule_graph::proximity::ProximityGraph; | ||||
|     use crate::new::ranking_rule_graph::RankingRuleGraph; | ||||
|     use crate::search::new::query_term::{word_derivations, LocatedQueryTerm}; | ||||
|     use crate::search::new::QueryGraph; | ||||
|     use charabia::Tokenize; | ||||
|  | ||||
|     #[test] | ||||
|     fn paths_tree() { | ||||
|         let mut index = TempIndex::new(); | ||||
|         index.index_documents_config.autogenerate_docids = true; | ||||
|         index | ||||
|             .update_settings(|s| { | ||||
|                 s.set_searchable_fields(vec!["text".to_owned()]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|         index | ||||
|             .add_documents(documents!([ | ||||
|                 { | ||||
|                     "text": "0 1 2 3 4 5" | ||||
|                 }, | ||||
|                 { | ||||
|                     "text": "0 a 1 b 2 3 4 5" | ||||
|                 }, | ||||
|                 { | ||||
|                     "text": "0 a 1 b 3 a 4 b 5" | ||||
|                 }, | ||||
|                 { | ||||
|                     "text": "0 a a 1 b 2 3 4 5" | ||||
|                 }, | ||||
|                 { | ||||
|                     "text": "0 a a a a 1 b 3 45" | ||||
|                 }, | ||||
|             ])) | ||||
|             .unwrap(); | ||||
|  | ||||
|         db_snap!(index, word_pair_proximity_docids, @"679d1126b569b3e8b10dd937c3faedf9"); | ||||
|  | ||||
|         let txn = index.read_txn().unwrap(); | ||||
|         let mut db_cache = DatabaseCache::default(); | ||||
|         let fst = index.words_fst(&txn).unwrap(); | ||||
|         let query = | ||||
|             LocatedQueryTerm::from_query("0 1 2 3 4 5".tokenize(), None, |word, is_prefix| { | ||||
|                 word_derivations(&index, &txn, word, if word.len() < 3 { | ||||
|                     0 | ||||
|                 } else if word.len() < 6 { | ||||
|                     1 | ||||
|                 } else { | ||||
|                     2 | ||||
|                 },is_prefix, &fst) | ||||
|             }) | ||||
|             .unwrap(); | ||||
|         let graph = QueryGraph::from_query(&index, &txn, &mut db_cache, query).unwrap(); | ||||
|         let empty_paths_cache = EmptyPathsCache::default(); | ||||
|         let mut db_cache = DatabaseCache::default(); | ||||
|  | ||||
|         let mut prox_graph = | ||||
|             RankingRuleGraph::<ProximityGraph>::build(&index, &txn, &mut db_cache, graph).unwrap(); | ||||
|  | ||||
|         println!("{}", prox_graph.graphviz()); | ||||
|  | ||||
|         let mut state = KCheapestPathsState::new(&prox_graph).unwrap(); | ||||
|  | ||||
|         let mut path_tree = PathsMap::default(); | ||||
|         while state.next_cost() <= 6 { | ||||
|             let next_state = state.compute_paths_of_next_lowest_cost(&mut prox_graph, &empty_paths_cache, &mut path_tree); | ||||
|             if let Some(next_state) = next_state { | ||||
|                 state = next_state; | ||||
|             } else { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let desc = path_tree.graphviz(&prox_graph); | ||||
|         println!("{desc}"); | ||||
|  | ||||
|         // let path = vec![u32 { from: 0, to: 2, edge_idx: 0 }, u32 { from: 2, to: 3, edge_idx: 0 }, u32 { from: 3, to: 4, edge_idx: 0 }, u32 { from: 4, to: 5, edge_idx: 0 }, u32 { from: 5, to: 8, edge_idx: 0 }, u32 { from: 8, to: 1, edge_idx: 0 }, u32 { from: 1, to: 10, edge_idx: 0 }]; | ||||
|         // println!("{}", psath_tree.contains_prefix_of_path(&path)); | ||||
|          | ||||
|  | ||||
|         // let path =  vec![u32 { from: 0, to: 2, edge_idx: 0 }, u32 { from: 2, to: 3, edge_idx: 0 }, u32 { from: 3, to: 4, edge_idx: 0 }, u32 { from: 4, to: 5, edge_idx: 0 }, u32 { from: 5, to: 6, edge_idx: 0 }, u32 { from: 6, to: 7, edge_idx: 0 }, u32 { from: 7, to: 1, edge_idx: 0 }]; | ||||
|  | ||||
|  | ||||
|         // path_tree.iterate(|path, cost| { | ||||
|         //     println!("cost {cost} for path: {path:?}"); | ||||
|         // }); | ||||
|  | ||||
|         // path_tree.remove_forbidden_prefix(&[ | ||||
|         //     u32 { from: 0, to: 2, edge_idx: 0 }, | ||||
|         //     u32 { from: 2, to: 3, edge_idx: 2 }, | ||||
|         // ]); | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("{desc}"); | ||||
|  | ||||
|         // path_tree.remove_forbidden_edge(&u32 { from: 5, to: 6, cost: 1 }); | ||||
|  | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("AFTER REMOVING 5-6 [1]:\n{desc}"); | ||||
|  | ||||
|         // path_tree.remove_forbidden_edge(&u32 { from: 3, to: 4, cost: 1 }); | ||||
|  | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("AFTER REMOVING 3-4 [1]:\n{desc}"); | ||||
|  | ||||
|         // let p = path_tree.remove_first(); | ||||
|         // println!("PATH: {p:?}"); | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("AFTER REMOVING: {desc}"); | ||||
|  | ||||
|         // let p = path_tree.remove_first(); | ||||
|         // println!("PATH: {p:?}"); | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("AFTER REMOVING: {desc}"); | ||||
|  | ||||
|         // path_tree.remove_all_containing_edge(&u32 { from: 5, to: 6, cost: 2 }); | ||||
|  | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("{desc}"); | ||||
|  | ||||
|         // let first_edges = path_tree.remove_first().unwrap(); | ||||
|         // println!("{first_edges:?}"); | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("{desc}"); | ||||
|  | ||||
|         // let first_edges = path_tree.remove_first().unwrap(); | ||||
|         // println!("{first_edges:?}"); | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("{desc}"); | ||||
|  | ||||
|         // let first_edges = path_tree.remove_first().unwrap(); | ||||
|         // println!("{first_edges:?}"); | ||||
|         // let desc = path_tree.graphviz(); | ||||
|         // println!("{desc}"); | ||||
|  | ||||
|         // println!("{path_tree:?}"); | ||||
|     } | ||||
|  | ||||
|  | ||||
|     #[test] | ||||
|     fn test_contains_prefix_of_path() { | ||||
|  | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| use itertools::Itertools; | ||||
|  | ||||
| use super::ProximityEdge; | ||||
| use crate::new::db_cache::DatabaseCache; | ||||
| use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | ||||
| @@ -7,8 +10,6 @@ use crate::new::ranking_rule_graph::proximity::WordPair; | ||||
| use crate::new::ranking_rule_graph::{Edge, EdgeDetails}; | ||||
| use crate::new::QueryNode; | ||||
| use crate::{Index, Result}; | ||||
| use heed::RoTxn; | ||||
| use itertools::Itertools; | ||||
|  | ||||
| pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, i8)>> { | ||||
|     Ok(Some(match from_node { | ||||
|   | ||||
| @@ -1,17 +1,17 @@ | ||||
| use roaring::MultiOps; | ||||
| use heed::RoTxn; | ||||
| use roaring::{MultiOps, RoaringBitmap}; | ||||
|  | ||||
| use super::{ProximityEdge, WordPair}; | ||||
| use crate::new::db_cache::DatabaseCache; | ||||
| use crate::CboRoaringBitmapCodec; | ||||
| use crate::{CboRoaringBitmapCodec, Result}; | ||||
|  | ||||
| pub fn compute_docids<'transaction>( | ||||
|     index: &crate::Index, | ||||
|     txn: &'transaction heed::RoTxn, | ||||
|     txn: &'transaction RoTxn, | ||||
|     db_cache: &mut DatabaseCache<'transaction>, | ||||
|     edge: &ProximityEdge, | ||||
| ) -> crate::Result<roaring::RoaringBitmap> { | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let ProximityEdge { pairs, proximity } = edge; | ||||
|     // TODO: we should know already which pair of words to look for | ||||
|     let mut pair_docids = vec![]; | ||||
|     for pair in pairs.iter() { | ||||
|         let bytes = match pair { | ||||
| @@ -25,7 +25,6 @@ pub fn compute_docids<'transaction>( | ||||
|             bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default(); | ||||
|         pair_docids.push(bitmap); | ||||
|     } | ||||
|     pair_docids.sort_by_key(|rb| rb.len()); | ||||
|     let docids = MultiOps::union(pair_docids); | ||||
|     Ok(docids) | ||||
| } | ||||
|   | ||||
| @@ -1,12 +1,13 @@ | ||||
| pub mod build; | ||||
| pub mod compute_docids; | ||||
|  | ||||
| use heed::RoTxn; | ||||
|  | ||||
| use super::{Edge, EdgeDetails, RankingRuleGraphTrait}; | ||||
| use crate::new::db_cache::DatabaseCache; | ||||
| use crate::new::query_term::WordDerivations; | ||||
| use crate::new::QueryNode; | ||||
| use crate::{Index, Result}; | ||||
| use heed::RoTxn; | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub enum WordPair { | ||||
|   | ||||
| @@ -68,14 +68,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|             } | ||||
|             path_bitmaps.push(path_bitmap); | ||||
|         } | ||||
|         let docids = MultiOps::union(path_bitmaps); | ||||
|         Ok(docids) | ||||
|         // for each path, translate it to an intersection of cached roaring bitmaps | ||||
|         // then do a union for all paths | ||||
|  | ||||
|         // get the docids of the given paths in the proximity graph | ||||
|         // in the fastest possible way | ||||
|         // 1. roaring MultiOps (before we can do the Frozen+AST thing) | ||||
|         // 2. minimize number of operations | ||||
|         Ok(MultiOps::union(path_bitmaps)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -97,49 +97,10 @@ pub fn get_start_universe<'transaction>( | ||||
|     query_graph: &QueryGraph, | ||||
|     term_matching_strategy: TermsMatchingStrategy, | ||||
|     // filters: Filters, | ||||
|     // mut distinct: Option<D>, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     // NOTE: | ||||
|     // | ||||
|     // There is a performance problem when using `distinct` + exhaustive number of hits, | ||||
|     // especially for search that yield many results (many ~= almost all of the | ||||
|     // dataset). | ||||
|     // | ||||
|     // We'll solve it later. Maybe there are smart ways to go about it. | ||||
|     // | ||||
|     // For example, if there are millions of possible values for the distinct attribute, | ||||
|     // then we could just look at the documents which share any distinct attribute with | ||||
|     // another one, and remove the later docids them from the universe. | ||||
|     // => NO! because we don't know which one to remove, only after the sorting is done can we know it | ||||
|     // => this kind of computation can be done, but only in the evaluation of the number | ||||
|     // of hits for the documents that aren't returned by the search. | ||||
|     // | ||||
|     // `Distinct` otherwise should always be computed during | ||||
|  | ||||
|     // TODO: actually compute the universe from the query graph | ||||
|     let universe = index.documents_ids(txn).unwrap(); | ||||
|  | ||||
|     // resolve the whole query tree to retrieve an exhaustive list of documents matching the query. | ||||
|     // NOTE: this is wrong | ||||
|     // Instead, we should only compute the documents corresponding to the last remaining | ||||
|     // word, 2-gram, and 3-gran. | ||||
|     // let candidates = resolve_query_graph(index, txn, db_cache, query_graph, &universe)?; | ||||
|  | ||||
|     // Distinct should be lazy if placeholder? | ||||
|     // | ||||
|     // // because the initial_candidates should be an exhaustive count of the matching documents, | ||||
|     // // we precompute the distinct attributes. | ||||
|     // let initial_candidates = match &mut distinct { | ||||
|     //     Some(distinct) => { | ||||
|     //         let mut initial_candidates = RoaringBitmap::new(); | ||||
|     //         for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) { | ||||
|     //             initial_candidates.insert(c?); | ||||
|     //         } | ||||
|     //         initial_candidates | ||||
|     //     } | ||||
|     //     None => candidates.clone(), | ||||
|     // }; | ||||
|  | ||||
|     Ok(/*candidates*/ universe) | ||||
|     Ok(universe) | ||||
| } | ||||
|  | ||||
| pub fn execute_search<'transaction>( | ||||
| @@ -306,43 +267,6 @@ mod tests { | ||||
|         let primary_key = index.primary_key(&txn).unwrap().unwrap(); | ||||
|         let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap(); | ||||
|  | ||||
|         loop { | ||||
|             let start = Instant::now(); | ||||
|  | ||||
|             let mut db_cache = DatabaseCache::default(); | ||||
|  | ||||
|             let query_graph = make_query_graph( | ||||
|                 &index, | ||||
|                 &txn, | ||||
|                 &mut db_cache, | ||||
|                 "released from prison by the government", | ||||
|             ) | ||||
|             .unwrap(); | ||||
|             // println!("{}", query_graph.graphviz()); | ||||
|  | ||||
|             // TODO: filters + maybe distinct attributes? | ||||
|             let universe = get_start_universe( | ||||
|                 &index, | ||||
|                 &txn, | ||||
|                 &mut db_cache, | ||||
|                 &query_graph, | ||||
|                 TermsMatchingStrategy::Last, | ||||
|             ) | ||||
|             .unwrap(); | ||||
|             // println!("universe: {universe:?}"); | ||||
|  | ||||
|             let results = execute_search( | ||||
|                 &index, | ||||
|                 &txn, | ||||
|                 &mut db_cache, | ||||
|                 &universe, | ||||
|                 &query_graph, /*  0, 20 */ | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
|             let elapsed = start.elapsed(); | ||||
|             println!("{}us: {results:?}", elapsed.as_micros()); | ||||
|         } | ||||
|         let start = Instant::now(); | ||||
|  | ||||
|         let mut db_cache = DatabaseCache::default(); | ||||
| @@ -350,7 +274,6 @@ mod tests { | ||||
|         let query_graph = | ||||
|             make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government") | ||||
|                 .unwrap(); | ||||
|         // println!("{}", query_graph.graphviz()); | ||||
|  | ||||
|         // TODO: filters + maybe distinct attributes? | ||||
|         let universe = get_start_universe( | ||||
| @@ -361,7 +284,6 @@ mod tests { | ||||
|             TermsMatchingStrategy::Last, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         // println!("universe: {universe:?}"); | ||||
|  | ||||
|         let results = | ||||
|             execute_search(&index, &txn, &mut db_cache, &universe, &query_graph /*  0, 20 */) | ||||
| @@ -396,7 +318,7 @@ mod tests { | ||||
|         let start = Instant::now(); | ||||
|  | ||||
|         let mut s = Search::new(&txn, &index); | ||||
|         s.query("released from prison by the government"); | ||||
|         s.query("b b b b b b b b b b"); | ||||
|         s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|         s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); | ||||
|         let docs = s.execute().unwrap(); | ||||
| @@ -414,30 +336,14 @@ mod tests { | ||||
|         let index = Index::new(options, "data_movies").unwrap(); | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|         // let primary_key = "id"; | ||||
|         // let searchable_fields = vec!["title", "overview"]; | ||||
|         // let filterable_fields = vec!["release_date", "genres"]; | ||||
|         // let sortable_fields = vec[]; | ||||
|  | ||||
|         let config = IndexerConfig::default(); | ||||
|         let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|         builder.set_min_word_len_one_typo(5); | ||||
|         builder.set_min_word_len_two_typos(100); | ||||
|  | ||||
|         // builder.set_primary_key(primary_key.to_owned()); | ||||
|  | ||||
|         // let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect(); | ||||
|         // builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|         // let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect(); | ||||
|         // builder.set_filterable_fields(filterable_fields); | ||||
|  | ||||
|         builder.set_criteria(vec![Criterion::Words, Criterion::Proximity]); | ||||
|  | ||||
|         // let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); | ||||
|         // builder.set_sortable_fields(sortable_fields); | ||||
|  | ||||
|         builder.execute(|_| (), || false).unwrap(); | ||||
|     } | ||||
|  | ||||
| @@ -452,7 +358,6 @@ mod tests { | ||||
|         let primary_key = "id"; | ||||
|         let searchable_fields = vec!["title", "overview"]; | ||||
|         let filterable_fields = vec!["release_date", "genres"]; | ||||
|         // let sortable_fields = vec[]; | ||||
|  | ||||
|         let config = IndexerConfig::default(); | ||||
|         let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| use std::collections::{HashMap, HashSet, VecDeque}; | ||||
|  | ||||
| use fxhash::FxHashMap; | ||||
| use heed::{BytesDecode, RoTxn}; | ||||
| use roaring::{MultiOps, RoaringBitmap}; | ||||
| use std::collections::{HashMap, HashSet, VecDeque}; | ||||
|  | ||||
| use super::db_cache::DatabaseCache; | ||||
| use super::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; | ||||
| @@ -9,8 +10,6 @@ use super::QueryGraph; | ||||
| use crate::{Index, Result, RoaringBitmapCodec}; | ||||
|  | ||||
| // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. | ||||
|  | ||||
| // TODO: reuse NodeDocidsCache in between calls to resolve_query_graph | ||||
| #[derive(Default)] | ||||
| pub struct NodeDocIdsCache { | ||||
|     pub cache: FxHashMap<u32, RoaringBitmap>, | ||||
| @@ -55,11 +54,6 @@ impl NodeDocIdsCache { | ||||
|                     .into_iter() | ||||
|                     .map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap()); | ||||
|                 MultiOps::union(derivations_iter) | ||||
|                 // TODO: if `or` is empty, register that somewhere, and immediately return an empty bitmap | ||||
|                 // On the other hand, `or` *cannot* be empty, only its intersection with the universe can | ||||
|                 // | ||||
|                 // TODO: Or we don't do anything and accumulate all these operations in a tree of operations | ||||
|                 // between frozen roaring bitmap that is resolved only at the very end | ||||
|             } | ||||
|         }; | ||||
|         let _ = self.cache.insert(node_idx, docids); | ||||
| @@ -79,10 +73,7 @@ pub fn resolve_query_graph<'transaction>( | ||||
|     // TODO: there is definitely a faster way to compute this big | ||||
|     // roaring bitmap expression | ||||
|  | ||||
|     // resolve_query_graph_rec(index, txn, q, q.root_node, &mut docids, &mut cache)?; | ||||
|  | ||||
|     let mut nodes_resolved = RoaringBitmap::new(); | ||||
|     // TODO: should be given as an argument and kept between invocations of resolve query graph | ||||
|     let mut path_nodes_docids = vec![RoaringBitmap::new(); q.nodes.len()]; | ||||
|  | ||||
|     let mut next_nodes_to_visit = VecDeque::new(); | ||||
| @@ -123,100 +114,14 @@ pub fn resolve_query_graph<'transaction>( | ||||
|                 next_nodes_to_visit.push_back(succ); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // This is currently slow but could easily be implemented very efficiently | ||||
|         for prec in q.edges[node as usize].predecessors.iter() { | ||||
|             if q.edges[prec as usize].successors.is_subset(&nodes_resolved) { | ||||
|                 path_nodes_docids[prec as usize].clear(); | ||||
|             } | ||||
|         } | ||||
|         // println!("cached docids: {nodes_docids:?}"); | ||||
|     } | ||||
|  | ||||
|     panic!() | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use charabia::Tokenize; | ||||
|  | ||||
|     use super::resolve_query_graph; | ||||
|     use crate::db_snap; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::new::db_cache::DatabaseCache; | ||||
|     use crate::new::resolve_query_graph::NodeDocIdsCache; | ||||
|     use crate::search::new::query_term::{word_derivations, LocatedQueryTerm}; | ||||
|     use crate::search::new::QueryGraph; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_resolve_query_graph() { | ||||
|         let index = TempIndex::new(); | ||||
|  | ||||
|         index | ||||
|             .update_settings(|s| { | ||||
|                 s.set_searchable_fields(vec!["text".to_owned()]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|         index | ||||
|             .add_documents(documents!([ | ||||
|                 {"id": 0, "text": "0"}, | ||||
|                 {"id": 1, "text": "1"}, | ||||
|                 {"id": 2, "text": "2"}, | ||||
|                 {"id": 3, "text": "3"}, | ||||
|                 {"id": 4, "text": "4"}, | ||||
|                 {"id": 5, "text": "5"}, | ||||
|                 {"id": 6, "text": "6"}, | ||||
|                 {"id": 7, "text": "7"}, | ||||
|                 {"id": 8, "text": "0 1 2 3 4 5 6 7"}, | ||||
|                 {"id": 9, "text": "7 6 5 4 3 2 1 0"}, | ||||
|                 {"id": 10, "text": "01 234 56 7"}, | ||||
|                 {"id": 11, "text": "7 56 0 1 23 5 4"}, | ||||
|                 {"id": 12, "text": "0 1 2 3 4 5 6"}, | ||||
|                 {"id": 13, "text": "01 23 4 5 7"}, | ||||
|             ])) | ||||
|             .unwrap(); | ||||
|         db_snap!(index, word_docids, @"7512d0b80659f6bf37d98b374ada8098"); | ||||
|  | ||||
|         let txn = index.read_txn().unwrap(); | ||||
|         let mut db_cache = DatabaseCache::default(); | ||||
|         let fst = index.words_fst(&txn).unwrap(); | ||||
|         let query = LocatedQueryTerm::from_query( | ||||
|             "no 0 1 2 3 no 4 5 6 7".tokenize(), | ||||
|             None, | ||||
|             |word, is_prefix| { | ||||
|                 word_derivations( | ||||
|                     &index, | ||||
|                     &txn, | ||||
|                     word, | ||||
|                     if word.len() < 3 { | ||||
|                         0 | ||||
|                     } else if word.len() < 6 { | ||||
|                         1 | ||||
|                     } else { | ||||
|                         2 | ||||
|                     }, | ||||
|                     is_prefix, | ||||
|                     &fst, | ||||
|                 ) | ||||
|             }, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         let graph = QueryGraph::from_query(&index, &txn, &mut db_cache, query).unwrap(); | ||||
|         println!("{}", graph.graphviz()); | ||||
|         let mut node_docids_cache = NodeDocIdsCache::default(); | ||||
|         let universe = index.documents_ids(&txn).unwrap(); | ||||
|         insta::assert_debug_snapshot!(universe, @"RoaringBitmap<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]>"); | ||||
|         let docids = resolve_query_graph( | ||||
|             &index, | ||||
|             &txn, | ||||
|             &mut db_cache, | ||||
|             &mut node_docids_cache, | ||||
|             &graph, | ||||
|             &universe, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         insta::assert_debug_snapshot!(docids, @"RoaringBitmap<[8, 9, 11]>"); | ||||
|  | ||||
|         // TODO: test with a reduced universe | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| use heed::RoTxn; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::db_cache::DatabaseCache; | ||||
| use super::{ | ||||
|     db_cache::DatabaseCache, RankingRule, RankingRuleOutput, RankingRuleOutputIter, | ||||
|     RankingRuleOutputIterWrapper, RankingRuleQueryTrait, | ||||
|     RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper, | ||||
|     RankingRuleQueryTrait, | ||||
| }; | ||||
| use crate::{ | ||||
|     // facet::FacetType, | ||||
| @@ -33,18 +34,6 @@ impl<'transaction, Query> Sort<'transaction, Query> { | ||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||
|         let field_id = fields_ids_map.id(&field_name); | ||||
|  | ||||
|         // TODO: What is this, why? | ||||
|         // let faceted_candidates = match field_id { | ||||
|         //     Some(field_id) => { | ||||
|         //         let number_faceted = | ||||
|         //             index.faceted_documents_ids(rtxn, field_id, FacetType::Number)?; | ||||
|         //         let string_faceted = | ||||
|         //             index.faceted_documents_ids(rtxn, field_id, FacetType::String)?; | ||||
|         //         number_faceted | string_faceted | ||||
|         //     } | ||||
|         //     None => RoaringBitmap::default(), | ||||
|         // }; | ||||
|  | ||||
|         Ok(Self { field_id, is_ascending, iter: None }) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -79,8 +79,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | ||||
|             return Ok(None); | ||||
|         } | ||||
|         let Some(query_graph) = &mut self.query_graph else { panic!() }; | ||||
|         // let graphviz = query_graph.graphviz(); | ||||
|         // println!("\n===={graphviz}\n===="); | ||||
|  | ||||
|         let this_bucket = resolve_query_graph( | ||||
|             index, | ||||
|             txn, | ||||
| @@ -89,10 +88,8 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | ||||
|             query_graph, | ||||
|             universe, | ||||
|         )?; | ||||
|         // println!("WORDS: this bucket: {this_bucket:?}"); | ||||
|  | ||||
|         let child_query_graph = query_graph.clone(); | ||||
|         // this_bucket is the one that must be returned now | ||||
|         // self.cur_bucket is set to the next bucket | ||||
|         // TODO: Check whether a position exists in the graph before removing it and | ||||
|         // returning the next bucket. | ||||
|         // while graph.does_not_contain(positions_to_remove.last()) { positions_to_remove.pop() } | ||||
| @@ -118,41 +115,3 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { | ||||
|         self.positions_to_remove = vec![]; | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     // use charabia::Tokenize; | ||||
|     // use roaring::RoaringBitmap; | ||||
|  | ||||
|     // use crate::{ | ||||
|     //     index::tests::TempIndex, | ||||
|     //     search::{criteria::CriteriaBuilder, new::QueryGraphOrPlaceholder}, | ||||
|     // }; | ||||
|  | ||||
|     // use super::Words; | ||||
|  | ||||
|     // fn placeholder() { | ||||
|     //     let qt = QueryGraphOrPlaceholder::Placeholder; | ||||
|     //     let index = TempIndex::new(); | ||||
|     //     let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|     //     let query = "a beautiful summer house by the beach overlooking what seems"; | ||||
|     //     // let mut builder = QueryTreeBuilder::new(&rtxn, &index).unwrap(); | ||||
|     //     // let (qt, parts, matching_words) = builder.build(query.tokenize()).unwrap().unwrap(); | ||||
|  | ||||
|     //     // let cb = CriteriaBuilder::new(&rtxn, &index).unwrap(); | ||||
|     //     // let x = cb | ||||
|     //     //     .build( | ||||
|     //     //         Some(qt), | ||||
|     //     //         Some(parts), | ||||
|     //     //         None, | ||||
|     //     //         None, | ||||
|     //     //         false, | ||||
|     //     //         None, | ||||
|     //     //         crate::CriterionImplementationStrategy::OnlySetBased, | ||||
|     //     //     ) | ||||
|     //     //     .unwrap(); | ||||
|  | ||||
|     //     // let rr = Words::new(&index, &RoaringBitmap::from_sorted_iter(0..1000)).unwrap(); | ||||
|     // } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user