Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-31 07:56:28 +00:00)

Commit: Add new tests and fix construction of query graph from paths

This commit is contained in:
		| @@ -176,6 +176,9 @@ impl<T> Interner<T> { | ||||
|     pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> { | ||||
|         self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x)) | ||||
|     } | ||||
|     pub fn freeze(self) -> FixedSizeInterner<T> { | ||||
|         FixedSizeInterner { stable_store: self.stable_store } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A store of values of type `T`, each linked to a value of type `From` | ||||
|   | ||||
| @@ -4,7 +4,7 @@ use super::query_term::{ | ||||
| }; | ||||
| use super::small_bitmap::SmallBitmap; | ||||
| use super::SearchContext; | ||||
| use crate::search::new::interner::DedupInterner; | ||||
| use crate::search::new::interner::Interner; | ||||
| use crate::Result; | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::BTreeMap; | ||||
| @@ -345,29 +345,13 @@ impl QueryGraph { | ||||
|     Build a query graph from a list of paths | ||||
|  | ||||
|     The paths are composed of source and dest terms. | ||||
|     If the source term is `None`, then the last dest term is used | ||||
|     as the predecessor of the dest term. If the source is Some(_), | ||||
|     then an edge is built between the last dest term and the source, | ||||
|     and between the source and new dest term. | ||||
|  | ||||
|     Note that the resulting graph will not correspond to a perfect | ||||
|     representation of the set of paths. | ||||
|     For example, consider the following paths: | ||||
|     ```txt | ||||
|     PATH 1 :  a -> b1 -> c1 -> d -> e1 | ||||
|     PATH 2 :  a -> b2 -> c2 -> d -> e2 | ||||
|     ``` | ||||
|     Then the resulting graph will be: | ||||
|     ```txt | ||||
|               ┌────┐  ┌────┐             ┌────┐ | ||||
|            ┌──│ b1 │──│ c1 │─┐        ┌──│ e1 │ | ||||
|     ┌────┐ │  └────┘  └────┘ │ ┌────┐ │  └────┘ | ||||
|     │ a  │─┤                 ├─│ d  │─┤ | ||||
|     └────┘ │  ┌────┐  ┌────┐ │ └────┘ │  ┌────┐ | ||||
|            └──│ b2 │──│ c2 │─┘        └──│ e2 │ | ||||
|               └────┘  └────┘             └────┘ | ||||
|     ``` | ||||
|     which is different from the fully correct representation: | ||||
|     ```txt | ||||
|               ┌────┐  ┌────┐   ┌────┐   ┌────┐ | ||||
|            ┌──│ b1 │──│ c1 │───│ d  │───│ e1 │ | ||||
| @@ -383,21 +367,51 @@ impl QueryGraph { | ||||
|     pub fn build_from_paths( | ||||
|         paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>, | ||||
|     ) -> Self { | ||||
|         let mut node_data = DedupInterner::default(); | ||||
|         let root_node = node_data.insert(QueryNodeData::Start); | ||||
|         let end_node = node_data.insert(QueryNodeData::End); | ||||
|         let mut node_data = Interner::default(); | ||||
|         let root_node = node_data.push(QueryNodeData::Start); | ||||
|         let end_node = node_data.push(QueryNodeData::End); | ||||
|  | ||||
|         let mut paths_with_single_terms = vec![]; | ||||
|  | ||||
|         for path in paths { | ||||
|             let mut processed_path = vec![]; | ||||
|             let mut prev_dest_term: Option<LocatedQueryTermSubset> = None; | ||||
|             for (start_term, dest_term) in path { | ||||
|                 if let Some(prev_dest_term) = prev_dest_term.take() { | ||||
|                     if let Some(mut start_term) = start_term { | ||||
|                         if start_term.term_ids == prev_dest_term.term_ids { | ||||
|                             start_term.term_subset.intersect(&prev_dest_term.term_subset); | ||||
|                             processed_path.push(start_term); | ||||
|                         } else { | ||||
|                             processed_path.push(prev_dest_term); | ||||
|                             processed_path.push(start_term); | ||||
|                         } | ||||
|                     } else { | ||||
|                         processed_path.push(prev_dest_term); | ||||
|                     } | ||||
|                 } else if let Some(start_term) = start_term { | ||||
|                     processed_path.push(start_term); | ||||
|                 } | ||||
|                 prev_dest_term = Some(dest_term); | ||||
|             } | ||||
|             if let Some(prev_dest_term) = prev_dest_term { | ||||
|                 processed_path.push(prev_dest_term); | ||||
|             } | ||||
|             paths_with_single_terms.push(processed_path); | ||||
|         } | ||||
|  | ||||
|         // TODO: make a prefix tree of the processed paths to avoid uselessly duplicating nodes | ||||
|  | ||||
|         let mut paths_with_ids = vec![]; | ||||
|         for path in paths { | ||||
|         for path in paths_with_single_terms { | ||||
|             let mut path_with_ids = vec![]; | ||||
|             for node in path { | ||||
|                 let (start_term, end_term) = node; | ||||
|                 let src_node_id = start_term.map(|x| node_data.insert(QueryNodeData::Term(x))); | ||||
|                 let dest_node_id = node_data.insert(QueryNodeData::Term(end_term)); | ||||
|                 path_with_ids.push((src_node_id, dest_node_id)); | ||||
|             for term in path { | ||||
|                 let id = node_data.push(QueryNodeData::Term(term)); | ||||
|                 path_with_ids.push(Interned::from_raw(id.into_raw())); | ||||
|             } | ||||
|             paths_with_ids.push(path_with_ids); | ||||
|         } | ||||
|  | ||||
|         let nodes_data = node_data.freeze(); | ||||
|         let nodes_data_len = nodes_data.len(); | ||||
|         let mut nodes = nodes_data.map_move(|n| QueryNode { | ||||
| @@ -406,31 +420,22 @@ impl QueryGraph { | ||||
|             successors: SmallBitmap::new(nodes_data_len), | ||||
|         }); | ||||
|  | ||||
|         let root_node = Interned::from_raw(root_node.into_raw()); | ||||
|         let end_node = Interned::from_raw(end_node.into_raw()); | ||||
|         let root_node = Interned::<QueryNode>::from_raw(root_node.into_raw()); | ||||
|         let end_node = Interned::<QueryNode>::from_raw(end_node.into_raw()); | ||||
|  | ||||
|         for path in paths_with_ids { | ||||
|             let mut prev_node = root_node; | ||||
|             for node in path { | ||||
|                 let (start_term, dest_term) = node; | ||||
|                 let end_term = Interned::from_raw(dest_term.into_raw()); | ||||
|                 let src = if let Some(start_term) = start_term { | ||||
|                     // TODO: this is incorrect! should take the intersection | ||||
|                     // between the prev node and the start term if they refer to the same | ||||
|                     // original query term! | ||||
|                     let start_term = Interned::from_raw(start_term.into_raw()); | ||||
|                     nodes.get_mut(prev_node).successors.insert(start_term); | ||||
|                     nodes.get_mut(start_term).predecessors.insert(prev_node); | ||||
|                     start_term | ||||
|                 } else { | ||||
|                     prev_node | ||||
|                 }; | ||||
|                 nodes.get_mut(src).successors.insert(end_term); | ||||
|                 nodes.get_mut(end_term).predecessors.insert(src); | ||||
|                 prev_node = end_term; | ||||
|             let mut prev_node_id = root_node; | ||||
|             for node_id in path { | ||||
|                 let prev_node = nodes.get_mut(prev_node_id); | ||||
|                 prev_node.successors.insert(node_id); | ||||
|                 let node = nodes.get_mut(node_id); | ||||
|                 node.predecessors.insert(prev_node_id); | ||||
|                 prev_node_id = node_id; | ||||
|             } | ||||
|             nodes.get_mut(prev_node).successors.insert(end_node); | ||||
|             nodes.get_mut(end_node).predecessors.insert(prev_node); | ||||
|             let prev_node = nodes.get_mut(prev_node_id); | ||||
|             prev_node.successors.insert(end_node); | ||||
|             let node = nodes.get_mut(end_node); | ||||
|             node.predecessors.insert(prev_node_id); | ||||
|         } | ||||
|  | ||||
|         QueryGraph { root_node, end_node, nodes } | ||||
|   | ||||
| @@ -3,6 +3,8 @@ This module tests the interactions between the proximity and typo ranking rules. | ||||
|  | ||||
| The proximity ranking rule should transform the query graph such that it | ||||
| only contains the word pairs that it used to compute its bucket. | ||||
|  | ||||
| TODO: This is not currently implemented. | ||||
| */ | ||||
|  | ||||
| use crate::{ | ||||
| @@ -61,8 +63,13 @@ fn test_trap_basic() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("summer holiday"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     // TODO: this is incorrect, 1 should come before 0 | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"summer. holiday. sommer holidty\"", | ||||
|         "\"summer. holiday. sommer holiday\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|   | ||||
| @@ -271,13 +271,13 @@ fn test_sort() { | ||||
|         "false", | ||||
|         "true", | ||||
|         "true", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist__", | ||||
|         "null", | ||||
|         "[null,null,\"\"]", | ||||
|         "\"\"", | ||||
|         "{\"sub\":0}", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
|  | ||||
| @@ -304,13 +304,13 @@ fn test_sort() { | ||||
|         "false", | ||||
|         "\"1\"", | ||||
|         "\"0\"", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist__", | ||||
|         "null", | ||||
|         "[null,null,\"\"]", | ||||
|         "\"\"", | ||||
|         "{\"sub\":0}", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist___", | ||||
|         "__does_not_exist__", | ||||
|         "__does_not_exist__", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|   | ||||
| @@ -59,8 +59,7 @@ fn create_index() -> TempIndex { | ||||
|                 "id": 3, | ||||
|                 "text": "beautiful sommer" | ||||
|             }, | ||||
|             // The next two documents lay out an even more complex trap, which the current implementation | ||||
|             // fails to handle properly. | ||||
|             // The next two documents lay out an even more complex trap. | ||||
|             // With the user query `delicious sweet dessert`, the typo ranking rule will return one bucket of: | ||||
|             // - id 4: delicitous + sweet + dessert | ||||
|             // - id 5: beautiful + sweet + desgert | ||||
| @@ -114,13 +113,12 @@ fn test_trap_complex2() { | ||||
|     s.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     s.query("delicious sweet dessert"); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5]"); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4]"); | ||||
|     let texts = collect_field_values(&index, &txn, "text", &documents_ids); | ||||
|     // TODO: this is incorrect. 5 should appear before 4 | ||||
|     insta::assert_debug_snapshot!(texts, @r###" | ||||
|     [ | ||||
|         "\"delicitous. sweet. dessert. delicitous sweet desgert\"", | ||||
|         "\"delicious. sweet desgert. delicious sweet desgert\"", | ||||
|         "\"delicitous. sweet. dessert. delicitous sweet desgert\"", | ||||
|     ] | ||||
|     "###); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user