mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Fix the processed distance algorithm
This commit is contained in:
		| @@ -1,4 +1,5 @@ | ||||
| use std::ops::Deref; | ||||
| use std::fmt; | ||||
| use std::borrow::Cow; | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::HashSet; | ||||
| @@ -145,7 +146,6 @@ pub struct RawDocument<'a, 'tag> { | ||||
|     pub raw_matches: &'a mut [BareMatch<'tag>], | ||||
|     pub processed_matches: Vec<SimpleMatch>, | ||||
|     /// The list of minimum `distance` found | ||||
|     /// where the `query_index` is the index | ||||
|     pub processed_distances: Vec<Option<u8>>, | ||||
| } | ||||
|  | ||||
| @@ -157,6 +157,17 @@ pub struct BareMatch<'tag> { | ||||
|     pub postings_list: Idx32<'tag>, | ||||
| } | ||||
|  | ||||
| impl fmt::Debug for BareMatch<'_> { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         f.debug_struct("BareMatch") | ||||
|             .field("document_id", &self.document_id) | ||||
|             .field("query_index", &self.query_index) | ||||
|             .field("distance", &self.distance) | ||||
|             .field("is_exact", &self.is_exact) | ||||
|             .finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO remove that | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | ||||
| pub struct SimpleMatch { | ||||
| @@ -238,14 +249,11 @@ fn fetch_matches<'txn, 'tag>( | ||||
|     for (query_index, automaton) in automatons.iter().enumerate() { | ||||
|         let before_dfa = Instant::now(); | ||||
|         let dfa = automaton.dfa(); | ||||
|         let QueryWordAutomaton { query, is_exact, is_prefix, .. } = automaton; | ||||
|         let QueryWordAutomaton { query, is_exact, is_prefix, phrase_query } = automaton; | ||||
|         dfa_time += before_dfa.elapsed(); | ||||
|  | ||||
|         let mut number_of_words = 0; | ||||
|  | ||||
|         let before_fst_search = Instant::now(); | ||||
|         let mut stream = words.search(&dfa).into_stream(); | ||||
|         debug!("fst search took {:.02?}", before_fst_search.elapsed()); | ||||
|  | ||||
|         // while let Some(input) = stream.next() { | ||||
|         loop { | ||||
| @@ -272,7 +280,7 @@ fn fetch_matches<'txn, 'tag>( | ||||
|  | ||||
|                     let posting_list_index = arena.add(postings_list_view.range(offset, group.len())); | ||||
|                     let document_id = group[0].document_id; | ||||
|                     let stuffed = BareMatch { | ||||
|                     let bare_match = BareMatch { | ||||
|                         document_id, | ||||
|                         query_index: query_index as u16, | ||||
|                         distance, | ||||
| @@ -280,7 +288,7 @@ fn fetch_matches<'txn, 'tag>( | ||||
|                         postings_list: posting_list_index, | ||||
|                     }; | ||||
|  | ||||
|                     total_postings_lists.push(stuffed); | ||||
|                     total_postings_lists.push(bare_match); | ||||
|                     offset += group.len(); | ||||
|                 } | ||||
|             } | ||||
| @@ -434,7 +442,7 @@ fn construct_automatons2( | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if n == 1 { | ||||
|             if false && n == 1 { | ||||
|                 if let Some((left, right)) = split_best_frequency(reader, &normalized, postings_lists_store)? { | ||||
|                     let mut left_automaton = QueryWordAutomaton::exact(left); | ||||
|                     left_automaton.phrase_query = Some((0, 2)); | ||||
|   | ||||
| @@ -46,14 +46,22 @@ pub trait Criterion { | ||||
| fn prepare_query_distances( | ||||
|     documents: &mut [RawDocument], | ||||
|     query_enhancer: &QueryEnhancer, | ||||
|     automatons: &[QueryWordAutomaton], | ||||
| ) { | ||||
|     for document in documents { | ||||
|         if !document.processed_distances.is_empty() { continue } | ||||
|  | ||||
|         let mut processed = Vec::new(); | ||||
|         for m in document.raw_matches.iter() { | ||||
|             // FIXME we really need to take splitted words into account | ||||
|             //       those must be seen at the same level as the non-splitteds | ||||
|             // if automatons[m.query_index as usize].phrase_query.is_some() { | ||||
|             //     continue | ||||
|             // } | ||||
|  | ||||
|             let range = query_enhancer.replacement(m.query_index as u32); | ||||
|             processed.resize(range.end as usize, None); | ||||
|             let new_len = cmp::max(range.end as usize, processed.len()); | ||||
|             processed.resize(new_len, None); | ||||
|  | ||||
|             for index in range { | ||||
|                 let index = index as usize; | ||||
| @@ -81,7 +89,7 @@ impl Criterion for Typo { | ||||
|         query_enhancer: &QueryEnhancer, | ||||
|         automatons: &[QueryWordAutomaton], | ||||
|     ) { | ||||
|         prepare_query_distances(documents, query_enhancer); | ||||
|         prepare_query_distances(documents, query_enhancer, automatons); | ||||
|     } | ||||
|  | ||||
|     fn evaluate( | ||||
| @@ -139,7 +147,7 @@ impl Criterion for Words { | ||||
|         query_enhancer: &QueryEnhancer, | ||||
|         automatons: &[QueryWordAutomaton], | ||||
|     ) { | ||||
|         prepare_query_distances(documents, query_enhancer); | ||||
|         prepare_query_distances(documents, query_enhancer, automatons); | ||||
|     } | ||||
|  | ||||
|     fn evaluate( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user