mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Remove the raw_query functions
This commit is contained in:
		| @@ -39,7 +39,6 @@ pub fn bucket_sort<'c>( | ||||
|     synonyms_store: store::Synonyms, | ||||
| ) -> MResult<Vec<Document>> | ||||
| { | ||||
|     // let automatons = construct_automatons(query); | ||||
|     let (automatons, query_enhancer) = | ||||
|         construct_automatons2(reader, query, main_store, postings_lists_store, synonyms_store)?; | ||||
|  | ||||
| @@ -286,14 +285,11 @@ impl<'txn> PostingsListView<'txn> { | ||||
|     } | ||||
|  | ||||
|     pub fn rewrite_with(&mut self, postings_list: SetBuf<DocIndex>) { | ||||
|         *self = match self { | ||||
|             PostingsListView::Original { input, .. } => { | ||||
|                 PostingsListView::Rewritten { input: input.clone(), postings_list } | ||||
|             }, | ||||
|             PostingsListView::Rewritten { input, .. } => { | ||||
|                 PostingsListView::Rewritten { input: input.clone(), postings_list } | ||||
|             }, | ||||
|         let input = match self { | ||||
|             PostingsListView::Original { input, .. } => input.clone(), | ||||
|             PostingsListView::Rewritten { input, .. } => input.clone(), | ||||
|         }; | ||||
|         *self = PostingsListView::rewritten(input, postings_list); | ||||
|     } | ||||
|  | ||||
|     pub fn len(&self) -> usize { | ||||
| @@ -565,7 +561,8 @@ fn construct_automatons2( | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if true && n == 1 { | ||||
|             if n == 1 { | ||||
|                 // automatons for split words | ||||
|                 if let Some((left, right)) = split_best_frequency(reader, &normalized, postings_lists_store)? { | ||||
|                     let mut left_automaton = QueryWordAutomaton::exact(left); | ||||
|                     left_automaton.phrase_query = Some((0, 2)); | ||||
|   | ||||
| @@ -399,346 +399,6 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn raw_query<'c, FI>( | ||||
|     reader: &heed::RoTxn<MainT>, | ||||
|  | ||||
|     query: &str, | ||||
|     range: Range<usize>, | ||||
|  | ||||
|     filter: Option<FI>, | ||||
|     timeout: Option<Duration>, | ||||
|  | ||||
|     criteria: Criteria<'c>, | ||||
|     searchable_attrs: Option<ReorderedAttrs>, | ||||
|  | ||||
|     main_store: store::Main, | ||||
|     postings_lists_store: store::PostingsLists, | ||||
|     documents_fields_counts_store: store::DocumentsFieldsCounts, | ||||
|     synonyms_store: store::Synonyms, | ||||
| ) -> MResult<Vec<Document>> | ||||
| where | ||||
|     FI: Fn(DocumentId) -> bool, | ||||
| { | ||||
|     // A filter was provided: delegate the whole query to `raw_query_with_distinct`, | ||||
|     // passing a distinct rule that always returns `None` and therefore has no effect. | ||||
|     if filter.is_some() { | ||||
|         let distinct = |_| None; | ||||
|         let distinct_size = 1; | ||||
|         return raw_query_with_distinct( | ||||
|             reader, | ||||
|             query, | ||||
|             range, | ||||
|             filter, | ||||
|             distinct, | ||||
|             distinct_size, | ||||
|             timeout, | ||||
|             criteria, | ||||
|             searchable_attrs, | ||||
|             main_store, | ||||
|             postings_lists_store, | ||||
|             documents_fields_counts_store, | ||||
|             synonyms_store, | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     let start_processing = Instant::now(); | ||||
|     let mut raw_documents_processed = Vec::with_capacity(range.len()); | ||||
|  | ||||
|     let (automaton_producer, query_enhancer) = AutomatonProducer::new( | ||||
|         reader, | ||||
|         query, | ||||
|         main_store, | ||||
|         postings_lists_store, | ||||
|         synonyms_store, | ||||
|     )?; | ||||
|  | ||||
|     let automaton_producer = automaton_producer.into_iter(); | ||||
|     let mut automatons = Vec::new(); | ||||
|  | ||||
|     // accumulate the automaton groups one at a time, redoing the search after each addition | ||||
|     for auts in automaton_producer { | ||||
|         automatons.push(auts); | ||||
|  | ||||
|         for (i, group) in automatons.iter().enumerate() { | ||||
|             debug!("group {} automatons {:?}", i, group.automatons); | ||||
|         } | ||||
|  | ||||
|         let before_fetch_raw_documents = Instant::now(); | ||||
|         // we must retrieve the documents associated | ||||
|         // with the automatons accumulated so far | ||||
|         let mut raw_documents = fetch_raw_documents( | ||||
|             reader, | ||||
|             &automatons, | ||||
|             &query_enhancer, | ||||
|             searchable_attrs.as_ref(), | ||||
|             main_store, | ||||
|             postings_lists_store, | ||||
|         )?; | ||||
|         debug!("fetch_raw_documents took {:.02?}", before_fetch_raw_documents.elapsed()); | ||||
|  | ||||
|         // stop once the timeout has elapsed, but only if a previous pass already produced results | ||||
|         if let Some(timeout) = timeout { | ||||
|             if !raw_documents_processed.is_empty() && start_processing.elapsed() > timeout { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let before_bucket_sort = Instant::now(); | ||||
|  | ||||
|         let mut groups = vec![raw_documents.as_mut_slice()]; | ||||
|  | ||||
|         'criteria: for criterion in criteria.as_ref() { | ||||
|             let tmp_groups = mem::replace(&mut groups, Vec::new()); | ||||
|             let mut documents_seen = 0; | ||||
|  | ||||
|             for group in tmp_groups { | ||||
|                 // if this group ends before the start of the requested range, | ||||
|                 // push it as-is, without sorting or splitting it | ||||
|                 if documents_seen + group.len() < range.start { | ||||
|                     documents_seen += group.len(); | ||||
|                     groups.push(group); | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 // pull the fields counts of these documents when the criterion is named "Exact" | ||||
|                 // TODO it would be great to have a "dependency" thing for each criterion | ||||
|                 //      and make it so that we can be lazy on pulling/computing some data. | ||||
|                 if criterion.name() == "Exact" { | ||||
|                     for document in group.iter_mut() { | ||||
|                         let mut fields_counts = Vec::new(); | ||||
|                         for result in documents_fields_counts_store.document_fields_counts(reader, document.id)? { | ||||
|                             let (attr, count) = result?; | ||||
|                             fields_counts.push(AttrCount { attr: attr.0, count }); | ||||
|                         } | ||||
|                         document.fields_counts = Some(SetBuf::new(fields_counts).unwrap()); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|  | ||||
|                 group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); | ||||
|  | ||||
|                 for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) { | ||||
|                     debug!("criterion {} produced a group of size {}", criterion.name(), group.len()); | ||||
|  | ||||
|                     documents_seen += group.len(); | ||||
|                     groups.push(group); | ||||
|  | ||||
|  | ||||
|                     // we have sorted enough documents once the number of documents seen reaches | ||||
|                     // the end of the requested range: we can continue to the next criterion | ||||
|                     if documents_seen >= range.end { | ||||
|                         continue 'criteria; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         debug!("bucket_sort took {:.02?}", before_bucket_sort.elapsed()); | ||||
|  | ||||
|         // once the documents related to the current automatons are classified, | ||||
|         // we keep the requested range of them as the latest valid result | ||||
|         let iter = raw_documents | ||||
|             .into_iter() | ||||
|             .skip(range.start) | ||||
|             .take(range.len()); | ||||
|         raw_documents_processed.clear(); | ||||
|         raw_documents_processed.extend(iter); | ||||
|  | ||||
|         // stop processing further automaton groups once the timeout has elapsed | ||||
|         if let Some(timeout) = timeout { | ||||
|             if start_processing.elapsed() > timeout { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // convert the retained raw documents into `Document`s | ||||
|     // now that we know which ones must be returned | ||||
|     let documents = raw_documents_processed | ||||
|         .into_iter() | ||||
|         .map(Document::from_raw) | ||||
|         .collect(); | ||||
|  | ||||
|     Ok(documents) | ||||
| } | ||||
|  | ||||
| fn raw_query_with_distinct<'c, FI, FD>( | ||||
|     reader: &heed::RoTxn<MainT>, | ||||
|  | ||||
|     query: &str, | ||||
|     range: Range<usize>, | ||||
|  | ||||
|     filter: Option<FI>, | ||||
|  | ||||
|     distinct: FD, | ||||
|     distinct_size: usize, | ||||
|     timeout: Option<Duration>, | ||||
|  | ||||
|     criteria: Criteria<'c>, | ||||
|     searchable_attrs: Option<ReorderedAttrs>, | ||||
|  | ||||
|     main_store: store::Main, | ||||
|     postings_lists_store: store::PostingsLists, | ||||
|     documents_fields_counts_store: store::DocumentsFieldsCounts, | ||||
|     synonyms_store: store::Synonyms, | ||||
| ) -> MResult<Vec<Document>> | ||||
| where | ||||
|     FI: Fn(DocumentId) -> bool, | ||||
|     FD: Fn(DocumentId) -> Option<u64>, | ||||
| { | ||||
|     let start_processing = Instant::now(); | ||||
|     let mut raw_documents_processed = Vec::new(); | ||||
|  | ||||
|     let (automaton_producer, query_enhancer) = AutomatonProducer::new( | ||||
|         reader, | ||||
|         query, | ||||
|         main_store, | ||||
|         postings_lists_store, | ||||
|         synonyms_store, | ||||
|     )?; | ||||
|  | ||||
|     let automaton_producer = automaton_producer.into_iter(); | ||||
|     let mut automatons = Vec::new(); | ||||
|  | ||||
|     // accumulate the automaton groups one at a time, redoing the search after each addition | ||||
|     for auts in automaton_producer { | ||||
|         automatons.push(auts); | ||||
|  | ||||
|         // we must retrieve the documents associated | ||||
|         // with the automatons accumulated so far | ||||
|         let mut raw_documents = fetch_raw_documents( | ||||
|             reader, | ||||
|             &automatons, | ||||
|             &query_enhancer, | ||||
|             searchable_attrs.as_ref(), | ||||
|             main_store, | ||||
|             postings_lists_store, | ||||
|         )?; | ||||
|  | ||||
|         // stop once the timeout has elapsed, but only if a previous pass already produced results | ||||
|         if let Some(timeout) = timeout { | ||||
|             if !raw_documents_processed.is_empty() && start_processing.elapsed() > timeout { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let mut groups = vec![raw_documents.as_mut_slice()]; | ||||
|         let mut key_cache = HashMap::new(); | ||||
|  | ||||
|         let mut filter_map = HashMap::new(); | ||||
|         // these two variables hold the current distinct map and | ||||
|         // the raw offset of the start of the group in which the | ||||
|         // range.start bound is located according to the distinct function | ||||
|         let mut distinct_map = DistinctMap::new(distinct_size); | ||||
|         let mut distinct_raw_offset = 0; | ||||
|  | ||||
|         'criteria: for criterion in criteria.as_ref() { | ||||
|             let tmp_groups = mem::replace(&mut groups, Vec::new()); | ||||
|             let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map); | ||||
|             let mut documents_seen = 0; | ||||
|  | ||||
|             for group in tmp_groups { | ||||
|                 // if this group ends before `distinct_raw_offset` it does not overlap with the | ||||
|                 // requested range: push it as-is, without sorting or splitting it | ||||
|                 if documents_seen + group.len() < distinct_raw_offset { | ||||
|                     documents_seen += group.len(); | ||||
|                     groups.push(group); | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); | ||||
|  | ||||
|                 for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) { | ||||
|                     // we must compute the real distinct length of this sub-group (filter results and distinct keys are cached per document id) | ||||
|                     for document in group.iter() { | ||||
|                         let filter_accepted = match &filter { | ||||
|                             Some(filter) => { | ||||
|                                 let entry = filter_map.entry(document.id); | ||||
|                                 *entry.or_insert_with(|| (filter)(document.id)) | ||||
|                             } | ||||
|                             None => true, | ||||
|                         }; | ||||
|  | ||||
|                         if filter_accepted { | ||||
|                             let entry = key_cache.entry(document.id); | ||||
|                             let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new)); | ||||
|  | ||||
|                             match key.clone() { | ||||
|                                 Some(key) => buf_distinct.register(key), | ||||
|                                 None => buf_distinct.register_without_key(), | ||||
|                             }; | ||||
|                         } | ||||
|  | ||||
|                         // the requested range end is reached: stop computing distinct | ||||
|                         if buf_distinct.len() >= range.end { | ||||
|                             break; | ||||
|                         } | ||||
|                     } | ||||
|  | ||||
|                     documents_seen += group.len(); | ||||
|                     groups.push(group); | ||||
|  | ||||
|                     // if this sub-group does not overlap with the requested range (distinct count below range.start) | ||||
|                     // we must update the distinct map and its start index | ||||
|                     if buf_distinct.len() < range.start { | ||||
|                         buf_distinct.transfert_to_internal(); | ||||
|                         distinct_raw_offset = documents_seen; | ||||
|                     } | ||||
|  | ||||
|                     // we have sorted enough documents once the distinct count reaches | ||||
|                     // the end of the requested range: we can continue to the next criterion | ||||
|                     if buf_distinct.len() >= range.end { | ||||
|                         continue 'criteria; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // once the documents related to the current automatons are classified, | ||||
|         // we rebuild the latest valid result from them | ||||
|         let mut seen = BufferedDistinctMap::new(&mut distinct_map); | ||||
|         raw_documents_processed.clear(); | ||||
|  | ||||
|         for document in raw_documents.into_iter().skip(distinct_raw_offset) { | ||||
|             let filter_accepted = match &filter { | ||||
|                 Some(_) => filter_map.remove(&document.id).unwrap(), | ||||
|                 None => true, | ||||
|             }; | ||||
|  | ||||
|             if filter_accepted { | ||||
|                 let key = key_cache.remove(&document.id).unwrap(); | ||||
|                 let distinct_accepted = match key { | ||||
|                     Some(key) => seen.register(key), | ||||
|                     None => seen.register_without_key(), | ||||
|                 }; | ||||
|  | ||||
|                 if distinct_accepted && seen.len() > range.start { | ||||
|                     raw_documents_processed.push(document); | ||||
|                     if raw_documents_processed.len() == range.len() { | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // stop processing further automaton groups once the timeout has elapsed | ||||
|         if let Some(timeout) = timeout { | ||||
|             if start_processing.elapsed() > timeout { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // convert the retained raw documents into `Document`s | ||||
|     // now that we know which ones must be returned | ||||
|     let documents = raw_documents_processed | ||||
|         .into_iter() | ||||
|         .map(Document::from_raw) | ||||
|         .collect(); | ||||
|  | ||||
|     Ok(documents) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user