mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-08-02 03:40:00 +00:00
Before improving fields AttrCount
Removing the fields_count fetching reduced by 2 times the serach time, we should look at lazily pulling them form the criterions in needs ugly-test: Make the fields_count fetching lazy Just before running the exactness criterion
This commit is contained in:
committed by
Clément Renault
parent
11f3d7782d
commit
ef6a4db182
@ -6,6 +6,7 @@ use std::time::{Duration, Instant};
|
||||
use std::{cmp, mem};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use log::debug;
|
||||
use sdset::SetBuf;
|
||||
use slice_group_by::{GroupBy, GroupByMut};
|
||||
|
||||
@ -14,7 +15,7 @@ use crate::automaton::{Automaton, AutomatonGroup, AutomatonProducer, QueryEnhanc
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::levenshtein::prefix_damerau_levenshtein;
|
||||
use crate::raw_document::{raw_documents_from, RawDocument};
|
||||
use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch};
|
||||
use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch, AttrCount};
|
||||
use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
|
||||
|
||||
pub struct QueryBuilder<'c, 'f, 'd> {
|
||||
@ -146,27 +147,18 @@ fn fetch_raw_documents(
|
||||
searchables: Option<&ReorderedAttrs>,
|
||||
main_store: store::Main,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
) -> MResult<Vec<RawDocument>> {
|
||||
let mut matches = Vec::new();
|
||||
let mut highlights = Vec::new();
|
||||
|
||||
let before_automatons_groups_loop = Instant::now();
|
||||
for group in automatons_groups {
|
||||
let AutomatonGroup {
|
||||
is_phrase_query,
|
||||
automatons,
|
||||
} = group;
|
||||
let AutomatonGroup { is_phrase_query, automatons } = group;
|
||||
let phrase_query_len = automatons.len();
|
||||
|
||||
let mut tmp_matches = Vec::new();
|
||||
for (id, automaton) in automatons.into_iter().enumerate() {
|
||||
let Automaton {
|
||||
index,
|
||||
is_exact,
|
||||
query_len,
|
||||
query,
|
||||
..
|
||||
} = automaton;
|
||||
let Automaton { index, is_exact, query_len, query, .. } = automaton;
|
||||
let dfa = automaton.dfa();
|
||||
|
||||
let words = match main_store.words_fst(reader)? {
|
||||
@ -250,26 +242,26 @@ fn fetch_raw_documents(
|
||||
}
|
||||
}
|
||||
}
|
||||
debug!("automatons_groups_loop took {:.02?}", before_automatons_groups_loop.elapsed());
|
||||
|
||||
let before_multiword_rewrite_matches = Instant::now();
|
||||
let matches = multiword_rewrite_matches(matches, &query_enhancer);
|
||||
debug!("multiword_rewrite_matches took {:.02?}", before_multiword_rewrite_matches.elapsed());
|
||||
|
||||
let before_highlight_sorting = Instant::now();
|
||||
let highlights = {
|
||||
highlights.sort_unstable_by_key(|(id, _)| *id);
|
||||
SetBuf::new_unchecked(highlights)
|
||||
};
|
||||
debug!("highlight_sorting {:.02?}", before_highlight_sorting.elapsed());
|
||||
|
||||
let fields_counts = {
|
||||
let mut fields_counts = Vec::new();
|
||||
for group in matches.linear_group_by_key(|(id, ..)| *id) {
|
||||
let id = group[0].0;
|
||||
for result in documents_fields_counts_store.document_fields_counts(reader, id)? {
|
||||
let (attr, count) = result?;
|
||||
fields_counts.push((id, attr, count));
|
||||
}
|
||||
}
|
||||
SetBuf::new(fields_counts).unwrap()
|
||||
};
|
||||
|
||||
Ok(raw_documents_from(matches, highlights, fields_counts))
|
||||
let before_raw_documents = Instant::now();
|
||||
let raw_documents = raw_documents_from(matches, highlights);
|
||||
debug!("raw_documents took {:.02?}", before_raw_documents.elapsed());
|
||||
debug!("documents to worry about: {}", raw_documents.len());
|
||||
|
||||
Ok(raw_documents)
|
||||
}
|
||||
|
||||
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
||||
@ -434,6 +426,11 @@ where
|
||||
for auts in automaton_producer {
|
||||
automatons.push(auts);
|
||||
|
||||
for (i, group) in automatons.iter().enumerate() {
|
||||
debug!("group {} automatons {:?}", i, group.automatons);
|
||||
}
|
||||
|
||||
let before_fetch_raw_documents = Instant::now();
|
||||
// we must retrieve the documents associated
|
||||
// with the current automatons
|
||||
let mut raw_documents = fetch_raw_documents(
|
||||
@ -443,8 +440,8 @@ where
|
||||
searchable_attrs.as_ref(),
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
documents_fields_counts_store,
|
||||
)?;
|
||||
debug!("fetch_raw_documents took {:.02?}", before_fetch_raw_documents.elapsed());
|
||||
|
||||
// stop processing when time is running out
|
||||
if let Some(timeout) = timeout {
|
||||
@ -468,6 +465,20 @@ where
|
||||
continue;
|
||||
}
|
||||
|
||||
// we must pull the fields counts of these documents
|
||||
// TODO it would be great to had a "dependency" thing for each criterion
|
||||
// and make it so that we can be lazy on pulling/computing some data.
|
||||
if criterion.name() == "Exact" {
|
||||
for document in group.iter_mut() {
|
||||
let mut fields_counts = Vec::new();
|
||||
for result in documents_fields_counts_store.document_fields_counts(reader, document.id)? {
|
||||
let (attr, count) = result?;
|
||||
fields_counts.push(AttrCount { attr: attr.0, count });
|
||||
}
|
||||
document.fields_counts = Some(SetBuf::new(fields_counts).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(a, b));
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
|
||||
@ -561,7 +572,6 @@ where
|
||||
searchable_attrs.as_ref(),
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
documents_fields_counts_store,
|
||||
)?;
|
||||
|
||||
// stop processing when time is running out
|
||||
|
Reference in New Issue
Block a user