Compute chunk size based on the input data size and the number of indexing threads

This commit is contained in:
ManyTheFish
2024-01-22 16:23:12 +01:00
committed by Louis Dureuil
parent 023c2d755f
commit be1b054b05
13 changed files with 991 additions and 795 deletions

View File

@ -47,7 +47,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
)]
pub fn execute(
self,
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
new_word_docids: grenad::Merger<CursorClonableMmap, MergeFn>,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
@ -68,7 +68,8 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
if !common_prefix_fst_words.is_empty() {
let mut current_prefixes: Option<&&[String]> = None;
let mut prefixes_cache = HashMap::new();
while let Some((word, data)) = new_word_docids_iter.move_on_next()? {
let mut new_word_docids_iter = new_word_docids.into_stream_merger_iter()?;
while let Some((word, data)) = new_word_docids_iter.next()? {
current_prefixes = match current_prefixes.take() {
Some(prefixes) if word.starts_with(prefixes[0].as_bytes()) => Some(prefixes),
_otherwise => {