mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-14 00:36:25 +00:00
introduce exact_word_docids db
This commit is contained in:
@ -10,17 +10,21 @@ use super::helpers::{
|
||||
};
|
||||
use crate::error::SerializationError;
|
||||
use crate::index::db_name::DOCID_WORD_POSITIONS;
|
||||
use crate::update::index_documents::MergeFn;
|
||||
use crate::Result;
|
||||
|
||||
/// Extracts the word and the documents ids where this word appear.
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted words and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
///
|
||||
/// The first returned reader in the one for normal word_docids, and the second one is for
|
||||
/// exact_word_docids
|
||||
#[logging_timer::time]
|
||||
pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_docids_sorter = create_sorter(
|
||||
@ -43,5 +47,9 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
word_docids_sorter.insert(word_bytes, &value_buffer)?;
|
||||
}
|
||||
|
||||
sorter_into_reader(word_docids_sorter, indexer)
|
||||
let empty_sorter = grenad::Sorter::new(merge_roaring_bitmaps as MergeFn);
|
||||
Ok((
|
||||
sorter_into_reader(word_docids_sorter, indexer)?,
|
||||
sorter_into_reader(empty_sorter, indexer)?,
|
||||
))
|
||||
}
|
||||
|
@ -86,13 +86,16 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"field-id-wordcount-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer.clone(),
|
||||
lmdb_writer_sx.clone(),
|
||||
extract_word_docids,
|
||||
merge_roaring_bitmaps,
|
||||
TypedChunk::WordDocids,
|
||||
|(word_docids_reader, exact_word_docids_reader)| TypedChunk::WordDocids {
|
||||
word_docids_reader,
|
||||
exact_word_docids_reader,
|
||||
},
|
||||
"word-docids",
|
||||
);
|
||||
|
||||
|
Reference in New Issue
Block a user