mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-08-02 11:50:03 +00:00
Simplify word level position DB into a word position DB
This commit is contained in:
@ -14,13 +14,13 @@ use crate::{DocumentId, Result};
|
||||
/// Returns a grenad reader with the list of extracted words at positions and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_word_level_position_docids<R: io::Read>(
|
||||
pub fn extract_word_position_docids<R: io::Read>(
|
||||
mut docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_level_position_docids_sorter = create_sorter(
|
||||
let mut word_position_docids_sorter = create_sorter(
|
||||
merge_cbo_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
@ -37,15 +37,11 @@ pub fn extract_word_level_position_docids<R: io::Read>(
|
||||
for position in read_u32_ne_bytes(value) {
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(word_bytes);
|
||||
key_buffer.push(0); // tree level
|
||||
|
||||
// Levels are composed of left and right bounds.
|
||||
key_buffer.extend_from_slice(&position.to_be_bytes());
|
||||
key_buffer.extend_from_slice(&position.to_be_bytes());
|
||||
|
||||
word_level_position_docids_sorter.insert(&key_buffer, &document_id.to_ne_bytes())?;
|
||||
word_position_docids_sorter.insert(&key_buffer, &document_id.to_ne_bytes())?;
|
||||
}
|
||||
}
|
||||
|
||||
sorter_into_reader(word_level_position_docids_sorter, indexer)
|
||||
sorter_into_reader(word_position_docids_sorter, indexer)
|
||||
}
|
@ -5,8 +5,8 @@ mod extract_fid_docid_facet_values;
|
||||
mod extract_fid_word_count_docids;
|
||||
mod extract_geo_points;
|
||||
mod extract_word_docids;
|
||||
mod extract_word_level_position_docids;
|
||||
mod extract_word_pair_proximity_docids;
|
||||
mod extract_word_position_docids;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
@ -22,8 +22,8 @@ use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values;
|
||||
use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
|
||||
use self::extract_geo_points::extract_geo_points;
|
||||
use self::extract_word_docids::extract_word_docids;
|
||||
use self::extract_word_level_position_docids::extract_word_level_position_docids;
|
||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
||||
use self::extract_word_position_docids::extract_word_position_docids;
|
||||
use super::helpers::{
|
||||
into_clonable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
|
||||
merge_readers, merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
|
||||
@ -98,10 +98,10 @@ pub(crate) fn data_from_obkv_documents(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer.clone(),
|
||||
lmdb_writer_sx.clone(),
|
||||
extract_word_level_position_docids,
|
||||
extract_word_position_docids,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
TypedChunk::WordLevelPositionDocids,
|
||||
"word-level-position-docids",
|
||||
TypedChunk::WordPositionDocids,
|
||||
"word-position-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task(
|
||||
|
@ -27,7 +27,7 @@ pub use self::transform::{Transform, TransformOutput};
|
||||
use crate::documents::DocumentBatchReader;
|
||||
use crate::update::{
|
||||
Facets, UpdateBuilder, UpdateIndexingStep, WordPrefixDocids, WordPrefixPairProximityDocids,
|
||||
WordsLevelPositions, WordsPrefixesFst,
|
||||
WordPrefixPositionDocids, WordsPrefixesFst,
|
||||
};
|
||||
use crate::{Index, Result};
|
||||
|
||||
@ -412,8 +412,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||
});
|
||||
|
||||
// Run the words level positions update operation.
|
||||
let mut builder = WordsLevelPositions::new(self.wtxn, self.index);
|
||||
// Run the words prefix position docids update operation.
|
||||
let mut builder = WordPrefixPositionDocids::new(self.wtxn, self.index);
|
||||
builder.chunk_compression_type = self.chunk_compression_type;
|
||||
builder.chunk_compression_level = self.chunk_compression_level;
|
||||
builder.max_nb_chunks = self.max_nb_chunks;
|
||||
|
@ -22,7 +22,7 @@ pub(crate) enum TypedChunk {
|
||||
FieldIdWordcountDocids(grenad::Reader<File>),
|
||||
NewDocumentsIds(RoaringBitmap),
|
||||
WordDocids(grenad::Reader<File>),
|
||||
WordLevelPositionDocids(grenad::Reader<File>),
|
||||
WordPositionDocids(grenad::Reader<File>),
|
||||
WordPairProximityDocids(grenad::Reader<File>),
|
||||
FieldIdFacetStringDocids(grenad::Reader<File>),
|
||||
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
||||
@ -110,10 +110,10 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
index.put_words_fst(wtxn, &fst)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::WordLevelPositionDocids(word_level_position_docids_iter) => {
|
||||
TypedChunk::WordPositionDocids(word_position_docids_iter) => {
|
||||
append_entries_into_database(
|
||||
word_level_position_docids_iter,
|
||||
&index.word_level_position_docids,
|
||||
word_position_docids_iter,
|
||||
&index.word_position_docids,
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
|value, _buffer| Ok(value),
|
||||
|
Reference in New Issue
Block a user