clean PR warnings

This commit is contained in:
ManyTheFish
2023-10-10 11:23:16 +02:00
committed by Louis Dureuil
parent 66c2c82a18
commit 1c5705c164
11 changed files with 11 additions and 103 deletions

View File

@ -12,9 +12,7 @@ use serde_json::Value;
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
use crate::error::{InternalError, SerializationError};
use crate::update::index_documents::MergeFn;
use crate::{
absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
};
use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;

View File

@ -5,11 +5,10 @@ use std::iter::FromIterator;
use heed::BytesDecode;
use obkv::KvReaderU16;
use roaring::RoaringBitmap;
use super::helpers::{
create_sorter, create_writer, merge_cbo_roaring_bitmaps, serialize_roaring_bitmap,
sorter_into_reader, try_split_array_at, writer_into_reader, GrenadParameters,
create_sorter, create_writer, merge_cbo_roaring_bitmaps, sorter_into_reader,
try_split_array_at, writer_into_reader, GrenadParameters,
};
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
@ -47,7 +46,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
max_memory.map(|x| x / 3),
);
let mut key_buffer = Vec::new();
let mut value_buffer = Vec::new();
let mut words = BTreeSet::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
@ -66,7 +64,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
document_id,
fid,
&mut key_buffer,
&mut value_buffer,
&mut words,
&mut word_fid_docids_sorter,
)?;
@ -124,7 +121,6 @@ fn words_into_sorter(
document_id: DocumentId,
fid: FieldId,
key_buffer: &mut Vec<u8>,
value_buffer: &mut Vec<u8>,
words: &mut BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {

View File

@ -1,53 +0,0 @@
use std::fs::File;
use std::io::{self, BufReader};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::{relative_from_absolute_position, DocumentId, Result};
/// Extracts the word, field id, and the documents ids where this word appear at this field id.
#[logging_timer::time]
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
todo!("remove me");
let max_memory = indexer.max_memory_by_thread();
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut key_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);
for position in read_u32_ne_bytes(value) {
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
let (fid, _) = relative_from_absolute_position(position);
key_buffer.extend_from_slice(&fid.to_be_bytes());
word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
}
let word_fid_docids_reader = sorter_into_reader(word_fid_docids_sorter, indexer)?;
Ok(word_fid_docids_reader)
}

View File

@ -6,7 +6,6 @@ mod extract_fid_word_count_docids;
mod extract_geo_points;
mod extract_vector_points;
mod extract_word_docids;
mod extract_word_fid_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
@ -26,12 +25,11 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
use self::extract_geo_points::extract_geo_points;
use self::extract_vector_points::extract_vector_points;
use self::extract_word_docids::extract_word_docids;
use self::extract_word_fid_docids::extract_word_fid_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
GrenadParameters, MergeFn, MergeableReader,
as_cloneable_grenad, merge_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
MergeableReader,
};
use super::{helpers, TypedChunk};
use crate::{FieldId, Result};
@ -206,15 +204,6 @@ pub(crate) fn data_from_obkv_documents(
TypedChunk::WordPositionDocids,
"word-position-docids",
);
// spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
// docid_word_positions_chunks,
// indexer,
// lmdb_writer_sx.clone(),
// extract_word_fid_docids,
// merge_cbo_roaring_bitmaps,
// TypedChunk::WordFidDocids,
// "word-fid-docids",
// );
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_strings_chunks,