mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-14 00:36:25 +00:00
Finally bump grenad to v0.4.1
This commit is contained in:
@ -18,8 +18,8 @@ use crate::{absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_A
|
||||
/// Returns the generated internal documents ids and a grenad reader
|
||||
/// with the list of extracted words from the given chunk of documents.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_docid_word_positions<R: io::Read>(
|
||||
mut obkv_documents: grenad::Reader<R>,
|
||||
pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
@ -46,7 +46,8 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
}
|
||||
let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default());
|
||||
|
||||
while let Some((key, value)) = obkv_documents.next()? {
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
let document_id = key
|
||||
.try_into()
|
||||
.map(u32::from_be_bytes)
|
||||
|
@ -14,8 +14,8 @@ use crate::Result;
|
||||
/// Returns a grenad reader with the list of extracted facet numbers and
|
||||
/// documents ids from the given chunk of docid facet number positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_facet_number_docids<R: io::Read>(
|
||||
mut docid_fid_facet_number: grenad::Reader<R>,
|
||||
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -28,7 +28,8 @@ pub fn extract_facet_number_docids<R: io::Read>(
|
||||
max_memory,
|
||||
);
|
||||
|
||||
while let Some((key_bytes, _)) = docid_fid_facet_number.next()? {
|
||||
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
||||
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
||||
let (field_id, document_id, number) =
|
||||
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||
|
||||
|
@ -16,8 +16,8 @@ use crate::{FieldId, Result};
|
||||
/// Returns a grenad reader with the list of extracted facet strings and
|
||||
/// documents ids from the given chunk of docid facet string positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_facet_string_docids<R: io::Read>(
|
||||
mut docid_fid_facet_string: grenad::Reader<R>,
|
||||
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -32,7 +32,8 @@ pub fn extract_facet_string_docids<R: io::Read>(
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut value_buffer = Vec::new();
|
||||
while let Some((key, original_value_bytes)) = docid_fid_facet_string.next()? {
|
||||
let mut cursor = docid_fid_facet_string.into_cursor()?;
|
||||
while let Some((key, original_value_bytes)) = cursor.move_on_next()? {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
||||
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
||||
let (document_id_bytes, normalized_value_bytes) = try_split_array_at(bytes).unwrap();
|
||||
|
@ -16,8 +16,8 @@ use crate::{DocumentId, FieldId, Result};
|
||||
/// Returns the generated grenad reader containing the docid, the fid and the original value as key
|
||||
/// and the normalized value as value extracted from the given chunk of documents.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_fid_docid_facet_values<R: io::Read>(
|
||||
mut obkv_documents: grenad::Reader<R>,
|
||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
@ -40,7 +40,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read>(
|
||||
);
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
while let Some((docid_bytes, value)) = obkv_documents.next()? {
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
|
||||
let obkv = obkv::KvReader::new(value);
|
||||
|
||||
for (field_id, field_bytes) in obkv.iter() {
|
||||
|
@ -18,8 +18,8 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
|
||||
/// Returns a grenad reader with the list of extracted field id word counts
|
||||
/// and documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_fid_word_count_docids<R: io::Read>(
|
||||
mut docid_word_positions: grenad::Reader<R>,
|
||||
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -36,7 +36,8 @@ pub fn extract_fid_word_count_docids<R: io::Read>(
|
||||
let mut document_fid_wordcount = HashMap::new();
|
||||
let mut current_document_id = None;
|
||||
|
||||
while let Some((key, value)) = docid_word_positions.next()? {
|
||||
let mut cursor = docid_word_positions.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
let (document_id_bytes, _word_bytes) = try_split_array_at(key)
|
||||
.ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
@ -10,17 +10,20 @@ use crate::{FieldId, InternalError, Result, UserError};
|
||||
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
|
||||
pub fn extract_geo_points<R: io::Read>(
|
||||
mut obkv_documents: grenad::Reader<R>,
|
||||
pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
geo_field_id: FieldId,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let mut writer = tempfile::tempfile().and_then(|file| {
|
||||
create_writer(indexer.chunk_compression_type, indexer.chunk_compression_level, file)
|
||||
})?;
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
while let Some((docid_bytes, value)) = obkv_documents.next()? {
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
|
||||
let obkv = obkv::KvReader::new(value);
|
||||
let point: Value = match obkv.get(geo_field_id) {
|
||||
Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?,
|
||||
|
@ -17,8 +17,8 @@ use crate::Result;
|
||||
/// Returns a grenad reader with the list of extracted words and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_word_docids<R: io::Read>(
|
||||
mut docid_word_positions: grenad::Reader<R>,
|
||||
pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -32,7 +32,8 @@ pub fn extract_word_docids<R: io::Read>(
|
||||
);
|
||||
|
||||
let mut value_buffer = Vec::new();
|
||||
while let Some((key, _value)) = docid_word_positions.next()? {
|
||||
let mut cursor = docid_word_positions.into_cursor()?;
|
||||
while let Some((key, _value)) = cursor.move_on_next()? {
|
||||
let (document_id_bytes, word_bytes) = try_split_array_at(key)
|
||||
.ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
@ -17,8 +17,8 @@ use crate::{DocumentId, Result};
|
||||
/// Returns a grenad reader with the list of extracted word pairs proximities and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_word_pair_proximity_docids<R: io::Read>(
|
||||
mut docid_word_positions: grenad::Reader<R>,
|
||||
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -35,7 +35,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read>(
|
||||
let mut document_word_positions_heap = BinaryHeap::new();
|
||||
let mut current_document_id = None;
|
||||
|
||||
while let Some((key, value)) = docid_word_positions.next()? {
|
||||
let mut cursor = docid_word_positions.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
let (document_id_bytes, word_bytes) = try_split_array_at(key)
|
||||
.ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
@ -14,8 +14,8 @@ use crate::{DocumentId, Result};
|
||||
/// Returns a grenad reader with the list of extracted words at positions and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_word_position_docids<R: io::Read>(
|
||||
mut docid_word_positions: grenad::Reader<R>,
|
||||
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -29,7 +29,8 @@ pub fn extract_word_position_docids<R: io::Read>(
|
||||
);
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
while let Some((key, value)) = docid_word_positions.next()? {
|
||||
let mut cursor = docid_word_positions.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
let (document_id_bytes, word_bytes) = try_split_array_at(key)
|
||||
.ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
let document_id = DocumentId::from_be_bytes(document_id_bytes);
|
||||
|
Reference in New Issue
Block a user