mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-08-02 03:40:00 +00:00
Prepare refactor of facets database
Prepare refactor of facets database
This commit is contained in:
committed by
Loïc Lecrenier
parent
004c09a8e2
commit
c3f49f766d
@ -6,7 +6,7 @@ use heed::{BytesDecode, BytesEncode};
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||
};
|
||||
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FieldDocIdFacetF64Codec};
|
||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
||||
use crate::Result;
|
||||
|
||||
/// Extracts the facet number and the documents ids where this facet number appear.
|
||||
@ -31,13 +31,14 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
|
||||
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
||||
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
||||
let (field_id, document_id, number) =
|
||||
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||
todo!()
|
||||
// let (field_id, document_id, number) =
|
||||
// FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||
|
||||
let key = (field_id, 0, number, number);
|
||||
let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
|
||||
// let key = (field_id, 0, number, number);
|
||||
// // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
|
||||
|
||||
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||
// facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||
}
|
||||
|
||||
sorter_into_reader(facet_number_docids_sorter, indexer)
|
||||
|
@ -4,11 +4,9 @@ use std::{io, str};
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::helpers::{
|
||||
create_sorter, keep_first_prefix_value_merge_roaring_bitmaps, sorter_into_reader,
|
||||
try_split_array_at, GrenadParameters,
|
||||
};
|
||||
use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
|
||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||
// use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
|
||||
use crate::{FieldId, Result};
|
||||
|
||||
/// Extracts the facet string and the documents ids where this facet string appear.
|
||||
@ -24,7 +22,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
|
||||
let mut facet_string_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first_prefix_value_merge_roaring_bitmaps,
|
||||
merge_cbo_roaring_bitmaps, // TODO: check
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
indexer.max_nb_chunks,
|
||||
@ -42,14 +40,16 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
let original_value = str::from_utf8(original_value_bytes)?;
|
||||
|
||||
key_buffer.clear();
|
||||
FacetStringLevelZeroCodec::serialize_into(
|
||||
field_id,
|
||||
str::from_utf8(normalized_value_bytes)?,
|
||||
&mut key_buffer,
|
||||
);
|
||||
// TODO
|
||||
// FacetStringLevelZeroCodec::serialize_into(
|
||||
// field_id,
|
||||
// str::from_utf8(normalized_value_bytes)?,
|
||||
// &mut key_buffer,
|
||||
// );
|
||||
|
||||
value_buffer.clear();
|
||||
encode_prefix_string(original_value, &mut value_buffer)?;
|
||||
// TODO
|
||||
// encode_prefix_string(original_value, &mut value_buffer)?;
|
||||
let bitmap = RoaringBitmap::from_iter(Some(document_id));
|
||||
bitmap.serialize_into(&mut value_buffer)?;
|
||||
|
||||
|
@ -25,8 +25,8 @@ use self::extract_word_docids::extract_word_docids;
|
||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
||||
use self::extract_word_position_docids::extract_word_position_docids;
|
||||
use super::helpers::{
|
||||
as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
|
||||
merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, MergeableReader,
|
||||
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
|
||||
GrenadParameters, MergeFn, MergeableReader,
|
||||
};
|
||||
use super::{helpers, TypedChunk};
|
||||
use crate::{FieldId, Result};
|
||||
@ -142,7 +142,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
extract_facet_string_docids,
|
||||
keep_first_prefix_value_merge_roaring_bitmaps,
|
||||
merge_roaring_bitmaps, // TODO: check (cbo?)
|
||||
TypedChunk::FieldIdFacetStringDocids,
|
||||
"field-id-facet-string-docids",
|
||||
);
|
||||
|
@ -5,7 +5,7 @@ use std::result::Result as StdResult;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::read_u32_ne_bytes;
|
||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
||||
// use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::Result;
|
||||
|
||||
@ -49,32 +49,32 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
|
||||
_key: &[u8],
|
||||
values: &[Cow<'a, [u8]>],
|
||||
) -> Result<Cow<'a, [u8]>> {
|
||||
if values.len() == 1 {
|
||||
Ok(values[0].clone())
|
||||
} else {
|
||||
let original = decode_prefix_string(&values[0]).unwrap().0;
|
||||
let merged_bitmaps = values
|
||||
.iter()
|
||||
.map(AsRef::as_ref)
|
||||
.map(decode_prefix_string)
|
||||
.map(Option::unwrap)
|
||||
.map(|(_, bitmap_bytes)| bitmap_bytes)
|
||||
.map(RoaringBitmap::deserialize_from)
|
||||
.map(StdResult::unwrap)
|
||||
.reduce(|a, b| a | b)
|
||||
.unwrap();
|
||||
// pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
|
||||
// _key: &[u8],
|
||||
// values: &[Cow<'a, [u8]>],
|
||||
// ) -> Result<Cow<'a, [u8]>> {
|
||||
// if values.len() == 1 {
|
||||
// Ok(values[0].clone())
|
||||
// } else {
|
||||
// let original = decode_prefix_string(&values[0]).unwrap().0;
|
||||
// let merged_bitmaps = values
|
||||
// .iter()
|
||||
// .map(AsRef::as_ref)
|
||||
// .map(decode_prefix_string)
|
||||
// .map(Option::unwrap)
|
||||
// .map(|(_, bitmap_bytes)| bitmap_bytes)
|
||||
// .map(RoaringBitmap::deserialize_from)
|
||||
// .map(StdResult::unwrap)
|
||||
// .reduce(|a, b| a | b)
|
||||
// .unwrap();
|
||||
|
||||
let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
|
||||
let mut buffer = Vec::with_capacity(cap);
|
||||
encode_prefix_string(original, &mut buffer)?;
|
||||
merged_bitmaps.serialize_into(&mut buffer)?;
|
||||
Ok(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
// let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
|
||||
// let mut buffer = Vec::with_capacity(cap);
|
||||
// encode_prefix_string(original, &mut buffer)?;
|
||||
// merged_bitmaps.serialize_into(&mut buffer)?;
|
||||
// Ok(Cow::Owned(buffer))
|
||||
// }
|
||||
// }
|
||||
|
||||
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||
Ok(values[0].clone())
|
||||
|
@ -13,9 +13,9 @@ pub use grenad_helpers::{
|
||||
writer_into_reader, GrenadParameters, MergeableReader,
|
||||
};
|
||||
pub use merge_functions::{
|
||||
concat_u32s_array, keep_first, keep_first_prefix_value_merge_roaring_bitmaps, keep_latest_obkv,
|
||||
merge_cbo_roaring_bitmaps, merge_obkvs, merge_roaring_bitmaps, merge_two_obkvs,
|
||||
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
|
||||
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
|
||||
serialize_roaring_bitmap, MergeFn,
|
||||
};
|
||||
|
||||
/// The maximum length a word can be
|
||||
|
@ -13,7 +13,6 @@ use super::helpers::{
|
||||
valid_lmdb_key, CursorClonableMmap,
|
||||
};
|
||||
use super::{ClonableMmap, MergeFn};
|
||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
||||
use crate::update::index_documents::helpers::as_cloneable_grenad;
|
||||
use crate::{
|
||||
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
|
||||
@ -197,13 +196,14 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
index_is_empty,
|
||||
|value, _buffer| Ok(value),
|
||||
|new_values, db_values, buffer| {
|
||||
let (_, new_values) = decode_prefix_string(new_values).unwrap();
|
||||
let new_values = RoaringBitmap::deserialize_from(new_values)?;
|
||||
let (db_original, db_values) = decode_prefix_string(db_values).unwrap();
|
||||
let db_values = RoaringBitmap::deserialize_from(db_values)?;
|
||||
let values = new_values | db_values;
|
||||
encode_prefix_string(db_original, buffer)?;
|
||||
Ok(values.serialize_into(buffer)?)
|
||||
todo!()
|
||||
// let (_, new_values) = decode_prefix_string(new_values).unwrap();
|
||||
// let new_values = RoaringBitmap::deserialize_from(new_values)?;
|
||||
// let (db_original, db_values) = decode_prefix_string(db_values).unwrap();
|
||||
// let db_values = RoaringBitmap::deserialize_from(db_values)?;
|
||||
// let values = new_values | db_values;
|
||||
// encode_prefix_string(db_original, buffer)?;
|
||||
// Ok(values.serialize_into(buffer)?)
|
||||
},
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
|
Reference in New Issue
Block a user