Prepare refactor of facets database

Prepare refactor of facets database
This commit is contained in:
Loïc Lecrenier
2022-08-29 16:01:54 +02:00
committed by Loïc Lecrenier
parent 004c09a8e2
commit c3f49f766d
27 changed files with 1662 additions and 1624 deletions

View File

@ -6,7 +6,7 @@ use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FieldDocIdFacetF64Codec};
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
use crate::Result;
/// Extracts the facet number and the document ids where this facet number appears.
@ -31,13 +31,14 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let mut cursor = docid_fid_facet_number.into_cursor()?;
while let Some((key_bytes, _)) = cursor.move_on_next()? {
let (field_id, document_id, number) =
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
todo!()
// let (field_id, document_id, number) =
// FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
let key = (field_id, 0, number, number);
let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
// let key = (field_id, 0, number, number);
// // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
// facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
}
sorter_into_reader(facet_number_docids_sorter, indexer)

View File

@ -4,11 +4,9 @@ use std::{io, str};
use roaring::RoaringBitmap;
use super::helpers::{
create_sorter, keep_first_prefix_value_merge_roaring_bitmaps, sorter_into_reader,
try_split_array_at, GrenadParameters,
};
use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
// use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
use crate::{FieldId, Result};
/// Extracts the facet string and the document ids where this facet string appears.
@ -24,7 +22,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first_prefix_value_merge_roaring_bitmaps,
merge_cbo_roaring_bitmaps, // TODO: check
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@ -42,14 +40,16 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let original_value = str::from_utf8(original_value_bytes)?;
key_buffer.clear();
FacetStringLevelZeroCodec::serialize_into(
field_id,
str::from_utf8(normalized_value_bytes)?,
&mut key_buffer,
);
// TODO
// FacetStringLevelZeroCodec::serialize_into(
// field_id,
// str::from_utf8(normalized_value_bytes)?,
// &mut key_buffer,
// );
value_buffer.clear();
encode_prefix_string(original_value, &mut value_buffer)?;
// TODO
// encode_prefix_string(original_value, &mut value_buffer)?;
let bitmap = RoaringBitmap::from_iter(Some(document_id));
bitmap.serialize_into(&mut value_buffer)?;

View File

@ -25,8 +25,8 @@ use self::extract_word_docids::extract_word_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{
as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, MergeableReader,
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
GrenadParameters, MergeFn, MergeableReader,
};
use super::{helpers, TypedChunk};
use crate::{FieldId, Result};
@ -142,7 +142,7 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
extract_facet_string_docids,
keep_first_prefix_value_merge_roaring_bitmaps,
merge_roaring_bitmaps, // TODO: check (cbo?)
TypedChunk::FieldIdFacetStringDocids,
"field-id-facet-string-docids",
);