mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Prepare refactor of facets database
Prepare refactor of facets database
This commit is contained in:
		
				
					committed by
					
						
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							004c09a8e2
						
					
				
				
					commit
					c3f49f766d
				
			@@ -6,7 +6,7 @@ use heed::{BytesDecode, BytesEncode};
 | 
			
		||||
use super::helpers::{
 | 
			
		||||
    create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
 | 
			
		||||
};
 | 
			
		||||
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FieldDocIdFacetF64Codec};
 | 
			
		||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
 | 
			
		||||
use crate::Result;
 | 
			
		||||
 | 
			
		||||
/// Extracts the facet number and the documents ids where this facet number appears.
 | 
			
		||||
@@ -31,13 +31,14 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
 | 
			
		||||
 | 
			
		||||
    let mut cursor = docid_fid_facet_number.into_cursor()?;
 | 
			
		||||
    while let Some((key_bytes, _)) = cursor.move_on_next()? {
 | 
			
		||||
        let (field_id, document_id, number) =
 | 
			
		||||
            FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
 | 
			
		||||
        todo!()
 | 
			
		||||
        // let (field_id, document_id, number) =
 | 
			
		||||
        //     FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
 | 
			
		||||
 | 
			
		||||
        let key = (field_id, 0, number, number);
 | 
			
		||||
        let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
 | 
			
		||||
        // let key = (field_id, 0, number, number);
 | 
			
		||||
        // // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
 | 
			
		||||
 | 
			
		||||
        facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
 | 
			
		||||
        // facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    sorter_into_reader(facet_number_docids_sorter, indexer)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,11 +4,9 @@ use std::{io, str};
 | 
			
		||||
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use super::helpers::{
 | 
			
		||||
    create_sorter, keep_first_prefix_value_merge_roaring_bitmaps, sorter_into_reader,
 | 
			
		||||
    try_split_array_at, GrenadParameters,
 | 
			
		||||
};
 | 
			
		||||
use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
 | 
			
		||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
 | 
			
		||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
 | 
			
		||||
// use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
 | 
			
		||||
use crate::{FieldId, Result};
 | 
			
		||||
 | 
			
		||||
/// Extracts the facet string and the documents ids where this facet string appears.
 | 
			
		||||
@@ -24,7 +22,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 | 
			
		||||
 | 
			
		||||
    let mut facet_string_docids_sorter = create_sorter(
 | 
			
		||||
        grenad::SortAlgorithm::Stable,
 | 
			
		||||
        keep_first_prefix_value_merge_roaring_bitmaps,
 | 
			
		||||
        merge_cbo_roaring_bitmaps, // TODO: check
 | 
			
		||||
        indexer.chunk_compression_type,
 | 
			
		||||
        indexer.chunk_compression_level,
 | 
			
		||||
        indexer.max_nb_chunks,
 | 
			
		||||
@@ -42,14 +40,16 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 | 
			
		||||
        let original_value = str::from_utf8(original_value_bytes)?;
 | 
			
		||||
 | 
			
		||||
        key_buffer.clear();
 | 
			
		||||
        FacetStringLevelZeroCodec::serialize_into(
 | 
			
		||||
            field_id,
 | 
			
		||||
            str::from_utf8(normalized_value_bytes)?,
 | 
			
		||||
            &mut key_buffer,
 | 
			
		||||
        );
 | 
			
		||||
        // TODO
 | 
			
		||||
        // FacetStringLevelZeroCodec::serialize_into(
 | 
			
		||||
        //     field_id,
 | 
			
		||||
        //     str::from_utf8(normalized_value_bytes)?,
 | 
			
		||||
        //     &mut key_buffer,
 | 
			
		||||
        // );
 | 
			
		||||
 | 
			
		||||
        value_buffer.clear();
 | 
			
		||||
        encode_prefix_string(original_value, &mut value_buffer)?;
 | 
			
		||||
        // TODO
 | 
			
		||||
        // encode_prefix_string(original_value, &mut value_buffer)?;
 | 
			
		||||
        let bitmap = RoaringBitmap::from_iter(Some(document_id));
 | 
			
		||||
        bitmap.serialize_into(&mut value_buffer)?;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -25,8 +25,8 @@ use self::extract_word_docids::extract_word_docids;
 | 
			
		||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
 | 
			
		||||
use self::extract_word_position_docids::extract_word_position_docids;
 | 
			
		||||
use super::helpers::{
 | 
			
		||||
    as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
 | 
			
		||||
    merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, MergeableReader,
 | 
			
		||||
    as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
 | 
			
		||||
    GrenadParameters, MergeFn, MergeableReader,
 | 
			
		||||
};
 | 
			
		||||
use super::{helpers, TypedChunk};
 | 
			
		||||
use crate::{FieldId, Result};
 | 
			
		||||
@@ -142,7 +142,7 @@ pub(crate) fn data_from_obkv_documents(
 | 
			
		||||
        indexer,
 | 
			
		||||
        lmdb_writer_sx.clone(),
 | 
			
		||||
        extract_facet_string_docids,
 | 
			
		||||
        keep_first_prefix_value_merge_roaring_bitmaps,
 | 
			
		||||
        merge_roaring_bitmaps, // TODO: check (cbo?)
 | 
			
		||||
        TypedChunk::FieldIdFacetStringDocids,
 | 
			
		||||
        "field-id-facet-string-docids",
 | 
			
		||||
    );
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ use std::result::Result as StdResult;
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use super::read_u32_ne_bytes;
 | 
			
		||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
 | 
			
		||||
// use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
 | 
			
		||||
use crate::heed_codec::CboRoaringBitmapCodec;
 | 
			
		||||
use crate::Result;
 | 
			
		||||
 | 
			
		||||
@@ -49,32 +49,32 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
 | 
			
		||||
    _key: &[u8],
 | 
			
		||||
    values: &[Cow<'a, [u8]>],
 | 
			
		||||
) -> Result<Cow<'a, [u8]>> {
 | 
			
		||||
    if values.len() == 1 {
 | 
			
		||||
        Ok(values[0].clone())
 | 
			
		||||
    } else {
 | 
			
		||||
        let original = decode_prefix_string(&values[0]).unwrap().0;
 | 
			
		||||
        let merged_bitmaps = values
 | 
			
		||||
            .iter()
 | 
			
		||||
            .map(AsRef::as_ref)
 | 
			
		||||
            .map(decode_prefix_string)
 | 
			
		||||
            .map(Option::unwrap)
 | 
			
		||||
            .map(|(_, bitmap_bytes)| bitmap_bytes)
 | 
			
		||||
            .map(RoaringBitmap::deserialize_from)
 | 
			
		||||
            .map(StdResult::unwrap)
 | 
			
		||||
            .reduce(|a, b| a | b)
 | 
			
		||||
            .unwrap();
 | 
			
		||||
// pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
 | 
			
		||||
//     _key: &[u8],
 | 
			
		||||
//     values: &[Cow<'a, [u8]>],
 | 
			
		||||
// ) -> Result<Cow<'a, [u8]>> {
 | 
			
		||||
//     if values.len() == 1 {
 | 
			
		||||
//         Ok(values[0].clone())
 | 
			
		||||
//     } else {
 | 
			
		||||
//         let original = decode_prefix_string(&values[0]).unwrap().0;
 | 
			
		||||
//         let merged_bitmaps = values
 | 
			
		||||
//             .iter()
 | 
			
		||||
//             .map(AsRef::as_ref)
 | 
			
		||||
//             .map(decode_prefix_string)
 | 
			
		||||
//             .map(Option::unwrap)
 | 
			
		||||
//             .map(|(_, bitmap_bytes)| bitmap_bytes)
 | 
			
		||||
//             .map(RoaringBitmap::deserialize_from)
 | 
			
		||||
//             .map(StdResult::unwrap)
 | 
			
		||||
//             .reduce(|a, b| a | b)
 | 
			
		||||
//             .unwrap();
 | 
			
		||||
 | 
			
		||||
        let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
 | 
			
		||||
        let mut buffer = Vec::with_capacity(cap);
 | 
			
		||||
        encode_prefix_string(original, &mut buffer)?;
 | 
			
		||||
        merged_bitmaps.serialize_into(&mut buffer)?;
 | 
			
		||||
        Ok(Cow::Owned(buffer))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
//         let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
 | 
			
		||||
//         let mut buffer = Vec::with_capacity(cap);
 | 
			
		||||
//         encode_prefix_string(original, &mut buffer)?;
 | 
			
		||||
//         merged_bitmaps.serialize_into(&mut buffer)?;
 | 
			
		||||
//         Ok(Cow::Owned(buffer))
 | 
			
		||||
//     }
 | 
			
		||||
// }
 | 
			
		||||
 | 
			
		||||
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
 | 
			
		||||
    Ok(values[0].clone())
 | 
			
		||||
 
 | 
			
		||||
@@ -13,9 +13,9 @@ pub use grenad_helpers::{
 | 
			
		||||
    writer_into_reader, GrenadParameters, MergeableReader,
 | 
			
		||||
};
 | 
			
		||||
pub use merge_functions::{
 | 
			
		||||
    concat_u32s_array, keep_first, keep_first_prefix_value_merge_roaring_bitmaps, keep_latest_obkv,
 | 
			
		||||
    merge_cbo_roaring_bitmaps, merge_obkvs, merge_roaring_bitmaps, merge_two_obkvs,
 | 
			
		||||
    roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
 | 
			
		||||
    concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
 | 
			
		||||
    merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
 | 
			
		||||
    serialize_roaring_bitmap, MergeFn,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// The maximum length a word can be
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,6 @@ use super::helpers::{
 | 
			
		||||
    valid_lmdb_key, CursorClonableMmap,
 | 
			
		||||
};
 | 
			
		||||
use super::{ClonableMmap, MergeFn};
 | 
			
		||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
 | 
			
		||||
use crate::update::index_documents::helpers::as_cloneable_grenad;
 | 
			
		||||
use crate::{
 | 
			
		||||
    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
 | 
			
		||||
@@ -197,13 +196,14 @@ pub(crate) fn write_typed_chunk_into_index(
 | 
			
		||||
                index_is_empty,
 | 
			
		||||
                |value, _buffer| Ok(value),
 | 
			
		||||
                |new_values, db_values, buffer| {
 | 
			
		||||
                    let (_, new_values) = decode_prefix_string(new_values).unwrap();
 | 
			
		||||
                    let new_values = RoaringBitmap::deserialize_from(new_values)?;
 | 
			
		||||
                    let (db_original, db_values) = decode_prefix_string(db_values).unwrap();
 | 
			
		||||
                    let db_values = RoaringBitmap::deserialize_from(db_values)?;
 | 
			
		||||
                    let values = new_values | db_values;
 | 
			
		||||
                    encode_prefix_string(db_original, buffer)?;
 | 
			
		||||
                    Ok(values.serialize_into(buffer)?)
 | 
			
		||||
                    todo!()
 | 
			
		||||
                    // let (_, new_values) = decode_prefix_string(new_values).unwrap();
 | 
			
		||||
                    // let new_values = RoaringBitmap::deserialize_from(new_values)?;
 | 
			
		||||
                    // let (db_original, db_values) = decode_prefix_string(db_values).unwrap();
 | 
			
		||||
                    // let db_values = RoaringBitmap::deserialize_from(db_values)?;
 | 
			
		||||
                    // let values = new_values | db_values;
 | 
			
		||||
                    // encode_prefix_string(db_original, buffer)?;
 | 
			
		||||
                    // Ok(values.serialize_into(buffer)?)
 | 
			
		||||
                },
 | 
			
		||||
            )?;
 | 
			
		||||
            is_merged_database = true;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user