Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-04 01:46:28 +00:00)
	Add FacetsUpdate type that wraps incremental and bulk indexing methods
commit 9b55e582cd (parent 3d145d7f48)
committed by Loïc Lecrenier
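The new `FacetsUpdate` type wraps the incremental and bulk indexing strategies behind a single entry point; the diff below only shows its call sites (`FacetsUpdate::new(index, FacetType::Number, chunk)` followed by `execute(wtxn)`). As a rough, hypothetical sketch of the dispatch such a wrapper could perform, assuming the size-ratio heuristic from the removed TODO comment: only the names `FacetsUpdate`, `FacetType::{Number, String}`, `new`, and `execute` come from the diff; every other type, and the 1/50 starting ratio, are invented stand-ins, not milli's real implementation.

// Hypothetical sketch of a bulk-vs-incremental dispatch; see caveats above.
#[derive(Debug, Clone, Copy)]
enum FacetType {
    Number,
    String,
}

/// Stand-in for a sorted batch of new (facet key, docids) entries.
struct NewData {
    entries: usize,
}

/// Stand-in for the facet database being written to.
struct Database {
    entries: usize,
}

struct FacetsUpdate {
    facet_type: FacetType,
    new_data: NewData,
    /// Rebuild in bulk when the batch exceeds this fraction of the database;
    /// the removed comment suggested 1/50 as an empirical starting point.
    bulk_ratio: f64,
}

impl FacetsUpdate {
    fn new(facet_type: FacetType, new_data: NewData) -> Self {
        Self { facet_type, new_data, bulk_ratio: 1.0 / 50.0 }
    }

    fn execute(self, db: &mut Database) {
        let ratio = self.new_data.entries as f64 / db.entries.max(1) as f64;
        if ratio >= self.bulk_ratio {
            // Large batch: rebuilding the whole structure beats
            // thousands of individual insertions.
            println!("{:?} facets: bulk rebuild", self.facet_type);
        } else {
            // Small batch: insert entries one by one into the existing data.
            println!("{:?} facets: incremental insertions", self.facet_type);
        }
        db.entries += self.new_data.entries;
    }
}

fn main() {
    let mut db = Database { entries: 10_000 };
    // 500 / 10_000 = 1/20 > 1/50: the bulk path is chosen.
    FacetsUpdate::new(FacetType::Number, NewData { entries: 500 }).execute(&mut db);
    // 3 / 10_500 < 1/50: the incremental path is chosen.
    FacetsUpdate::new(FacetType::String, NewData { entries: 3 }).execute(&mut db);
}

The real implementation would measure database and batch size differently and, as the removed comment says, tune the ratio empirically.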
				
@@ -1,5 +1,4 @@
 use std::borrow::Cow;
-use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
 use std::io;
@@ -14,12 +13,12 @@ use super::helpers::{
     valid_lmdb_key, CursorClonableMmap,
 };
 use super::{ClonableMmap, MergeFn};
-use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
+use crate::facet::FacetType;
+use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::as_cloneable_grenad;
-use crate::update::FacetsUpdateIncremental;
 use crate::{
-    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint,
-    Index, Result,
+    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
+    Result,
 };
 
 pub(crate) enum TypedChunk {
@@ -138,78 +137,14 @@ pub(crate) fn write_typed_chunk_into_index(
             )?;
             is_merged_database = true;
         }
-        TypedChunk::FieldIdFacetNumberDocids(facet_id_f64_docids_iter) => {
-            // merge cbo roaring bitmaps is not the correct merger because the data in the DB
-            // is FacetGroupValue and not RoaringBitmap
-            // so I need to create my own merging function
-
-            // facet_id_string_docids is encoded as:
-            // key: FacetKeyCodec<StrRefCodec>
-            // value: CboRoaringBitmapCodec
-            // basically
-
-            // TODO: a condition saying "if I have more than 1/50th of the DB to add,
-            // then I do it in bulk, otherwise I do it incrementally". But instead of 1/50,
-            // it is a ratio I determine empirically
-
-            // for now I only do it incrementally, to see if things work
-            let indexer = FacetsUpdateIncremental::new(
-                index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
-            );
-
-            let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
-
-            let mut cursor = facet_id_f64_docids_iter.into_cursor()?;
-            while let Some((key, value)) = cursor.move_on_next()? {
-                let key =
-                    FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
-                let docids =
-                    CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
-                indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
-                *new_faceted_docids.entry(key.field_id).or_default() |= docids;
-            }
-            for (field_id, new_docids) in new_faceted_docids {
-                let mut docids = index.number_faceted_documents_ids(wtxn, field_id)?;
-                docids |= new_docids;
-                index.put_number_faceted_documents_ids(wtxn, field_id, &docids)?;
-            }
-
+        TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
+            let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
+            indexer.execute(wtxn)?;
             is_merged_database = true;
         }
-        TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids) => {
-            // merge cbo roaring bitmaps is not the correct merger because the data in the DB
-            // is FacetGroupValue and not RoaringBitmap
-            // so I need to create my own merging function
-
-            // facet_id_string_docids is encoded as:
-            // key: FacetKeyCodec<StrRefCodec>
-            // value: CboRoaringBitmapCodec
-            // basically
-
-            // TODO: a condition saying "if I have more than 1/50th of the DB to add,
-            // then I do it in bulk, otherwise I do it incrementally". But instead of 1/50,
-            // it is a ratio I determine empirically
-
-            // for now I only do it incrementally, to see if things work
-            let indexer = FacetsUpdateIncremental::new(
-                index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
-            );
-            let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
-
-            let mut cursor = facet_id_string_docids.into_cursor()?;
-            while let Some((key, value)) = cursor.move_on_next()? {
-                let key =
-                    FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
-                let docids =
-                    CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
-                indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
-                *new_faceted_docids.entry(key.field_id).or_default() |= docids;
-            }
-            for (field_id, new_docids) in new_faceted_docids {
-                let mut docids = index.string_faceted_documents_ids(wtxn, field_id)?;
-                docids |= new_docids;
-                index.put_string_faceted_documents_ids(wtxn, field_id, &docids)?;
-            }
+        TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
+            let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
+            indexer.execute(wtxn)?;
+            is_merged_database = true;
         }
         TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
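Besides inserting the facet entries, the removed loops also tracked which documents became faceted per field, so that the per-field `number_faceted_documents_ids` / `string_faceted_documents_ids` bitmaps could be extended afterwards; presumably `FacetsUpdate::execute` takes over that bookkeeping. Here is a minimal, runnable sketch of just that accumulation pattern, using the `roaring` crate; the field ids and entries are made up for illustration.

use std::collections::HashMap;

use roaring::RoaringBitmap;

/// Matches milli's `FieldId` alias.
type FieldId = u16;

fn main() {
    // Pretend these (field_id, docids) pairs were decoded from a grenad
    // cursor, as in the removed `while let Some((key, value))` loop.
    let decoded: Vec<(FieldId, RoaringBitmap)> = vec![
        (0, (0..3).collect()),
        (0, (10..12).collect()),
        (1, (5..8).collect()),
    ];

    // One bitmap per field id, OR-ing in every batch that touches it; this
    // is the `*new_faceted_docids.entry(..).or_default() |= docids` line
    // from the removed code.
    let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
    for (field_id, docids) in decoded {
        *new_faceted_docids.entry(field_id).or_default() |= docids;
    }

    // The removed code then OR-ed each result into the bitmap stored in
    // LMDB via `index.put_{number,string}_faceted_documents_ids`.
    for (field_id, docids) in &new_faceted_docids {
        println!("field {field_id}: {} faceted documents", docids.len());
    }
}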