mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Introduce the CboRoaringBitmapCodec merge_deladd_into and use it
This commit is contained in:
		
				
					committed by
					
						 Louis Dureuil
						Louis Dureuil
					
				
			
			
				
	
			
			
			
						parent
						
							2d3f15f82c
						
					
				
				
					commit
					560e8f5613
				
			| @@ -6,6 +6,7 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::heed_codec::BytesDecodeOwned; | use crate::heed_codec::BytesDecodeOwned; | ||||||
|  | use crate::update::del_add::{DelAdd, KvReaderDelAdd}; | ||||||
|  |  | ||||||
| /// This is the limit where using a byteorder became less size efficient | /// This is the limit where using a byteorder became less size efficient | ||||||
| /// than using a direct roaring encoding, it is also the point where we are able | /// than using a direct roaring encoding, it is also the point where we are able | ||||||
| @@ -99,6 +100,28 @@ impl CboRoaringBitmapCodec { | |||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Merges a DelAdd delta into a CboRoaringBitmap. | ||||||
|  |     pub fn merge_deladd_into( | ||||||
|  |         deladd: KvReaderDelAdd<'_>, | ||||||
|  |         previous: &[u8], | ||||||
|  |         buffer: &mut Vec<u8>, | ||||||
|  |     ) -> io::Result<()> { | ||||||
|  |         // Deserialize the bitmap that is already there | ||||||
|  |         let mut previous = Self::deserialize_from(previous)?; | ||||||
|  |  | ||||||
|  |         // Remove integers we no longer want in the previous bitmap | ||||||
|  |         if let Some(value) = deladd.get(DelAdd::Deletion) { | ||||||
|  |             previous -= Self::deserialize_from(value)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Insert the new integers we want in the previous bitmap | ||||||
|  |         if let Some(value) = deladd.get(DelAdd::Addition) { | ||||||
|  |             previous |= Self::deserialize_from(value)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         previous.serialize_into(buffer) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { | impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { | ||||||
|   | |||||||
| @@ -134,7 +134,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
| @@ -153,7 +153,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|  |  | ||||||
|             let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?; |             let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?; | ||||||
| @@ -163,7 +163,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|  |  | ||||||
|             let word_fid_docids_iter = unsafe { as_cloneable_grenad(&word_fid_docids_reader) }?; |             let word_fid_docids_iter = unsafe { as_cloneable_grenad(&word_fid_docids_reader) }?; | ||||||
| @@ -173,7 +173,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|  |  | ||||||
|             // create fst from word docids |             // create fst from word docids | ||||||
| @@ -195,7 +195,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
| @@ -216,7 +216,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
| @@ -227,7 +227,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
| @@ -238,7 +238,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
| @@ -249,7 +249,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_is_empty, |                 index_is_empty, | ||||||
|                 deladd_serialize_add_side, |                 deladd_serialize_add_side, | ||||||
|                 merge_cbo_roaring_bitmaps, |                 merge_deladd_cbo_roaring_bitmaps, | ||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
| @@ -388,17 +388,6 @@ fn merge_word_docids_reader_into_fst( | |||||||
|     Ok(builder.into_set()) |     Ok(builder.into_set()) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn merge_cbo_roaring_bitmaps( |  | ||||||
|     new_value: &[u8], |  | ||||||
|     db_value: &[u8], |  | ||||||
|     buffer: &mut Vec<u8>, |  | ||||||
| ) -> Result<()> { |  | ||||||
|     Ok(CboRoaringBitmapCodec::merge_into( |  | ||||||
|         &[Cow::Borrowed(db_value), Cow::Borrowed(new_value)], |  | ||||||
|         buffer, |  | ||||||
|     )?) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// A function that extracts and returns the Add side of a DelAdd obkv. | /// A function that extracts and returns the Add side of a DelAdd obkv. | ||||||
| /// This is useful when there is no previous value in the database and | /// This is useful when there is no previous value in the database and | ||||||
| /// therefore we don't need to do a diff with what's already there. | /// therefore we don't need to do a diff with what's already there. | ||||||
| @@ -409,6 +398,22 @@ fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Resul | |||||||
|     Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default()) |     Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// A function that merges a DelAdd of bitmaps into an already existing bitmap. | ||||||
|  | /// | ||||||
|  | /// The first argument is the DelAdd obkv of CboRoaringBitmaps and | ||||||
|  | /// the second one is the CboRoaringBitmap to merge into. | ||||||
|  | fn merge_deladd_cbo_roaring_bitmaps( | ||||||
|  |     deladd_obkv: &[u8], | ||||||
|  |     previous: &[u8], | ||||||
|  |     buffer: &mut Vec<u8>, | ||||||
|  | ) -> Result<()> { | ||||||
|  |     Ok(CboRoaringBitmapCodec::merge_deladd_into( | ||||||
|  |         KvReaderDelAdd::new(deladd_obkv), | ||||||
|  |         previous, | ||||||
|  |         buffer, | ||||||
|  |     )?) | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Write provided entries in database using serialize_value function. | /// Write provided entries in database using serialize_value function. | ||||||
| /// merge_values function is used if an entry already exists in the database. | /// merge_values function is used if an entry already exists in the database. | ||||||
| fn write_entries_into_database<R, K, V, FS, FM>( | fn write_entries_into_database<R, K, V, FS, FM>( | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user