mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Rename facet codecs and refine FacetsUpdate API
This commit is contained in:
		
				
					committed by
					
						 Loïc Lecrenier
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							485a72306d
						
					
				
				
					commit
					330c9eb1b2
				
			| @@ -4,7 +4,9 @@ use heed::Result; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level}; | use super::{get_first_facet_value, get_highest_level}; | ||||||
| use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec}; | use crate::heed_codec::facet::{ | ||||||
|  |     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, | ||||||
|  | }; | ||||||
|  |  | ||||||
| pub fn iterate_over_facet_distribution<'t, CB>( | pub fn iterate_over_facet_distribution<'t, CB>( | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
| @@ -78,7 +80,8 @@ where | |||||||
|         if level == 0 { |         if level == 0 { | ||||||
|             return self.iterate_level_0(candidates, starting_bound, group_size); |             return self.iterate_level_0(candidates, starting_bound, group_size); | ||||||
|         } |         } | ||||||
|         let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound }; |         let starting_key = | ||||||
|  |             FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound }; | ||||||
|         let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size); |         let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size); | ||||||
|  |  | ||||||
|         for el in iter { |         for el in iter { | ||||||
| @@ -109,16 +112,14 @@ where | |||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod tests { | mod tests { | ||||||
|     use std::ops::ControlFlow; |     use super::iterate_over_facet_distribution; | ||||||
|  |     use crate::heed_codec::facet::OrderedF64Codec; | ||||||
|  |     use crate::milli_snap; | ||||||
|  |     use crate::search::facet::test::FacetIndex; | ||||||
|     use heed::BytesDecode; |     use heed::BytesDecode; | ||||||
|     use rand::{Rng, SeedableRng}; |     use rand::{Rng, SeedableRng}; | ||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |     use std::ops::ControlFlow; | ||||||
|     use super::iterate_over_facet_distribution; |  | ||||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; |  | ||||||
|     use crate::milli_snap; |  | ||||||
|     use crate::search::facet::test::FacetIndex; |  | ||||||
|  |  | ||||||
|     fn get_simple_index() -> FacetIndex<OrderedF64Codec> { |     fn get_simple_index() -> FacetIndex<OrderedF64Codec> { | ||||||
|         let index = FacetIndex::<OrderedF64Codec>::new(4, 8); |         let index = FacetIndex::<OrderedF64Codec>::new(4, 8); | ||||||
|   | |||||||
| @@ -4,7 +4,9 @@ use heed::BytesEncode; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||||
| use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef}; | use crate::heed_codec::facet::{ | ||||||
|  |     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, | ||||||
|  | }; | ||||||
| use crate::Result; | use crate::Result; | ||||||
|  |  | ||||||
| pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( | pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( | ||||||
| @@ -117,7 +119,8 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|             return self.run_level_0(starting_left_bound, group_size); |             return self.run_level_0(starting_left_bound, group_size); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound }; |         let left_key = | ||||||
|  |             FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound }; | ||||||
|         let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); |         let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); | ||||||
|  |  | ||||||
|         let (mut previous_key, mut previous_value) = iter.next().unwrap()?; |         let (mut previous_key, mut previous_value) = iter.next().unwrap()?; | ||||||
| @@ -258,8 +261,8 @@ mod tests { | |||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|     use super::find_docids_of_facet_within_bounds; |     use super::find_docids_of_facet_within_bounds; | ||||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; |  | ||||||
|     use crate::heed_codec::facet::FacetGroupKeyCodec; |     use crate::heed_codec::facet::FacetGroupKeyCodec; | ||||||
|  |     use crate::heed_codec::facet::OrderedF64Codec; | ||||||
|     use crate::milli_snap; |     use crate::milli_snap; | ||||||
|     use crate::search::facet::test::FacetIndex; |     use crate::search::facet::test::FacetIndex; | ||||||
|     use crate::snapshot_tests::display_bitmap; |     use crate::snapshot_tests::display_bitmap; | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level}; | use super::{get_first_facet_value, get_highest_level}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, |     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub fn ascending_facet_sort<'t>( | pub fn ascending_facet_sort<'t>( | ||||||
| @@ -86,7 +86,7 @@ mod tests { | |||||||
|     use rand::{Rng, SeedableRng}; |     use rand::{Rng, SeedableRng}; | ||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; |     use crate::heed_codec::facet::OrderedF64Codec; | ||||||
|     use crate::milli_snap; |     use crate::milli_snap; | ||||||
|     use crate::search::facet::facet_sort_ascending::ascending_facet_sort; |     use crate::search::facet::facet_sort_ascending::ascending_facet_sort; | ||||||
|     use crate::search::facet::test::FacetIndex; |     use crate::search::facet::test::FacetIndex; | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, |     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub fn descending_facet_sort<'t>( | pub fn descending_facet_sort<'t>( | ||||||
| @@ -37,7 +37,9 @@ struct DescendingFacetSort<'t> { | |||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     stack: Vec<( |     stack: Vec<( | ||||||
|         RoaringBitmap, |         RoaringBitmap, | ||||||
|         std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>, |         std::iter::Take< | ||||||
|  |             heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||||
|  |         >, | ||||||
|         Bound<&'t [u8]>, |         Bound<&'t [u8]>, | ||||||
|     )>, |     )>, | ||||||
| } | } | ||||||
| @@ -72,7 +74,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | |||||||
|                     if level == 0 { |                     if level == 0 { | ||||||
|                         return Some(Ok(bitmap)); |                         return Some(Ok(bitmap)); | ||||||
|                     } |                     } | ||||||
|                     let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound }; |                     let starting_key_below = | ||||||
|  |                         FacetGroupKey { field_id, level: level - 1, left_bound }; | ||||||
|  |  | ||||||
|                     let end_key_kelow = match *right_bound { |                     let end_key_kelow = match *right_bound { | ||||||
|                         Bound::Included(right) => Bound::Included(FacetGroupKey { |                         Bound::Included(right) => Bound::Included(FacetGroupKey { | ||||||
| @@ -89,15 +92,17 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | |||||||
|                     }; |                     }; | ||||||
|                     let prev_right_bound = *right_bound; |                     let prev_right_bound = *right_bound; | ||||||
|                     *right_bound = Bound::Excluded(left_bound); |                     *right_bound = Bound::Excluded(left_bound); | ||||||
|                     let iter = |                     let iter = match self | ||||||
|                         match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range( |                         .db | ||||||
|  |                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||||
|  |                         .rev_range( | ||||||
|                             &self.rtxn, |                             &self.rtxn, | ||||||
|                             &(Bound::Included(starting_key_below), end_key_kelow), |                             &(Bound::Included(starting_key_below), end_key_kelow), | ||||||
|                         ) { |                         ) { | ||||||
|                             Ok(iter) => iter, |                         Ok(iter) => iter, | ||||||
|                             Err(e) => return Some(Err(e.into())), |                         Err(e) => return Some(Err(e.into())), | ||||||
|                         } |                     } | ||||||
|                         .take(group_size as usize); |                     .take(group_size as usize); | ||||||
|  |  | ||||||
|                     self.stack.push((bitmap, iter, prev_right_bound)); |                     self.stack.push((bitmap, iter, prev_right_bound)); | ||||||
|                     continue 'outer; |                     continue 'outer; | ||||||
| @@ -114,8 +119,8 @@ mod tests { | |||||||
|     use rand::{Rng, SeedableRng}; |     use rand::{Rng, SeedableRng}; | ||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; |     use crate::heed_codec::facet::OrderedF64Codec; | ||||||
|     use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef}; |     use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; | ||||||
|     use crate::milli_snap; |     use crate::milli_snap; | ||||||
|     use crate::search::facet::facet_sort_descending::descending_facet_sort; |     use crate::search::facet::facet_sort_descending::descending_facet_sort; | ||||||
|     use crate::search::facet::test::FacetIndex; |     use crate::search::facet::test::FacetIndex; | ||||||
|   | |||||||
| @@ -1,30 +1,24 @@ | |||||||
|  | use crate::facet::FacetType; | ||||||
|  | use crate::heed_codec::facet::{ | ||||||
|  |     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
|  | }; | ||||||
|  | use crate::update::index_documents::{create_writer, writer_into_reader}; | ||||||
|  | use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | ||||||
|  | use grenad::CompressionType; | ||||||
|  | use heed::types::ByteSlice; | ||||||
|  | use heed::{BytesEncode, Error, RoTxn, RwTxn}; | ||||||
|  | use log::debug; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::cmp; | use std::cmp; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::num::NonZeroUsize; |  | ||||||
|  |  | ||||||
| use grenad::CompressionType; |  | ||||||
| use heed::types::{ByteSlice, DecodeIgnore}; |  | ||||||
| use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn}; |  | ||||||
| use log::debug; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| use crate::error::InternalError; |  | ||||||
| use crate::facet::FacetType; |  | ||||||
| use crate::heed_codec::facet::{ |  | ||||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, |  | ||||||
| }; |  | ||||||
| use crate::update::index_documents::{ |  | ||||||
|     create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader, |  | ||||||
| }; |  | ||||||
| use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; |  | ||||||
|  |  | ||||||
| pub struct FacetsUpdateBulk<'i> { | pub struct FacetsUpdateBulk<'i> { | ||||||
|     index: &'i Index, |     index: &'i Index, | ||||||
|     database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||||
|     level_group_size: usize, |     level_group_size: u8, | ||||||
|     min_level_size: usize, |     min_level_size: u8, | ||||||
|     facet_type: FacetType, |     facet_type: FacetType, | ||||||
|     // None if level 0 does not need to be updated |     // None if level 0 does not need to be updated | ||||||
|     new_data: Option<grenad::Reader<File>>, |     new_data: Option<grenad::Reader<File>>, | ||||||
| @@ -39,9 +33,9 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|         FacetsUpdateBulk { |         FacetsUpdateBulk { | ||||||
|             index, |             index, | ||||||
|             database: match facet_type { |             database: match facet_type { | ||||||
|                 FacetType::String => { |                 FacetType::String => index | ||||||
|                     index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                     .facet_id_string_docids | ||||||
|                 } |                     .remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), | ||||||
|                 FacetType::Number => { |                 FacetType::Number => { | ||||||
|                     index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                     index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||||
|                 } |                 } | ||||||
| @@ -60,9 +54,9 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|         FacetsUpdateBulk { |         FacetsUpdateBulk { | ||||||
|             index, |             index, | ||||||
|             database: match facet_type { |             database: match facet_type { | ||||||
|                 FacetType::String => { |                 FacetType::String => index | ||||||
|                     index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                     .facet_id_string_docids | ||||||
|                 } |                     .remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), | ||||||
|                 FacetType::Number => { |                 FacetType::Number => { | ||||||
|                     index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                     index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||||
|                 } |                 } | ||||||
| @@ -77,14 +71,14 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|     /// The number of elements from the level below that are represented by a single element in the level above |     /// The number of elements from the level below that are represented by a single element in the level above | ||||||
|     /// |     /// | ||||||
|     /// This setting is always greater than or equal to 2. |     /// This setting is always greater than or equal to 2. | ||||||
|     pub fn level_group_size(&mut self, value: NonZeroUsize) -> &mut Self { |     pub fn level_group_size(mut self, value: u8) -> Self { | ||||||
|         self.level_group_size = cmp::max(value.get(), 2); |         self.level_group_size = cmp::max(value, 2); | ||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// The minimum number of elements that a level is allowed to have. |     /// The minimum number of elements that a level is allowed to have. | ||||||
|     pub fn min_level_size(&mut self, value: NonZeroUsize) -> &mut Self { |     pub fn min_level_size(mut self, value: u8) -> Self { | ||||||
|         self.min_level_size = value.get(); |         self.min_level_size = cmp::max(value, 1); | ||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -109,8 +103,6 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|         } |         } | ||||||
|         self.update_level0(wtxn)?; |         self.update_level0(wtxn)?; | ||||||
|  |  | ||||||
|         // let mut nested_wtxn = self.index.env.nested_write_txn(wtxn)?; |  | ||||||
|  |  | ||||||
|         for &field_id in faceted_fields.iter() { |         for &field_id in faceted_fields.iter() { | ||||||
|             let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, &wtxn)?; |             let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, &wtxn)?; | ||||||
|  |  | ||||||
| @@ -119,10 +111,6 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|             for level_reader in level_readers { |             for level_reader in level_readers { | ||||||
|                 let mut cursor = level_reader.into_cursor()?; |                 let mut cursor = level_reader.into_cursor()?; | ||||||
|                 while let Some((k, v)) = cursor.move_on_next()? { |                 while let Some((k, v)) = cursor.move_on_next()? { | ||||||
|                     let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap(); |  | ||||||
|                     let value = FacetGroupValueCodec::bytes_decode(v).unwrap(); |  | ||||||
|                     println!("inserting {key:?} {value:?}"); |  | ||||||
|  |  | ||||||
|                     self.database.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?; |                     self.database.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @@ -141,14 +129,12 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|             let mut database = self.database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>(); |             let mut database = self.database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>(); | ||||||
|             let mut cursor = new_data.into_cursor()?; |             let mut cursor = new_data.into_cursor()?; | ||||||
|             while let Some((key, value)) = cursor.move_on_next()? { |             while let Some((key, value)) = cursor.move_on_next()? { | ||||||
|                 if valid_lmdb_key(key) { |                 buffer.clear(); | ||||||
|                     buffer.clear(); |                 // the group size for level 0 | ||||||
|                     // the group size for level 0 |                 buffer.push(1); | ||||||
|                     buffer.push(1); |                 // then we extend the buffer with the docids bitmap | ||||||
|                     // then we extend the buffer with the docids bitmap |                 buffer.extend_from_slice(value); | ||||||
|                     buffer.extend_from_slice(value); |                 unsafe { database.append(key, &buffer)? }; | ||||||
|                     unsafe { database.append(key, &buffer)? }; |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             let mut buffer = Vec::new(); |             let mut buffer = Vec::new(); | ||||||
| @@ -156,25 +142,24 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|  |  | ||||||
|             let mut cursor = new_data.into_cursor()?; |             let mut cursor = new_data.into_cursor()?; | ||||||
|             while let Some((key, value)) = cursor.move_on_next()? { |             while let Some((key, value)) = cursor.move_on_next()? { | ||||||
|                 if valid_lmdb_key(key) { |                 // the value is a CboRoaringBitmap, but I still need to prepend the | ||||||
|                     buffer.clear(); |                 // group size for level 0 (= 1) to it | ||||||
|                     // the group size for level 0 |                 buffer.clear(); | ||||||
|                     buffer.push(1); |                 buffer.push(1); | ||||||
|                     // then we extend the buffer with the docids bitmap |                 // then we extend the buffer with the docids bitmap | ||||||
|                     match database.get(wtxn, key)? { |                 match database.get(wtxn, key)? { | ||||||
|                         Some(prev_value) => { |                     Some(prev_value) => { | ||||||
|                             let old_bitmap = &prev_value[1..]; |                         let old_bitmap = &prev_value[1..]; | ||||||
|                             CboRoaringBitmapCodec::merge_into( |                         CboRoaringBitmapCodec::merge_into( | ||||||
|                                 &[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)], |                             &[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)], | ||||||
|                                 &mut buffer, |                             &mut buffer, | ||||||
|                             )?; |                         )?; | ||||||
|                         } |                     } | ||||||
|                         None => { |                     None => { | ||||||
|                             buffer.extend_from_slice(value); |                         buffer.extend_from_slice(value); | ||||||
|                         } |                     } | ||||||
|                     }; |                 }; | ||||||
|                     database.put(wtxn, key, &buffer)?; |                 database.put(wtxn, key, &buffer)?; | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -186,7 +171,7 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|         txn: &RoTxn, |         txn: &RoTxn, | ||||||
|     ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> { |     ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> { | ||||||
|         // TODO: first check whether there is anything in level 0 |         // TODO: first check whether there is anything in level 0? | ||||||
|         let algo = ComputeHigherLevels { |         let algo = ComputeHigherLevels { | ||||||
|             rtxn: txn, |             rtxn: txn, | ||||||
|             db: &self.database, |             db: &self.database, | ||||||
| @@ -212,8 +197,8 @@ struct ComputeHigherLevels<'t> { | |||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     level_group_size: usize, |     level_group_size: u8, | ||||||
|     min_level_size: usize, |     min_level_size: u8, | ||||||
| } | } | ||||||
| impl<'t> ComputeHigherLevels<'t> { | impl<'t> ComputeHigherLevels<'t> { | ||||||
|     fn read_level_0( |     fn read_level_0( | ||||||
| @@ -248,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> { | |||||||
|             } |             } | ||||||
|             bitmaps.push(docids); |             bitmaps.push(docids); | ||||||
|  |  | ||||||
|             if bitmaps.len() == self.level_group_size { |             if bitmaps.len() == self.level_group_size as usize { | ||||||
|                 handle_group(&bitmaps, left_bound)?; |                 handle_group(&bitmaps, left_bound)?; | ||||||
|                 first_iteration_for_new_group = true; |                 first_iteration_for_new_group = true; | ||||||
|                 bitmaps.clear(); |                 bitmaps.clear(); | ||||||
| @@ -265,9 +250,8 @@ impl<'t> ComputeHigherLevels<'t> { | |||||||
|     /// Compute the content of the database levels from its level 0 for the given field id. |     /// Compute the content of the database levels from its level 0 for the given field id. | ||||||
|     /// |     /// | ||||||
|     /// ## Returns: |     /// ## Returns: | ||||||
|     /// 1. a vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1` |     /// A vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1` | ||||||
|     /// that must be inserted into the database. |     /// that must be inserted into the database. | ||||||
|     /// 2. a roaring bitmap of all the document ids present in the database |  | ||||||
|     fn compute_higher_levels( |     fn compute_higher_levels( | ||||||
|         &self, |         &self, | ||||||
|         level: u8, |         level: u8, | ||||||
| @@ -302,7 +286,7 @@ impl<'t> ComputeHigherLevels<'t> { | |||||||
|                 left_bounds.push(left_bound); |                 left_bounds.push(left_bound); | ||||||
|  |  | ||||||
|                 bitmaps.push(combined_bitmap); |                 bitmaps.push(combined_bitmap); | ||||||
|                 if bitmaps.len() != self.level_group_size { |                 if bitmaps.len() != self.level_group_size as usize { | ||||||
|                     return Ok(()); |                     return Ok(()); | ||||||
|                 } |                 } | ||||||
|                 let left_bound = left_bounds.first().unwrap(); |                 let left_bound = left_bounds.first().unwrap(); | ||||||
| @@ -312,8 +296,8 @@ impl<'t> ComputeHigherLevels<'t> { | |||||||
|                     bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) |                     bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) | ||||||
|                 { |                 { | ||||||
|                     let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; |                     let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; | ||||||
|                     let key = |                     let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key) | ||||||
|                         FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?; |                         .ok_or(Error::Encoding)?; | ||||||
|                     let value = FacetGroupValue { size: group_size, bitmap }; |                     let value = FacetGroupValue { size: group_size, bitmap }; | ||||||
|                     let value = |                     let value = | ||||||
|                         FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; |                         FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||||
| @@ -330,8 +314,8 @@ impl<'t> ComputeHigherLevels<'t> { | |||||||
|                 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) |                 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) | ||||||
|             { |             { | ||||||
|                 let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; |                 let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; | ||||||
|                 let key = |                 let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key) | ||||||
|                     FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?; |                     .ok_or(Error::Encoding)?; | ||||||
|                 let value = FacetGroupValue { size: group_size, bitmap }; |                 let value = FacetGroupValue { size: group_size, bitmap }; | ||||||
|                 let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; |                 let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||||
|                 cur_writer.insert(key, value)?; |                 cur_writer.insert(key, value)?; | ||||||
| @@ -340,6 +324,10 @@ impl<'t> ComputeHigherLevels<'t> { | |||||||
|         } |         } | ||||||
|         if cur_writer_len > self.min_level_size { |         if cur_writer_len > self.min_level_size { | ||||||
|             sub_writers.push(writer_into_reader(cur_writer)?); |             sub_writers.push(writer_into_reader(cur_writer)?); | ||||||
|  |         } else { | ||||||
|  |             if !bitmaps.is_empty() { | ||||||
|  |                 handle_group(&bitmaps, left_bounds.first().unwrap())?; | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|         return Ok(sub_writers); |         return Ok(sub_writers); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ use heed::{BytesDecode, Error, RoTxn, RwTxn}; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, |     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
| use crate::search::facet::get_highest_level; | use crate::search::facet::get_highest_level; | ||||||
| use crate::Result; | use crate::Result; | ||||||
| @@ -20,14 +20,26 @@ enum DeletionResult { | |||||||
|  |  | ||||||
| pub struct FacetsUpdateIncremental { | pub struct FacetsUpdateIncremental { | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||||
|     group_size: usize, |     group_size: u8, | ||||||
|     min_level_size: usize, |     min_level_size: u8, | ||||||
|     max_group_size: usize, |     max_group_size: u8, | ||||||
| } | } | ||||||
| impl FacetsUpdateIncremental { | impl FacetsUpdateIncremental { | ||||||
|     pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self { |     pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self { | ||||||
|         Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 } |         Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 } | ||||||
|     } |     } | ||||||
|  |     pub fn group_size(mut self, size: u8) -> Self { | ||||||
|  |         self.group_size = size; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |     pub fn min_level_size(mut self, size: u8) -> Self { | ||||||
|  |         self.min_level_size = size; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |     pub fn max_group_size(mut self, size: u8) -> Self { | ||||||
|  |         self.max_group_size = size; | ||||||
|  |         self | ||||||
|  |     } | ||||||
| } | } | ||||||
| impl FacetsUpdateIncremental { | impl FacetsUpdateIncremental { | ||||||
|     fn find_insertion_key_value( |     fn find_insertion_key_value( | ||||||
| @@ -178,12 +190,7 @@ impl FacetsUpdateIncremental { | |||||||
|         let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap(); |         let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap(); | ||||||
|  |  | ||||||
|         updated_value.size += 1; |         updated_value.size += 1; | ||||||
|         if updated_value.size as usize == max_group_size { |         if updated_value.size == max_group_size { | ||||||
|             // need to split it |  | ||||||
|             // recompute left element and right element |  | ||||||
|             // replace current group by left element |  | ||||||
|             // add one more group to the right |  | ||||||
|  |  | ||||||
|             let size_left = max_group_size / 2; |             let size_left = max_group_size / 2; | ||||||
|             let size_right = max_group_size - size_left; |             let size_right = max_group_size - size_left; | ||||||
|  |  | ||||||
| @@ -201,7 +208,7 @@ impl FacetsUpdateIncremental { | |||||||
|                 )? |                 )? | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
|  |  | ||||||
|             let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size); |             let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size as usize); | ||||||
|  |  | ||||||
|             let group_left = { |             let group_left = { | ||||||
|                 let mut values_left = RoaringBitmap::new(); |                 let mut values_left = RoaringBitmap::new(); | ||||||
| @@ -234,8 +241,11 @@ impl FacetsUpdateIncremental { | |||||||
|                     values_right |= &value.bitmap; |                     values_right |= &value.bitmap; | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 let key = |                 let key = FacetGroupKey { | ||||||
|                     FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; |                     field_id, | ||||||
|  |                     level, | ||||||
|  |                     left_bound: right_start_key.unwrap().to_vec(), | ||||||
|  |                 }; | ||||||
|                 let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; |                 let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; | ||||||
|                 (key, value) |                 (key, value) | ||||||
|             }; |             }; | ||||||
| @@ -288,7 +298,7 @@ impl FacetsUpdateIncremental { | |||||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? |             .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? | ||||||
|             .count(); |             .count(); | ||||||
|  |  | ||||||
|         if size_highest_level < self.group_size * self.min_level_size { |         if size_highest_level < self.group_size as usize * self.min_level_size as usize { | ||||||
|             return Ok(()); |             return Ok(()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -438,7 +448,7 @@ impl FacetsUpdateIncremental { | |||||||
|                 .as_polymorph() |                 .as_polymorph() | ||||||
|                 .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? |                 .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? | ||||||
|                 .count() |                 .count() | ||||||
|                 >= self.group_size |                 >= self.min_level_size as usize | ||||||
|         { |         { | ||||||
|             return Ok(()); |             return Ok(()); | ||||||
|         } |         } | ||||||
| @@ -450,7 +460,9 @@ impl FacetsUpdateIncremental { | |||||||
|         while let Some(el) = iter.next() { |         while let Some(el) = iter.next() { | ||||||
|             let (k, _) = el?; |             let (k, _) = el?; | ||||||
|             to_delete.push( |             to_delete.push( | ||||||
|                 FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), |                 FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k) | ||||||
|  |                     .ok_or(Error::Encoding)? | ||||||
|  |                     .into_owned(), | ||||||
|             ); |             ); | ||||||
|         } |         } | ||||||
|         drop(iter); |         drop(iter); | ||||||
| @@ -469,9 +481,9 @@ mod tests { | |||||||
|     use rand::{Rng, SeedableRng}; |     use rand::{Rng, SeedableRng}; | ||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; |     use crate::heed_codec::facet::OrderedF64Codec; | ||||||
|     use crate::heed_codec::facet::str_ref::StrRefCodec; |     use crate::heed_codec::facet::StrRefCodec; | ||||||
|     use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; |     use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||||
|     use crate::milli_snap; |     use crate::milli_snap; | ||||||
|     use crate::search::facet::get_highest_level; |     use crate::search::facet::get_highest_level; | ||||||
|     use crate::search::facet::test::FacetIndex; |     use crate::search::facet::test::FacetIndex; | ||||||
|   | |||||||
| @@ -4,7 +4,6 @@ use crate::{ | |||||||
|     heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}, |     heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}, | ||||||
|     CboRoaringBitmapCodec, FieldId, Index, Result, |     CboRoaringBitmapCodec, FieldId, Index, Result, | ||||||
| }; | }; | ||||||
| use grenad::CompressionType; |  | ||||||
| use heed::BytesDecode; | use heed::BytesDecode; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use std::{collections::HashMap, fs::File}; | use std::{collections::HashMap, fs::File}; | ||||||
| @@ -42,26 +41,17 @@ impl<'i> FacetsUpdate<'i> { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // /// The number of elements from the level below that are represented by a single element in the level above |  | ||||||
|     // /// |  | ||||||
|     // /// This setting is always greater than or equal to 2. |  | ||||||
|     // pub fn level_group_size(&mut self, value: u8) -> &mut Self { |  | ||||||
|     //     self.level_group_size = std::cmp::max(value, 2); |  | ||||||
|     //     self |  | ||||||
|     // } |  | ||||||
|  |  | ||||||
|     // /// The minimum number of elements that a level is allowed to have. |  | ||||||
|     // pub fn min_level_size(&mut self, value: u8) -> &mut Self { |  | ||||||
|     //     self.min_level_size = std::cmp::max(value, 1); |  | ||||||
|     //     self |  | ||||||
|     // } |  | ||||||
|  |  | ||||||
|     pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> { |     pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> { | ||||||
|  |         // here, come up with a better condition! | ||||||
|         if self.database.is_empty(wtxn)? { |         if self.database.is_empty(wtxn)? { | ||||||
|             let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data); |             let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data) | ||||||
|  |                 .level_group_size(self.level_group_size) | ||||||
|  |                 .min_level_size(self.min_level_size); | ||||||
|             bulk_update.execute(wtxn)?; |             bulk_update.execute(wtxn)?; | ||||||
|         } else { |         } else { | ||||||
|             let indexer = FacetsUpdateIncremental::new(self.database); |             let indexer = FacetsUpdateIncremental::new(self.database) | ||||||
|  |                 .max_group_size(self.max_level_group_size) | ||||||
|  |                 .min_level_size(self.min_level_size); | ||||||
|  |  | ||||||
|             let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default(); |             let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,8 +16,4 @@ source: milli/src/update/facet/incremental.rs | |||||||
| 0  0  k12       1    "[12, ]" | 0  0  k12       1    "[12, ]" | ||||||
| 0  0  k13       1    "[13, ]" | 0  0  k13       1    "[13, ]" | ||||||
| 0  0  k14       1    "[14, ]" | 0  0  k14       1    "[14, ]" | ||||||
| 0  1  k0        4    "[0, 1, 2, 3, ]" |  | ||||||
| 0  1  k4        4    "[4, 5, 6, 7, ]" |  | ||||||
| 0  1  k8        4    "[8, 9, 10, 11, ]" |  | ||||||
| 0  1  k12       3    "[12, 13, 14, ]" |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user