mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-29 23:16:26 +00:00 
			
		
		
		
	Start porting facet distribution and sort to new database structure
This commit is contained in:
		
				
					committed by
					
						 Loïc Lecrenier
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							7913d6365c
						
					
				
				
					commit
					63ef0aba18
				
			
							
								
								
									
										199
									
								
								milli/src/search/facet/facet_distribution_iter.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										199
									
								
								milli/src/search/facet/facet_distribution_iter.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,199 @@ | ||||
| use roaring::RoaringBitmap; | ||||
| use std::ops::ControlFlow; | ||||
|  | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level}; | ||||
|  | ||||
| pub fn iterate_over_facet_distribution<'t, CB>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     candidates: &RoaringBitmap, | ||||
|     callback: CB, | ||||
| ) where | ||||
|     CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, | ||||
| { | ||||
|     let mut fd = FacetDistribution { rtxn, db, field_id, callback }; | ||||
|     let highest_level = | ||||
|         get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id); | ||||
|  | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id) { | ||||
|         fd.iterate(candidates, highest_level, first_bound, usize::MAX); | ||||
|         return; | ||||
|     } else { | ||||
|         return; | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct FacetDistribution<'t, CB> | ||||
| where | ||||
|     CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, | ||||
| { | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     callback: CB, | ||||
| } | ||||
|  | ||||
| impl<'t, CB> FacetDistribution<'t, CB> | ||||
| where | ||||
|     CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, | ||||
| { | ||||
|     fn iterate_level_0( | ||||
|         &mut self, | ||||
|         candidates: &RoaringBitmap, | ||||
|         starting_bound: &'t [u8], | ||||
|         group_size: usize, | ||||
|     ) -> ControlFlow<()> { | ||||
|         let starting_key = | ||||
|             FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; | ||||
|         let iter = self.db.range(self.rtxn, &(starting_key..)).unwrap().take(group_size); | ||||
|         for el in iter { | ||||
|             let (key, value) = el.unwrap(); | ||||
|             // The range is unbounded on the right and the group size for the highest level is MAX, | ||||
|             // so we need to check that we are not iterating over the next field id | ||||
|             if key.field_id != self.field_id { | ||||
|                 return ControlFlow::Break(()); | ||||
|             } | ||||
|             let docids_in_common = value.bitmap.intersection_len(candidates); | ||||
|             if docids_in_common > 0 { | ||||
|                 match (self.callback)(key.left_bound, docids_in_common) { | ||||
|                     ControlFlow::Continue(_) => {} | ||||
|                     ControlFlow::Break(_) => return ControlFlow::Break(()), | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         return ControlFlow::Continue(()); | ||||
|     } | ||||
|     fn iterate( | ||||
|         &mut self, | ||||
|         candidates: &RoaringBitmap, | ||||
|         level: u8, | ||||
|         starting_bound: &'t [u8], | ||||
|         group_size: usize, | ||||
|     ) -> ControlFlow<()> { | ||||
|         if level == 0 { | ||||
|             return self.iterate_level_0(candidates, starting_bound, group_size); | ||||
|         } | ||||
|         let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound }; | ||||
|         let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size); | ||||
|  | ||||
|         for el in iter { | ||||
|             let (key, value) = el.unwrap(); | ||||
|             // The range is unbounded on the right and the group size for the highest level is MAX, | ||||
|             // so we need to check that we are not iterating over the next field id | ||||
|             if key.field_id != self.field_id { | ||||
|                 return ControlFlow::Break(()); | ||||
|             } | ||||
|             let docids_in_common = value.bitmap & candidates; | ||||
|             if docids_in_common.len() > 0 { | ||||
|                 let cf = | ||||
|                     self.iterate(&docids_in_common, level - 1, key.left_bound, value.size as usize); | ||||
|                 match cf { | ||||
|                     ControlFlow::Continue(_) => {} | ||||
|                     ControlFlow::Break(_) => return ControlFlow::Break(()), | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return ControlFlow::Continue(()); | ||||
|     } | ||||
| } | ||||
|  | ||||
#[cfg(test)]
mod tests {
    use std::ops::ControlFlow;

    use heed::BytesDecode;
    use roaring::RoaringBitmap;

    use super::iterate_over_facet_distribution;
    use crate::{codec::U16Codec, Index};

    /// Builds an index where, for field id 0, each facet value `v` in `0..256`
    /// is inserted with the single document id `v`.
    fn get_simple_index() -> Index<U16Codec> {
        let index = Index::<U16Codec>::new(4, 8);
        let mut wtxn = index.env.write_txn().unwrap();
        for value in 0..256u16 {
            let mut docids = RoaringBitmap::new();
            docids.insert(u32::from(value));
            index.insert(&mut wtxn, 0, &value, &docids);
        }
        wtxn.commit().unwrap();
        index
    }

    /// Builds an index for field id 0 from 128 facet values below 256 drawn from
    /// an RNG seeded with 0; each value `k` is inserted with documents `k` and
    /// `k + 100`.
    fn get_random_looking_index() -> Index<U16Codec> {
        let index = Index::<U16Codec>::new(4, 8);
        let mut wtxn = index.env.write_txn().unwrap();

        let rng = fastrand::Rng::with_seed(0);
        let keys: Vec<u32> = std::iter::repeat_with(|| rng.u32(..256)).take(128).collect();

        for key in keys {
            let mut docids = RoaringBitmap::new();
            docids.insert(key);
            docids.insert(key + 100);
            index.insert(&mut wtxn, 0, &(key as u16), &docids);
        }
        wtxn.commit().unwrap();
        index
    }

    /// Candidate set used by the distribution tests: document ids `0..=255`.
    fn all_candidates() -> RoaringBitmap {
        (0..=255u32).collect()
    }

    #[test]
    fn random_looking_index_snap() {
        let index = get_random_looking_index();
        insta::assert_display_snapshot!(index);
    }

    #[test]
    fn filter_distribution_all() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.into_iter().enumerate() {
            let txn = index.env.read_txn().unwrap();
            let candidates = all_candidates();
            let mut results = String::new();

            // Record every reported facet value together with its count.
            let record = |facet, count| {
                let facet = U16Codec::bytes_decode(facet).unwrap();
                results.push_str(&format!("{facet}: {count}\n"));
                ControlFlow::Continue(())
            };
            iterate_over_facet_distribution(&txn, &index.db.content, 0, &candidates, record);
            insta::assert_snapshot!(format!("filter_distribution_{i}_all"), results);

            txn.commit().unwrap();
        }
    }

    #[test]
    fn filter_distribution_all_stop_early() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.into_iter().enumerate() {
            let txn = index.env.read_txn().unwrap();
            let candidates = all_candidates();
            let mut results = String::new();
            let mut nbr_facets = 0;

            // Record the first 100 facet values, then ask the traversal to stop.
            let record_first_hundred = |facet, count| {
                let facet = U16Codec::bytes_decode(facet).unwrap();
                if nbr_facets == 100 {
                    return ControlFlow::Break(());
                }
                nbr_facets += 1;
                results.push_str(&format!("{facet}: {count}\n"));
                ControlFlow::Continue(())
            };
            iterate_over_facet_distribution(
                &txn,
                &index.db.content,
                0,
                &candidates,
                record_first_hundred,
            );
            insta::assert_snapshot!(format!("filter_distribution_{i}_all_stop_early"), results);

            txn.commit().unwrap();
        }
    }
}
| @@ -1,335 +0,0 @@ | ||||
| // use std::ops::Bound::{self, Excluded, Included, Unbounded}; | ||||
|  | ||||
| // use either::Either::{self, Left, Right}; | ||||
| // use heed::types::{ByteSlice, DecodeIgnore}; | ||||
| // use heed::{BytesDecode, BytesEncode, Database, Lazy, LazyDecode, RoRange, RoRevRange}; | ||||
| // use obkv::Key; | ||||
| // use roaring::RoaringBitmap; | ||||
|  | ||||
| // use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| // use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec}; | ||||
| // use crate::heed_codec::CboRoaringBitmapCodec; | ||||
| // use crate::{FieldId, Index}; | ||||
|  | ||||
| // pub struct FacetNumberRange<'t, 'e> { | ||||
| //     rtxn: &'t heed::RoTxn<'e>, | ||||
| //     db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
| //     iter: RoRange<'t, FacetKeyCodec<OrderedF64Codec>, LazyDecode<FacetGroupValueCodec>>, | ||||
| //     max_bound: f64, | ||||
| //     previous: Option<(FacetKey<f64>, Lazy<'t, FacetGroupValueCodec>)>, | ||||
| //     field_id: FieldId, | ||||
| //     end: Bound<f64>, | ||||
| // } | ||||
|  | ||||
| // impl<'t, 'e> FacetNumberRange<'t, 'e> { | ||||
| //     pub fn new( | ||||
| //         rtxn: &'t heed::RoTxn<'e>, | ||||
| //         db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
| //         field_id: FieldId, | ||||
| //         level: u8, | ||||
| //         left: Bound<f64>, | ||||
| //         right: Bound<f64>, | ||||
| //     ) -> heed::Result<FacetNumberRange<'t, 'e>> { | ||||
| //         let left_bound = match left { | ||||
| //             Included(left_bound) => Included(FacetKey { field_id, level, left_bound }), | ||||
| //             Excluded(left_bound) => Excluded(FacetKey { field_id, level, left_bound }), | ||||
| //             Unbounded => Included(FacetKey { field_id, level, left_bound: f64::MIN }), | ||||
| //         }; | ||||
|  | ||||
| //         let mut iter = db.lazily_decode_data().range(rtxn, &(left_bound, Unbounded))?; | ||||
| //         let mut previous = iter.next().transpose()?; | ||||
|  | ||||
| //         // Compute the maximum end bound by looking at the key of the last element in level 0 | ||||
| //         let mut prefix_level_0 = vec![]; | ||||
| //         prefix_level_0.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //         prefix_level_0.push(level); | ||||
|  | ||||
| //         let mut rev_iter = | ||||
| //             db.as_polymorph().rev_prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, &prefix_level_0)?; | ||||
|  | ||||
| //         let rev_iter_first = rev_iter.next().transpose()?; | ||||
| //         let max_bound = if let Some((max_bound_key, _)) = rev_iter_first { | ||||
| //             let max_bound_key = | ||||
| //                 FacetKeyCodec::<OrderedF64Codec>::bytes_decode(max_bound_key).unwrap(); | ||||
| //             max_bound_key.left_bound | ||||
| //         } else { | ||||
| //             // I can't imagine when that would happen, but let's handle it correctly anyway | ||||
| //             // by making the iterator empty | ||||
| //             previous = None; | ||||
| //             0.0 // doesn't matter since previous = None so the iterator will always early exit | ||||
| //                 // and return None itself | ||||
| //         }; | ||||
|  | ||||
| //         Ok(FacetNumberRange { rtxn, db, iter, field_id, previous, max_bound, end: right }) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t, 'e> Iterator for FacetNumberRange<'t, 'e> { | ||||
| //     type Item = heed::Result<(FacetKey<f64>, RoaringBitmap)>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         // The idea here is to return the **previous** element only if the left | ||||
| //         // bound of the current key fits within the range given to the iter | ||||
| //         // if it doesn't, then there is still a chance that it must be returned, | ||||
| //         // but we need to check the actual right bound of the group by looking for | ||||
| //         // the key preceding the first key of the next group in level 0 | ||||
|  | ||||
| //         let (prev_key, prev_value) = self.previous?; | ||||
|  | ||||
| //         let (next_left_bound, next_previous) = if let Some(next) = self.iter.next() { | ||||
| //             let (key, group_value) = match next { | ||||
| //                 Ok(n) => n, | ||||
| //                 Err(e) => return Some(Err(e)), | ||||
| //             }; | ||||
| //             (key.left_bound, Some((key, group_value))) | ||||
| //         } else { | ||||
| //             // we're at the end of the level iter, so we need to fetch the max bound instead | ||||
| //             (self.max_bound, None) | ||||
| //         }; | ||||
| //         let must_be_returned = match self.end { | ||||
| //             Included(end) => next_left_bound <= end, | ||||
| //             Excluded(end) => next_left_bound < end, | ||||
| //             Unbounded => true, | ||||
| //         }; | ||||
| //         if must_be_returned { | ||||
| //             match prev_value.decode() { | ||||
| //                 Ok(group_value) => { | ||||
| //                     self.previous = next_previous; | ||||
| //                     Some(Ok((prev_key, group_value.bitmap))) | ||||
| //                 } | ||||
| //                 Err(e) => Some(Err(e)), | ||||
| //             } | ||||
| //         } else { | ||||
| //             // it is still possible that we want to return the value (one last time) | ||||
| //             // but to do so, we need to fetch the right bound of the current group | ||||
| //             // this is done by getting the first element at level 0 of the next group | ||||
| //             // then iterating in reverse from it | ||||
| //             // once we have the right bound, we can compare it, and then return or not | ||||
| //             // then we still set self.previous to None so that no other element can return | ||||
| //             // from it? | ||||
| //             let mut level_0_key_prefix = vec![]; | ||||
| //             level_0_key_prefix.extend_from_slice(&self.field_id.to_be_bytes()); | ||||
| //             level_0_key_prefix.push(0); | ||||
| //             let key = | ||||
| //                 FacetKey::<f64> { field_id: self.field_id, level: 0, left_bound: next_left_bound }; | ||||
| //             let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap(); | ||||
| //             level_0_key_prefix.extend_from_slice(&key_bytes); | ||||
|  | ||||
| //             let mut rev_iter_next_group_level_0 = self | ||||
| //                 .db | ||||
| //                 .as_polymorph() | ||||
| //                 .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&self.rtxn, &level_0_key_prefix) | ||||
| //                 .unwrap(); | ||||
| //             let (key_for_right_bound, _) = rev_iter_next_group_level_0.next().unwrap().unwrap(); | ||||
| //             let key_for_right_bound = | ||||
| //                 FacetKeyCodec::<OrderedF64Codec>::bytes_decode(key_for_right_bound).unwrap(); | ||||
| //             let right_bound = key_for_right_bound.left_bound; | ||||
| //             let must_be_returned = match self.end { | ||||
| //                 Included(end) => right_bound <= end, | ||||
| //                 Excluded(end) => right_bound < end, | ||||
| //                 Unbounded => unreachable!(), | ||||
| //             }; | ||||
| //             self.previous = None; | ||||
| //             if must_be_returned { | ||||
| //                 match prev_value.decode() { | ||||
| //                     Ok(group_value) => Some(Ok((prev_key, group_value.bitmap))), | ||||
| //                     Err(e) => Some(Err(e)), | ||||
| //                 } | ||||
| //             } else { | ||||
| //                 None | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // pub struct FacetNumberRevRange<'t> { | ||||
| //     iter: RoRevRange<'t, FacetKeyCodec<OrderedF64Codec>, LazyDecode<FacetGroupValueCodec>>, | ||||
| //     end: Bound<f64>, | ||||
| // } | ||||
|  | ||||
| // impl<'t> FacetNumberRevRange<'t> { | ||||
| //     pub fn new( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
| //         field_id: FieldId, | ||||
| //         level: u8, | ||||
| //         left: Bound<f64>, | ||||
| //         right: Bound<f64>, | ||||
| //     ) -> heed::Result<FacetNumberRevRange<'t>> { | ||||
| //         let left_bound = match left { | ||||
| //             Included(left) => Included(FacetKey { field_id, level, left_bound: left }), | ||||
| //             Excluded(left) => Excluded(FacetKey { field_id, level, left_bound: left }), | ||||
| //             Unbounded => Included(FacetKey { field_id, level, left_bound: f64::MIN }), | ||||
| //         }; | ||||
| //         let right_bound = Included(FacetKey { field_id, level, left_bound: f64::MAX }); | ||||
| //         let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; | ||||
| //         Ok(FacetNumberRevRange { iter, end: right }) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t> Iterator for FacetNumberRevRange<'t> { | ||||
| //     type Item = heed::Result<(FacetKey<f64>, RoaringBitmap)>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         loop { | ||||
| //             match self.iter.next() { | ||||
| //                 Some(Ok((FacetKey { field_id, level, left_bound }, docids))) => { | ||||
| //                     let must_be_returned = match self.end { | ||||
| //                         Included(end) => todo!(), //right <= end, | ||||
| //                         Excluded(end) => todo!(), //right < end, | ||||
| //                         Unbounded => true, | ||||
| //                     }; | ||||
| //                     if must_be_returned { | ||||
| //                         match docids.decode() { | ||||
| //                             Ok(docids) => { | ||||
| //                                 return Some(Ok(( | ||||
| //                                     FacetKey { field_id, level, left_bound }, | ||||
| //                                     docids.bitmap, | ||||
| //                                 ))) | ||||
| //                             } | ||||
| //                             Err(e) => return Some(Err(e)), | ||||
| //                         } | ||||
| //                     } | ||||
| //                     continue; | ||||
| //                 } | ||||
| //                 Some(Err(e)) => return Some(Err(e)), | ||||
| //                 None => return None, | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // pub struct FacetNumberIter<'t, 'e> { | ||||
| //     rtxn: &'t heed::RoTxn<'t>, | ||||
| //     db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
| //     field_id: FieldId, | ||||
| //     level_iters: Vec<(RoaringBitmap, Either<FacetNumberRange<'t, 'e>, FacetNumberRevRange<'t>>)>, | ||||
| //     must_reduce: bool, | ||||
| // } | ||||
|  | ||||
| // impl<'t, 'e> FacetNumberIter<'t, 'e> { | ||||
| //     /// Create a `FacetNumberIter` that will iterate on the different facet entries | ||||
| //     /// (facet value + documents ids) and that will reduce the given documents ids | ||||
| //     /// while iterating on the different facet levels. | ||||
| //     pub fn new_reducing( | ||||
| //         rtxn: &'t heed::RoTxn<'e>, | ||||
| //         index: &'t Index, | ||||
| //         field_id: FieldId, | ||||
| //         documents_ids: RoaringBitmap, | ||||
| //     ) -> heed::Result<FacetNumberIter<'t, 'e>> { | ||||
| //         let db = index.facet_id_f64_docids; | ||||
| //         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
| //         let highest_iter = | ||||
| //             FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||
| //         let level_iters = vec![(documents_ids, Left(highest_iter))]; | ||||
| //         Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true }) | ||||
| //     } | ||||
|  | ||||
| //     /// Create a `FacetNumberIter` that will iterate on the different facet entries in reverse | ||||
| //     /// (facet value + documents ids) and that will reduce the given documents ids | ||||
| //     /// while iterating on the different facet levels. | ||||
| //     pub fn new_reverse_reducing( | ||||
| //         rtxn: &'t heed::RoTxn<'e>, | ||||
| //         index: &'t Index, | ||||
| //         field_id: FieldId, | ||||
| //         documents_ids: RoaringBitmap, | ||||
| //     ) -> heed::Result<FacetNumberIter<'t, 'e>> { | ||||
| //         let db = index.facet_id_f64_docids; | ||||
| //         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
| //         let highest_iter = | ||||
| //             FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||
| //         let level_iters = vec![(documents_ids, Right(highest_iter))]; | ||||
| //         Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true }) | ||||
| //     } | ||||
|  | ||||
| //     /// Create a `FacetNumberIter` that will iterate on the different facet entries | ||||
| //     /// (facet value + documents ids) and that will not reduce the given documents ids | ||||
| //     /// while iterating on the different facet levels, possibly returning multiple times | ||||
| //     /// a document id associated with multiple facet values. | ||||
| //     pub fn new_non_reducing( | ||||
| //         rtxn: &'t heed::RoTxn<'e>, | ||||
| //         index: &'t Index, | ||||
| //         field_id: FieldId, | ||||
| //         documents_ids: RoaringBitmap, | ||||
| //     ) -> heed::Result<FacetNumberIter<'t, 'e>> { | ||||
| //         let db = index.facet_id_f64_docids; | ||||
| //         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
| //         let highest_iter = | ||||
| //             FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||
| //         let level_iters = vec![(documents_ids, Left(highest_iter))]; | ||||
| //         Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: false }) | ||||
| //     } | ||||
|  | ||||
| //     fn highest_level<X>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<FacetKeyCodec<OrderedF64Codec>, X>, | ||||
| //         fid: FieldId, | ||||
| //     ) -> heed::Result<Option<u8>> { | ||||
| //         let level = db | ||||
| //             .remap_types::<ByteSlice, DecodeIgnore>() | ||||
| //             .prefix_iter(rtxn, &fid.to_be_bytes())? | ||||
| //             .remap_key_type::<FacetKeyCodec<OrderedF64Codec>>() | ||||
| //             .last() | ||||
| //             .transpose()? | ||||
| //             .map(|(key, _)| key.level); | ||||
| //         Ok(level) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t, 'e> Iterator for FacetNumberIter<'t, 'e> { | ||||
| //     type Item = heed::Result<(f64, RoaringBitmap)>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         'outer: loop { | ||||
| //             let (documents_ids, last) = self.level_iters.last_mut()?; | ||||
| //             let is_ascending = last.is_left(); | ||||
| //             for result in last { | ||||
| //                 // If the last iterator must find an empty set of documents it means | ||||
| //                 // that we found all the documents in the sub level iterations already, | ||||
| //                 // we can pop this level iterator. | ||||
| //                 if documents_ids.is_empty() { | ||||
| //                     break; | ||||
| //                 } | ||||
|  | ||||
| //                 match result { | ||||
| //                     Ok((key, mut docids)) => { | ||||
| //                         docids &= &*documents_ids; | ||||
| //                         if !docids.is_empty() { | ||||
| //                             if self.must_reduce { | ||||
| //                                 *documents_ids -= &docids; | ||||
| //                             } | ||||
|  | ||||
| //                             if level == 0 { | ||||
| //                                 return Some(Ok((left, docids))); | ||||
| //                             } | ||||
|  | ||||
| //                             let rtxn = self.rtxn; | ||||
| //                             let db = self.db; | ||||
| //                             let fid = self.field_id; | ||||
| //                             let left = Included(left); | ||||
| //                             let right = Included(right); | ||||
|  | ||||
| //                             let result = if is_ascending { | ||||
| //                                 FacetNumberRange::new(rtxn, db, fid, level - 1, left, right) | ||||
| //                                     .map(Left) | ||||
| //                             } else { | ||||
| //                                 FacetNumberRevRange::new(rtxn, db, fid, level - 1, left, right) | ||||
| //                                     .map(Right) | ||||
| //                             }; | ||||
|  | ||||
| //                             match result { | ||||
| //                                 Ok(iter) => { | ||||
| //                                     self.level_iters.push((docids, iter)); | ||||
| //                                     continue 'outer; | ||||
| //                                 } | ||||
| //                                 Err(e) => return Some(Err(e)), | ||||
| //                             } | ||||
| //                         } | ||||
| //                     } | ||||
| //                     Err(e) => return Some(Err(e)), | ||||
| //                 } | ||||
| //             } | ||||
| //             self.level_iters.pop(); | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
							
								
								
									
										147
									
								
								milli/src/search/facet/facet_sort_ascending.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								milli/src/search/facet/facet_sort_ascending.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::facet::new::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
| }; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level}; | ||||
|  | ||||
| pub fn ascending_facet_sort<'t>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> Box<dyn Iterator<Item = (&'t [u8], RoaringBitmap)> + 't> { | ||||
|     let highest_level = | ||||
|         get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id); | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>( | ||||
|         rtxn, | ||||
|         &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), | ||||
|         field_id, | ||||
|     ) { | ||||
|         let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; | ||||
|         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); | ||||
|  | ||||
|         Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }) | ||||
|     } else { | ||||
|         return Box::new(std::iter::empty()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct AscendingFacetSort<'t, 'e> { | ||||
|     rtxn: &'t heed::RoTxn<'e>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     stack: Vec<( | ||||
|         RoaringBitmap, | ||||
|         std::iter::Take<heed::RoRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>, | ||||
|     )>, | ||||
| } | ||||
|  | ||||
| /// Walks the facet levels depth-first, from the frame at the top of `self.stack` down to | ||||
| /// level 0, and yields `(left_bound, docids)` pairs for the level-0 entries whose bitmap | ||||
| /// intersects the documents still tracked by the current stack frame. | ||||
| /// | ||||
| /// Entries are read with a forward `range`, so they come out in the key order of the | ||||
| /// database (presumably ascending facet value; this depends on the key codec). | ||||
| /// | ||||
| /// # Panics | ||||
| /// | ||||
| /// Panics if reading or decoding a database entry fails (`unwrap` on the heed results). | ||||
| impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { | ||||
|     type Item = (&'t [u8], RoaringBitmap); | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         'outer: loop { | ||||
|             // Work on the deepest frame; an empty stack means the traversal is over. | ||||
|             let (documents_ids, deepest_iter) = self.stack.last_mut()?; | ||||
|             for result in deepest_iter { | ||||
|                 let ( | ||||
|                     FacetKey { level, left_bound, field_id }, | ||||
|                     FacetGroupValue { size: group_size, mut bitmap }, | ||||
|                 ) = result.unwrap(); | ||||
|                 // The range is unbounded on the right and the group size for the highest level is MAX, | ||||
|                 // so we need to check that we are not iterating over the next field id | ||||
|                 if field_id != self.field_id { | ||||
|                     return None; | ||||
|                 } | ||||
|  | ||||
|                 // If the last iterator found an empty set of documents it means | ||||
|                 // that we found all the documents in the sub level iterations already, | ||||
|                 // we can pop this level iterator. | ||||
|                 if documents_ids.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|  | ||||
|                 // Keep only the documents of this entry that are still wanted in this frame. | ||||
|                 bitmap &= &*documents_ids; | ||||
|                 if !bitmap.is_empty() { | ||||
|                     // These documents are now handled (yielded here or delegated to the | ||||
|                     // child frame), so they are removed from the current frame. | ||||
|                     *documents_ids -= &bitmap; | ||||
|  | ||||
|                     if level == 0 { | ||||
|                         return Some((left_bound, bitmap)); | ||||
|                     } | ||||
|                     // Descend one level: start at the same left bound and visit at most | ||||
|                     // `group_size` entries of the level below. | ||||
|                     let starting_key_below = | ||||
|                         FacetKey { field_id: self.field_id, level: level - 1, left_bound }; | ||||
|                     let iter = self | ||||
|                         .db | ||||
|                         .range(&self.rtxn, &(starting_key_below..)) | ||||
|                         .unwrap() | ||||
|                         .take(group_size as usize); | ||||
|  | ||||
|                     self.stack.push((bitmap, iter)); | ||||
|                     continue 'outer; | ||||
|                 } | ||||
|             } | ||||
|             // The deepest iterator is exhausted (or its documents are all found): go back up. | ||||
|             self.stack.pop(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use std::fmt::Write as _; | ||||
|  | ||||
|     use crate::{ | ||||
|         ascending_facet_sort::ascending_facet_sort, codec::U16Codec, display_bitmap, Index, | ||||
|     }; | ||||
|     use heed::BytesDecode; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     /// Builds an index where every facet value `i` in `0..256` is associated with the | ||||
|     /// single document id `i`. | ||||
|     fn get_simple_index() -> Index<U16Codec> { | ||||
|         let index = Index::<U16Codec>::new(4, 8); | ||||
|         let mut txn = index.env.write_txn().unwrap(); | ||||
|         for i in 0..256u16 { | ||||
|             let mut bitmap = RoaringBitmap::new(); | ||||
|             bitmap.insert(u32::from(i)); | ||||
|             index.insert(&mut txn, 0, &i, &bitmap); | ||||
|         } | ||||
|         txn.commit().unwrap(); | ||||
|         index | ||||
|     } | ||||
|  | ||||
|     /// Builds an index from 128 pseudo-random facet values below 256 (fixed seed, so the | ||||
|     /// content is deterministic); each value `key` is associated with the document ids | ||||
|     /// `key` and `key + 100`. | ||||
|     fn get_random_looking_index() -> Index<U16Codec> { | ||||
|         let index = Index::<U16Codec>::new(4, 8); | ||||
|         let mut txn = index.env.write_txn().unwrap(); | ||||
|  | ||||
|         let rng = fastrand::Rng::with_seed(0); | ||||
|         for key in std::iter::repeat_with(|| rng.u32(..256)).take(128) { | ||||
|             let mut bitmap = RoaringBitmap::new(); | ||||
|             bitmap.insert(key); | ||||
|             bitmap.insert(key + 100); | ||||
|             // `key` is always below 256, so the narrowing cast cannot truncate. | ||||
|             index.insert(&mut txn, 0, &(key as u16), &bitmap); | ||||
|         } | ||||
|         txn.commit().unwrap(); | ||||
|         index | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn random_looking_index_snap() { | ||||
|         let index = get_random_looking_index(); | ||||
|         insta::assert_display_snapshot!(index) | ||||
|     } | ||||
|  | ||||
|     /// Snapshots the `facet: docids` lines produced by the ascending sort for the | ||||
|     /// candidates `200..=300` on both test indexes. | ||||
|     #[test] | ||||
|     fn filter_sort() { | ||||
|         let indexes = [get_simple_index(), get_random_looking_index()]; | ||||
|         for (i, index) in indexes.into_iter().enumerate() { | ||||
|             let txn = index.env.read_txn().unwrap(); | ||||
|             let candidates = (200..=300).collect::<RoaringBitmap>(); | ||||
|             let mut results = String::new(); | ||||
|             let iter = ascending_facet_sort(&txn, &index.db.content, 0, candidates); | ||||
|             for (facet, docids) in iter { | ||||
|                 let facet = U16Codec::bytes_decode(facet).unwrap(); | ||||
|                 writeln!(results, "{facet}: {}", display_bitmap(&docids)).unwrap(); | ||||
|             } | ||||
|             insta::assert_snapshot!(format!("filter_sort_{i}_ascending"), results); | ||||
|  | ||||
|             txn.commit().unwrap(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										172
									
								
								milli/src/search/facet/facet_sort_descending.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								milli/src/search/facet/facet_sort_descending.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,172 @@ | ||||
| use std::ops::Bound; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::facet::new::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
| }; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||
|  | ||||
| /// Builds an iterator over the level-0 facet values of `field_id` that contain at least | ||||
| /// one document of `candidates`, visiting the database entries in reverse key order. | ||||
| /// | ||||
| /// Each item is `(facet value bytes, documents of the candidates found under that value)`. | ||||
| /// An empty iterator is returned when no facet value is stored for `field_id`. | ||||
| /// | ||||
| /// # Panics | ||||
| /// | ||||
| /// Panics if the reverse range over the database cannot be created. | ||||
| fn descending_facet_sort<'t>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> Box<dyn Iterator<Item = (&'t [u8], RoaringBitmap)> + 't> { | ||||
|     let highest_level = get_highest_level(rtxn, db, field_id); | ||||
|     let first_bound = match get_first_facet_value::<MyByteSlice>(rtxn, db, field_id) { | ||||
|         Some(bound) => bound, | ||||
|         // Nothing is stored for this field: there is nothing to iterate over. | ||||
|         None => return Box::new(std::iter::empty()), | ||||
|     }; | ||||
|     let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id) | ||||
|         .expect("a field with a first facet value must also have a last facet value"); | ||||
|  | ||||
|     let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; | ||||
|     let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound }; | ||||
|     // `take(usize::MAX)` does not limit anything; it only gives the top-level iterator | ||||
|     // the same `Take<..>` type as the group-sized iterators pushed on the stack later. | ||||
|     let iter = db | ||||
|         .rev_range(rtxn, &(first_key..=last_key)) | ||||
|         .expect("failed to create the reverse range over the facet database") | ||||
|         .take(usize::MAX); | ||||
|  | ||||
|     Box::new(DescendingFacetSort { | ||||
|         rtxn, | ||||
|         db, | ||||
|         field_id, | ||||
|         stack: vec![(candidates, iter, Bound::Included(last_bound))], | ||||
|     }) | ||||
| } | ||||
|  | ||||
| /// State of the descending facet traversal driven by its `Iterator` implementation. | ||||
| struct DescendingFacetSort<'t> { | ||||
|     // Read transaction used to open the reverse ranges. | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     // Facet database that is traversed. | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     // Field whose facet values are iterated over. | ||||
|     field_id: u16, | ||||
|     // One frame per level currently being explored, deepest level last. Each frame holds: | ||||
|     //  - the documents still to be found within this frame, | ||||
|     //  - the reverse iterator over the entries of this frame, | ||||
|     //  - the right bound to use for the next child range opened from this frame. | ||||
|     stack: Vec<( | ||||
|         RoaringBitmap, | ||||
|         std::iter::Take<heed::RoRevRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>, | ||||
|         Bound<&'t [u8]>, | ||||
|     )>, | ||||
| } | ||||
|  | ||||
| /// Walks the facet levels depth-first, reading every level with a reverse range, and | ||||
| /// yields `(left_bound, docids)` pairs for the level-0 entries whose bitmap intersects the | ||||
| /// documents still tracked by the current stack frame. | ||||
| /// | ||||
| /// # Panics | ||||
| /// | ||||
| /// Panics if reading or decoding a database entry fails (`unwrap` on the heed results). | ||||
| impl<'t> Iterator for DescendingFacetSort<'t> { | ||||
|     type Item = (&'t [u8], RoaringBitmap); | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         'outer: loop { | ||||
|             // Work on the deepest frame; an empty stack means the traversal is over. | ||||
|             let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?; | ||||
|             for result in deepest_iter { | ||||
|                 let ( | ||||
|                     FacetKey { level, left_bound, field_id }, | ||||
|                     FacetGroupValue { size: group_size, mut bitmap }, | ||||
|                 ) = result.unwrap(); | ||||
|                 // Guard: stop the whole iteration as soon as an entry of another field id | ||||
|                 // shows up. | ||||
|                 if field_id != self.field_id { | ||||
|                     return None; | ||||
|                 } | ||||
|                 // If the last iterator found an empty set of documents it means | ||||
|                 // that we found all the documents in the sub level iterations already, | ||||
|                 // we can pop this level iterator. | ||||
|                 if documents_ids.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|  | ||||
|                 // Keep only the documents of this entry that are still wanted in this frame. | ||||
|                 bitmap &= &*documents_ids; | ||||
|                 if !bitmap.is_empty() { | ||||
|                     // These documents are now handled (yielded here or delegated to the | ||||
|                     // child frame), so they are removed from the current frame. | ||||
|                     *documents_ids -= &bitmap; | ||||
|  | ||||
|                     if level == 0 { | ||||
|                         return Some((left_bound, bitmap)); | ||||
|                     } | ||||
|                     let starting_key_below = FacetKey { field_id, level: level - 1, left_bound }; | ||||
|  | ||||
|                     // The child range ends at the right bound of the current frame, | ||||
|                     // translated to a key of the level below. | ||||
|                     let end_key_below = match *right_bound { | ||||
|                         Bound::Included(right) => Bound::Included(FacetKey { | ||||
|                             field_id, | ||||
|                             level: level - 1, | ||||
|                             left_bound: right, | ||||
|                         }), | ||||
|                         Bound::Excluded(right) => Bound::Excluded(FacetKey { | ||||
|                             field_id, | ||||
|                             level: level - 1, | ||||
|                             left_bound: right, | ||||
|                         }), | ||||
|                         Bound::Unbounded => Bound::Unbounded, | ||||
|                     }; | ||||
|                     // The child frame inherits the current right bound; the next entry of | ||||
|                     // this frame must stop right before the group that was just visited. | ||||
|                     let prev_right_bound = *right_bound; | ||||
|                     *right_bound = Bound::Excluded(left_bound); | ||||
|                     let iter = self | ||||
|                         .db | ||||
|                         .rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_below)) | ||||
|                         .unwrap() | ||||
|                         .take(group_size as usize); | ||||
|  | ||||
|                     self.stack.push((bitmap, iter, prev_right_bound)); | ||||
|                     continue 'outer; | ||||
|                 } | ||||
|                 // Nothing of interest in this group: groups visited later end before it. | ||||
|                 *right_bound = Bound::Excluded(left_bound); | ||||
|             } | ||||
|             // The deepest iterator is exhausted (or its documents are all found): go back up. | ||||
|             self.stack.pop(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use std::fmt::Write as _; | ||||
|  | ||||
|     use crate::{ | ||||
|         codec::{MyByteSlice, U16Codec}, | ||||
|         descending_facet_sort::descending_facet_sort, | ||||
|         display_bitmap, FacetKeyCodec, Index, | ||||
|     }; | ||||
|     use heed::BytesDecode; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     /// Builds an index where every facet value `i` in `0..256` is associated with the | ||||
|     /// single document id `i`. | ||||
|     fn get_simple_index() -> Index<U16Codec> { | ||||
|         let index = Index::<U16Codec>::new(4, 8); | ||||
|         let mut txn = index.env.write_txn().unwrap(); | ||||
|         for i in 0..256u16 { | ||||
|             let mut bitmap = RoaringBitmap::new(); | ||||
|             bitmap.insert(u32::from(i)); | ||||
|             index.insert(&mut txn, 0, &i, &bitmap); | ||||
|         } | ||||
|         txn.commit().unwrap(); | ||||
|         index | ||||
|     } | ||||
|  | ||||
|     /// Builds an index from 128 pseudo-random facet values below 256 (fixed seed, so the | ||||
|     /// content is deterministic); each value `key` is associated with the document ids | ||||
|     /// `key` and `key + 100`. | ||||
|     fn get_random_looking_index() -> Index<U16Codec> { | ||||
|         let index = Index::<U16Codec>::new(4, 8); | ||||
|         let mut txn = index.env.write_txn().unwrap(); | ||||
|  | ||||
|         let rng = fastrand::Rng::with_seed(0); | ||||
|         for key in std::iter::repeat_with(|| rng.u32(..256)).take(128) { | ||||
|             let mut bitmap = RoaringBitmap::new(); | ||||
|             bitmap.insert(key); | ||||
|             bitmap.insert(key + 100); | ||||
|             // `key` is always below 256, so the narrowing cast cannot truncate. | ||||
|             index.insert(&mut txn, 0, &(key as u16), &bitmap); | ||||
|         } | ||||
|         txn.commit().unwrap(); | ||||
|         index | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn random_looking_index_snap() { | ||||
|         let index = get_random_looking_index(); | ||||
|         insta::assert_display_snapshot!(index) | ||||
|     } | ||||
|  | ||||
|     /// Snapshots the `facet: docids` lines produced by the descending sort for the | ||||
|     /// candidates `200..=300` on both test indexes. | ||||
|     #[test] | ||||
|     fn filter_sort_descending() { | ||||
|         let indexes = [get_simple_index(), get_random_looking_index()]; | ||||
|         for (i, index) in indexes.into_iter().enumerate() { | ||||
|             let txn = index.env.read_txn().unwrap(); | ||||
|             let candidates = (200..=300).collect::<RoaringBitmap>(); | ||||
|             let mut results = String::new(); | ||||
|             let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>(); | ||||
|             let iter = descending_facet_sort(&txn, &db, 0, candidates); | ||||
|             for (facet, docids) in iter { | ||||
|                 let facet = U16Codec::bytes_decode(facet).unwrap(); | ||||
|                 writeln!(results, "{facet}: {}", display_bitmap(&docids)).unwrap(); | ||||
|             } | ||||
|             insta::assert_snapshot!(format!("filter_sort_{i}_descending"), results); | ||||
|  | ||||
|             txn.commit().unwrap(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,649 +0,0 @@ | ||||
| // //! This module contains helpers iterators for facet strings. | ||||
| // //! | ||||
| // //! The purpose is to help iterate over the quite complex system of facets strings. A simple | ||||
| // //! description of the system would be that every facet string value is stored into an LMDB database | ||||
| // //! and that every value is associated with the document ids which are associated with this facet | ||||
| // //! string value. | ||||
| // //! | ||||
| // //! In reality it is a little bit more complex as we have to create aggregations of runs of facet | ||||
| // //! string values; those aggregations help in choosing the right groups of facets to follow. | ||||
| // //! | ||||
| // //! ## A typical algorithm run | ||||
| // //! | ||||
| // //! If a group of aggregated facets values contains one of the documents ids, we must continue | ||||
| // //! iterating over the sub-groups. | ||||
| // //! | ||||
| // //! If this group is the lowest level and contain at least one document id we yield the associated | ||||
| // //! facet documents ids. | ||||
| // //! | ||||
| // //! If the group doesn't contain one of our documents ids, we continue to the next group at this | ||||
| // //! same level. | ||||
| // //! | ||||
| // //! ## The complexity comes from the strings | ||||
| // //! | ||||
| // //! This algorithm is exactly the one that we use for facet numbers. It is quite easy to create | ||||
| // //! aggregated facet number, groups of facets are easy to define in the LMDB key, we just put the | ||||
| // //! two numbers bounds, the left and the right bound of the group, both inclusive. | ||||
| // //! | ||||
| // //! It is easy to make sure that the groups are ordered: LMDB sorts its keys lexicographically and | ||||
| // //! putting two numbers big-endian encoded one after the other gives us ordered groups. The values | ||||
| // //! are simple unions of the documents ids coming from the groups below. | ||||
| // //! | ||||
| // //! ### Example of what a facet number LMDB database contain | ||||
| // //! | ||||
| // //! | level | left-bound | right-bound | documents ids    | | ||||
| // //! |-------|------------|-------------|------------------| | ||||
| // //! | 0     | 0          | _skipped_   | 1, 2             | | ||||
| // //! | 0     | 1          | _skipped_   | 6, 7             | | ||||
| // //! | 0     | 3          | _skipped_   | 4, 7             | | ||||
| // //! | 0     | 5          | _skipped_   | 2, 3, 4          | | ||||
| // //! | 1     | 0          | 1           | 1, 2, 6, 7       | | ||||
| // //! | 1     | 3          | 5           | 2, 3, 4, 7       | | ||||
| // //! | 2     | 0          | 5           | 1, 2, 3, 4, 6, 7 | | ||||
| // //! | ||||
| // //! As you can see, the level 0 entries have two equal bounds, therefore we skip serializing the | ||||
| // //! second bound; that's the base level where you can directly fetch the documents ids associated | ||||
| // //! with an exact number. | ||||
| // //! | ||||
| // //! The next levels have two different bounds and the associated documents ids are simply the result | ||||
| // //! of an union of all the documents ids associated with the aggregated groups above. | ||||
| // //! | ||||
| // //! ## The complexity of defining groups for facet strings | ||||
| // //! | ||||
| // //! As explained above, defining groups of facet numbers is easy, LMDB stores the keys in | ||||
| // //! lexicographical order, it means that whatever the key represent the bytes are read in their raw | ||||
| // //! form and a simple `strcmp` will define the order in which keys will be read from the store. | ||||
| // //! | ||||
| // //! That's easy for types with a known size, like floats or integers: they are 64 bits long and | ||||
| // //! appending one after the other in big-endian is consistent. LMDB will simply sort the keys by the | ||||
| // //! first number, then by the second if the first number is equal on two keys. | ||||
| // //! | ||||
| // //! For strings it is a lot more complex as those types are unsized, it means that the size of facet | ||||
| // //! strings is different for each facet value. | ||||
| // //! | ||||
| // //! ### Basic approach: padding the keys | ||||
| // //! | ||||
| // //! A first approach would be to simply define the maximum size of a facet string and pad the keys | ||||
| // //! with zeroes. The big problem of this approach is that it: | ||||
| // //!  1. reduces the maximum size of facet strings by half, as we need to put two keys one after the | ||||
| // //!     other. | ||||
| // //!  2. makes the keys of facet strings very big (approximately 250 bytes), which significantly | ||||
| // //!     hurts LMDB performance. | ||||
| // //! | ||||
| // //! ### Better approach: number the facet groups | ||||
| // //! | ||||
| // //! A better approach would be to number the groups, this way we don't have the downsides of the | ||||
| // //! previously described approach but we need to be able to describe the groups by using a number. | ||||
| // //! | ||||
| // //! #### Example of facet strings with numbered groups | ||||
| // //! | ||||
| // //! | level | left-bound | right-bound | left-string | right-string | documents ids    | | ||||
| // //! |-------|------------|-------------|-------------|--------------|------------------| | ||||
| // //! | 0     | alpha      | _skipped_   | _skipped_   | _skipped_    | 1, 2             | | ||||
| // //! | 0     | beta       | _skipped_   | _skipped_   | _skipped_    | 6, 7             | | ||||
| // //! | 0     | gamma      | _skipped_   | _skipped_   | _skipped_    | 4, 7             | | ||||
| // //! | 0     | omega      | _skipped_   | _skipped_   | _skipped_    | 2, 3, 4          | | ||||
| // //! | 1     | 0          | 1           | alpha       | beta         | 1, 2, 6, 7       | | ||||
| // //! | 1     | 2          | 3           | gamma       | omega        | 2, 3, 4, 7       | | ||||
| // //! | 2     | 0          | 3           | _skipped_   | _skipped_    | 1, 2, 3, 4, 6, 7 | | ||||
| // //! | ||||
| // //! As you can see the level 0 doesn't actually change much, we skip nearly everything, we do not | ||||
| // //! need to store the facet string value two times. | ||||
| // //! | ||||
| // //! The numbers in the left-bound and right-bound columns are incremental numbers representing the | ||||
| // //! level 0 strings, i.e. alpha is 0, beta is 1. Those numbers are just here to keep the ordering | ||||
| // //! of the LMDB keys. | ||||
| // //! | ||||
| // //! In the value, not in the key, you can see that we added two new values: the left-string and the | ||||
| // //! right-string, which defines the original facet strings associated with the given group. | ||||
| // //! | ||||
| // //! We put those two strings inside of the value, this way we do not limit the maximum size of the | ||||
| // //! facet string values, and the impact on performance is not important as, IIRC, LMDB puts big | ||||
| // //! values on another page; this helps in iterating over keys fast enough and only fetching the page | ||||
| // //! with the values when required. | ||||
| // //! | ||||
| // //! The other little advantage of this solution is that there is no big overhead compared with | ||||
| // //! the facet number levels: we only duplicate the facet strings once, for the level 1. | ||||
| // //! | ||||
| // //! #### A typical algorithm run | ||||
| // //! | ||||
| // //! Note that the algorithm is always moving from the highest level to the lowest one, one level | ||||
| // //! by one level, this is why it is ok to only store the facets string on the level 1. | ||||
| // //! | ||||
| // //! If a group of aggregated facets values, a group with numbers contains one of the documents ids, | ||||
| // //! we must continue iterating over the sub-groups. To do so: | ||||
| // //!   - If we are at a level >= 2, we just do the same as with the facet numbers, get both bounds | ||||
| // //!     and iterate over the facet groups defined by these numbers over the current level - 1. | ||||
| // //!   - If we are at level 1, we retrieve both keys, the left-string and right-string, from the | ||||
| // //!     value and just do the same as with the facet numbers but with strings: iterate over the | ||||
| // //!     current level - 1 with both keys. | ||||
| // //! | ||||
| // //! If this group is the lowest level (level 0) and contain at least one document id we yield the | ||||
| // //! associated facet documents ids. | ||||
| // //! | ||||
| // //! If the group doesn't contain one of our documents ids, we continue to the next group at this | ||||
| // //! same level. | ||||
| // //! | ||||
|  | ||||
| // use std::num::NonZeroU8; | ||||
| // use std::ops::Bound; | ||||
| // use std::ops::Bound::{Excluded, Included, Unbounded}; | ||||
|  | ||||
| // use either::{Either, Left, Right}; | ||||
| // use heed::types::{ByteSlice, DecodeIgnore}; | ||||
| // use heed::{Database, LazyDecode, RoRange, RoRevRange}; | ||||
| // use roaring::RoaringBitmap; | ||||
|  | ||||
| // use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec; | ||||
| // use crate::heed_codec::CboRoaringBitmapCodec; | ||||
| // use crate::{FieldId, Index}; | ||||
|  | ||||
| // /// An iterator that is used to explore the facets level strings | ||||
| // /// from the level 1 to infinity. | ||||
| // /// | ||||
| // /// It yields the level, group id that an entry covers, the optional group strings | ||||
| // /// that it covers of the level 0 only if it is an entry from the level 1 and | ||||
| // /// the roaring bitmap associated. | ||||
| // pub struct FacetStringGroupRange<'t> { | ||||
| //     iter: RoRange< | ||||
| //         't, | ||||
| //         FacetLevelValueU32Codec, | ||||
| //         LazyDecode<FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>>, | ||||
| //     >, | ||||
| //     end: Bound<u32>, | ||||
| // } | ||||
|  | ||||
| // impl<'t> FacetStringGroupRange<'t> { | ||||
| //     pub fn new<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<X, Y>, | ||||
| //         field_id: FieldId, | ||||
| //         level: NonZeroU8, | ||||
| //         left: Bound<u32>, | ||||
| //         right: Bound<u32>, | ||||
| //     ) -> heed::Result<FacetStringGroupRange<'t>> { | ||||
| //         let db = db.remap_types::< | ||||
| //             FacetLevelValueU32Codec, | ||||
| //             FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>, | ||||
| //         >(); | ||||
| //         let left_bound = match left { | ||||
| //             Included(left) => Included((field_id, level, left, u32::MIN)), | ||||
| //             Excluded(left) => Excluded((field_id, level, left, u32::MIN)), | ||||
| //             Unbounded => Included((field_id, level, u32::MIN, u32::MIN)), | ||||
| //         }; | ||||
| //         let right_bound = Included((field_id, level, u32::MAX, u32::MAX)); | ||||
| //         let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?; | ||||
| //         Ok(FacetStringGroupRange { iter, end: right }) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t> Iterator for FacetStringGroupRange<'t> { | ||||
| //     type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         match self.iter.next() { | ||||
| //             Some(Ok(((_fid, level, left, right), docids))) => { | ||||
| //                 let must_be_returned = match self.end { | ||||
| //                     Included(end) => right <= end, | ||||
| //                     Excluded(end) => right < end, | ||||
| //                     Unbounded => true, | ||||
| //                 }; | ||||
| //                 if must_be_returned { | ||||
| //                     match docids.decode() { | ||||
| //                         Ok((bounds, docids)) => Some(Ok(((level, left, right), (bounds, docids)))), | ||||
| //                         Err(e) => Some(Err(e)), | ||||
| //                     } | ||||
| //                 } else { | ||||
| //                     None | ||||
| //                 } | ||||
| //             } | ||||
| //             Some(Err(e)) => Some(Err(e)), | ||||
| //             None => None, | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // pub struct FacetStringGroupRevRange<'t> { | ||||
| //     iter: RoRevRange< | ||||
| //         't, | ||||
| //         FacetLevelValueU32Codec, | ||||
| //         LazyDecode<FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>>, | ||||
| //     >, | ||||
| //     end: Bound<u32>, | ||||
| // } | ||||
|  | ||||
| // impl<'t> FacetStringGroupRevRange<'t> { | ||||
| //     pub fn new<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<X, Y>, | ||||
| //         field_id: FieldId, | ||||
| //         level: NonZeroU8, | ||||
| //         left: Bound<u32>, | ||||
| //         right: Bound<u32>, | ||||
| //     ) -> heed::Result<FacetStringGroupRevRange<'t>> { | ||||
| //         let db = db.remap_types::< | ||||
| //             FacetLevelValueU32Codec, | ||||
| //             FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>, | ||||
| //         >(); | ||||
| //         let left_bound = match left { | ||||
| //             Included(left) => Included((field_id, level, left, u32::MIN)), | ||||
| //             Excluded(left) => Excluded((field_id, level, left, u32::MIN)), | ||||
| //             Unbounded => Included((field_id, level, u32::MIN, u32::MIN)), | ||||
| //         }; | ||||
| //         let right_bound = Included((field_id, level, u32::MAX, u32::MAX)); | ||||
| //         let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; | ||||
| //         Ok(FacetStringGroupRevRange { iter, end: right }) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t> Iterator for FacetStringGroupRevRange<'t> { | ||||
| //     type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         loop { | ||||
| //             match self.iter.next() { | ||||
| //                 Some(Ok(((_fid, level, left, right), docids))) => { | ||||
| //                     let must_be_returned = match self.end { | ||||
| //                         Included(end) => right <= end, | ||||
| //                         Excluded(end) => right < end, | ||||
| //                         Unbounded => true, | ||||
| //                     }; | ||||
| //                     if must_be_returned { | ||||
| //                         match docids.decode() { | ||||
| //                             Ok((bounds, docids)) => { | ||||
| //                                 return Some(Ok(((level, left, right), (bounds, docids)))) | ||||
| //                             } | ||||
| //                             Err(e) => return Some(Err(e)), | ||||
| //                         } | ||||
| //                     } | ||||
| //                     continue; | ||||
| //                 } | ||||
| //                 Some(Err(e)) => return Some(Err(e)), | ||||
| //                 None => return None, | ||||
| //             } | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // /// An iterator that is used to explore the level 0 of the facets string database. | ||||
| // /// | ||||
| // /// It yields the facet string and the roaring bitmap associated with it. | ||||
| // pub struct FacetStringLevelZeroRange<'t> { | ||||
| //     iter: RoRange<'t, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>, | ||||
| // } | ||||
|  | ||||
| // impl<'t> FacetStringLevelZeroRange<'t> { | ||||
| //     pub fn new<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<X, Y>, | ||||
| //         field_id: FieldId, | ||||
| //         left: Bound<&str>, | ||||
| //         right: Bound<&str>, | ||||
| //     ) -> heed::Result<FacetStringLevelZeroRange<'t>> { | ||||
| //         fn encode_value<'a>(buffer: &'a mut Vec<u8>, field_id: FieldId, value: &str) -> &'a [u8] { | ||||
| //             buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //             buffer.push(0); | ||||
| //             buffer.extend_from_slice(value.as_bytes()); | ||||
| //             &buffer[..] | ||||
| //         } | ||||
|  | ||||
| //         let mut left_buffer = Vec::new(); | ||||
| //         let left_bound = match left { | ||||
| //             Included(value) => Included(encode_value(&mut left_buffer, field_id, value)), | ||||
| //             Excluded(value) => Excluded(encode_value(&mut left_buffer, field_id, value)), | ||||
| //             Unbounded => { | ||||
| //                 left_buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //                 left_buffer.push(0); | ||||
| //                 Included(&left_buffer[..]) | ||||
| //             } | ||||
| //         }; | ||||
|  | ||||
| //         let mut right_buffer = Vec::new(); | ||||
| //         let right_bound = match right { | ||||
| //             Included(value) => Included(encode_value(&mut right_buffer, field_id, value)), | ||||
| //             Excluded(value) => Excluded(encode_value(&mut right_buffer, field_id, value)), | ||||
| //             Unbounded => { | ||||
| //                 right_buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //                 right_buffer.push(1); // we must only get the level 0 | ||||
| //                 Excluded(&right_buffer[..]) | ||||
| //             } | ||||
| //         }; | ||||
|  | ||||
| //         let iter = db | ||||
| //             .remap_key_type::<ByteSlice>() | ||||
| //             .range(rtxn, &(left_bound, right_bound))? | ||||
| //             .remap_types::<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>(); | ||||
|  | ||||
| //         Ok(FacetStringLevelZeroRange { iter }) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t> Iterator for FacetStringLevelZeroRange<'t> { | ||||
| //     type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         match self.iter.next() { | ||||
| //             Some(Ok(((_fid, normalized), (original, docids)))) => { | ||||
| //                 Some(Ok((normalized, original, docids))) | ||||
| //             } | ||||
| //             Some(Err(e)) => Some(Err(e)), | ||||
| //             None => None, | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // pub struct FacetStringLevelZeroRevRange<'t> { | ||||
| //     iter: RoRevRange<'t, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>, | ||||
| // } | ||||
|  | ||||
| // impl<'t> FacetStringLevelZeroRevRange<'t> { | ||||
| //     pub fn new<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<X, Y>, | ||||
| //         field_id: FieldId, | ||||
| //         left: Bound<&str>, | ||||
| //         right: Bound<&str>, | ||||
| //     ) -> heed::Result<FacetStringLevelZeroRevRange<'t>> { | ||||
| //         fn encode_value<'a>(buffer: &'a mut Vec<u8>, field_id: FieldId, value: &str) -> &'a [u8] { | ||||
| //             buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //             buffer.push(0); | ||||
| //             buffer.extend_from_slice(value.as_bytes()); | ||||
| //             &buffer[..] | ||||
| //         } | ||||
|  | ||||
| //         let mut left_buffer = Vec::new(); | ||||
| //         let left_bound = match left { | ||||
| //             Included(value) => Included(encode_value(&mut left_buffer, field_id, value)), | ||||
| //             Excluded(value) => Excluded(encode_value(&mut left_buffer, field_id, value)), | ||||
| //             Unbounded => { | ||||
| //                 left_buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //                 left_buffer.push(0); | ||||
| //                 Included(&left_buffer[..]) | ||||
| //             } | ||||
| //         }; | ||||
|  | ||||
| //         let mut right_buffer = Vec::new(); | ||||
| //         let right_bound = match right { | ||||
| //             Included(value) => Included(encode_value(&mut right_buffer, field_id, value)), | ||||
| //             Excluded(value) => Excluded(encode_value(&mut right_buffer, field_id, value)), | ||||
| //             Unbounded => { | ||||
| //                 right_buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
| //                 right_buffer.push(1); // we must only get the level 0 | ||||
| //                 Excluded(&right_buffer[..]) | ||||
| //             } | ||||
| //         }; | ||||
|  | ||||
| //         let iter = db | ||||
| //             .remap_key_type::<ByteSlice>() | ||||
| //             .rev_range(rtxn, &(left_bound, right_bound))? | ||||
| //             .remap_types::<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>(); | ||||
|  | ||||
| //         Ok(FacetStringLevelZeroRevRange { iter }) | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t> Iterator for FacetStringLevelZeroRevRange<'t> { | ||||
| //     type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         match self.iter.next() { | ||||
| //             Some(Ok(((_fid, normalized), (original, docids)))) => { | ||||
| //                 Some(Ok((normalized, original, docids))) | ||||
| //             } | ||||
| //             Some(Err(e)) => Some(Err(e)), | ||||
| //             None => None, | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // type EitherStringRange<'t> = Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>; | ||||
| // type EitherStringRevRange<'t> = | ||||
| //     Either<FacetStringGroupRevRange<'t>, FacetStringLevelZeroRevRange<'t>>; | ||||
|  | ||||
| // /// An iterator that is used to explore the facet strings level by level, | ||||
| // /// it will only return facets strings that are associated with the | ||||
| // /// candidates documents ids given. | ||||
| // pub struct FacetStringIter<'t> { | ||||
| //     rtxn: &'t heed::RoTxn<'t>, | ||||
| //     db: Database<ByteSlice, ByteSlice>, | ||||
| //     field_id: FieldId, | ||||
| //     level_iters: Vec<(RoaringBitmap, Either<EitherStringRange<'t>, EitherStringRevRange<'t>>)>, | ||||
| //     must_reduce: bool, | ||||
| // } | ||||
|  | ||||
| // impl<'t> FacetStringIter<'t> { | ||||
| //     pub fn new_reducing( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         index: &'t Index, | ||||
| //         field_id: FieldId, | ||||
| //         documents_ids: RoaringBitmap, | ||||
| //     ) -> heed::Result<FacetStringIter<'t>> { | ||||
| //         let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>(); | ||||
| //         let highest_iter = Self::highest_iter(rtxn, index, db, field_id)?; | ||||
| //         Ok(FacetStringIter { | ||||
| //             rtxn, | ||||
| //             db, | ||||
| //             field_id, | ||||
| //             level_iters: vec![(documents_ids, Left(highest_iter))], | ||||
| //             must_reduce: true, | ||||
| //         }) | ||||
| //     } | ||||
|  | ||||
| //     pub fn new_reverse_reducing( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         index: &'t Index, | ||||
| //         field_id: FieldId, | ||||
| //         documents_ids: RoaringBitmap, | ||||
| //     ) -> heed::Result<FacetStringIter<'t>> { | ||||
| //         let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>(); | ||||
| //         let highest_reverse_iter = Self::highest_reverse_iter(rtxn, index, db, field_id)?; | ||||
| //         Ok(FacetStringIter { | ||||
| //             rtxn, | ||||
| //             db, | ||||
| //             field_id, | ||||
| //             level_iters: vec![(documents_ids, Right(highest_reverse_iter))], | ||||
| //             must_reduce: true, | ||||
| //         }) | ||||
| //     } | ||||
|  | ||||
| //     pub fn new_non_reducing( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         index: &'t Index, | ||||
| //         field_id: FieldId, | ||||
| //         documents_ids: RoaringBitmap, | ||||
| //     ) -> heed::Result<FacetStringIter<'t>> { | ||||
| //         let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>(); | ||||
| //         let highest_iter = Self::highest_iter(rtxn, index, db, field_id)?; | ||||
| //         Ok(FacetStringIter { | ||||
| //             rtxn, | ||||
| //             db, | ||||
| //             field_id, | ||||
| //             level_iters: vec![(documents_ids, Left(highest_iter))], | ||||
| //             must_reduce: false, | ||||
| //         }) | ||||
| //     } | ||||
|  | ||||
| //     fn highest_level<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         db: Database<X, Y>, | ||||
| //         fid: FieldId, | ||||
| //     ) -> heed::Result<Option<u8>> { | ||||
| //         Ok(db | ||||
| //             .remap_types::<ByteSlice, DecodeIgnore>() | ||||
| //             .prefix_iter(rtxn, &fid.to_be_bytes())? // the field id is the first two bytes | ||||
| //             .last() | ||||
| //             .transpose()? | ||||
| //             .map(|(key_bytes, _)| key_bytes[2])) // the level is the third byte | ||||
| //     } | ||||
|  | ||||
| //     fn highest_iter<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         index: &'t Index, | ||||
| //         db: Database<X, Y>, | ||||
| //         field_id: FieldId, | ||||
| //     ) -> heed::Result<Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>> { | ||||
| //         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
| //         match NonZeroU8::new(highest_level) { | ||||
| //             Some(highest_level) => FacetStringGroupRange::new( | ||||
| //                 rtxn, | ||||
| //                 index.facet_id_string_docids, | ||||
| //                 field_id, | ||||
| //                 highest_level, | ||||
| //                 Unbounded, | ||||
| //                 Unbounded, | ||||
| //             ) | ||||
| //             .map(Left), | ||||
| //             None => FacetStringLevelZeroRange::new( | ||||
| //                 rtxn, | ||||
| //                 index.facet_id_string_docids, | ||||
| //                 field_id, | ||||
| //                 Unbounded, | ||||
| //                 Unbounded, | ||||
| //             ) | ||||
| //             .map(Right), | ||||
| //         } | ||||
| //     } | ||||
|  | ||||
| //     fn highest_reverse_iter<X, Y>( | ||||
| //         rtxn: &'t heed::RoTxn, | ||||
| //         index: &'t Index, | ||||
| //         db: Database<X, Y>, | ||||
| //         field_id: FieldId, | ||||
| //     ) -> heed::Result<Either<FacetStringGroupRevRange<'t>, FacetStringLevelZeroRevRange<'t>>> { | ||||
| //         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
| //         match NonZeroU8::new(highest_level) { | ||||
| //             Some(highest_level) => FacetStringGroupRevRange::new( | ||||
| //                 rtxn, | ||||
| //                 index.facet_id_string_docids, | ||||
| //                 field_id, | ||||
| //                 highest_level, | ||||
| //                 Unbounded, | ||||
| //                 Unbounded, | ||||
| //             ) | ||||
| //             .map(Left), | ||||
| //             None => FacetStringLevelZeroRevRange::new( | ||||
| //                 rtxn, | ||||
| //                 index.facet_id_string_docids, | ||||
| //                 field_id, | ||||
| //                 Unbounded, | ||||
| //                 Unbounded, | ||||
| //             ) | ||||
| //             .map(Right), | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // impl<'t> Iterator for FacetStringIter<'t> { | ||||
| //     type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>; | ||||
|  | ||||
| //     fn next(&mut self) -> Option<Self::Item> { | ||||
| //         'outer: loop { | ||||
| //             let (documents_ids, last) = self.level_iters.last_mut()?; | ||||
| //             let is_ascending = last.is_left(); | ||||
|  | ||||
| //             // We remap the different iterator types to make | ||||
| //             // the algorithm less complex to understand. | ||||
| //             let last = match last { | ||||
| //                 Left(ascending) => match ascending { | ||||
| //                     Left(group) => Left(Left(group)), | ||||
| //                     Right(zero_level) => Right(Left(zero_level)), | ||||
| //                 }, | ||||
| //                 Right(descending) => match descending { | ||||
| //                     Left(group) => Left(Right(group)), | ||||
| //                     Right(zero_level) => Right(Right(zero_level)), | ||||
| //                 }, | ||||
| //             }; | ||||
|  | ||||
| //             match last { | ||||
| //                 Left(group) => { | ||||
| //                     for result in group { | ||||
| //                         match result { | ||||
| //                             Ok(((level, left, right), (string_bounds, mut docids))) => { | ||||
| //                                 docids &= &*documents_ids; | ||||
| //                                 if !docids.is_empty() { | ||||
| //                                     if self.must_reduce { | ||||
| //                                         *documents_ids -= &docids; | ||||
| //                                     } | ||||
|  | ||||
| //                                     let result = if is_ascending { | ||||
| //                                         match string_bounds { | ||||
| //                                             Some((left, right)) => FacetStringLevelZeroRange::new( | ||||
| //                                                 self.rtxn, | ||||
| //                                                 self.db, | ||||
| //                                                 self.field_id, | ||||
| //                                                 Included(left), | ||||
| //                                                 Included(right), | ||||
| //                                             ) | ||||
| //                                             .map(Right), | ||||
| //                                             None => FacetStringGroupRange::new( | ||||
| //                                                 self.rtxn, | ||||
| //                                                 self.db, | ||||
| //                                                 self.field_id, | ||||
| //                                                 NonZeroU8::new(level.get() - 1).unwrap(), | ||||
| //                                                 Included(left), | ||||
| //                                                 Included(right), | ||||
| //                                             ) | ||||
| //                                             .map(Left), | ||||
| //                                         } | ||||
| //                                         .map(Left) | ||||
| //                                     } else { | ||||
| //                                         match string_bounds { | ||||
| //                                             Some((left, right)) => { | ||||
| //                                                 FacetStringLevelZeroRevRange::new( | ||||
| //                                                     self.rtxn, | ||||
| //                                                     self.db, | ||||
| //                                                     self.field_id, | ||||
| //                                                     Included(left), | ||||
| //                                                     Included(right), | ||||
| //                                                 ) | ||||
| //                                                 .map(Right) | ||||
| //                                             } | ||||
| //                                             None => FacetStringGroupRevRange::new( | ||||
| //                                                 self.rtxn, | ||||
| //                                                 self.db, | ||||
| //                                                 self.field_id, | ||||
| //                                                 NonZeroU8::new(level.get() - 1).unwrap(), | ||||
| //                                                 Included(left), | ||||
| //                                                 Included(right), | ||||
| //                                             ) | ||||
| //                                             .map(Left), | ||||
| //                                         } | ||||
| //                                         .map(Right) | ||||
| //                                     }; | ||||
|  | ||||
| //                                     match result { | ||||
| //                                         Ok(iter) => { | ||||
| //                                             self.level_iters.push((docids, iter)); | ||||
| //                                             continue 'outer; | ||||
| //                                         } | ||||
| //                                         Err(e) => return Some(Err(e)), | ||||
| //                                     } | ||||
| //                                 } | ||||
| //                             } | ||||
| //                             Err(e) => return Some(Err(e)), | ||||
| //                         } | ||||
| //                     } | ||||
| //                 } | ||||
| //                 Right(zero_level) => { | ||||
| //                     // level zero only | ||||
| //                     for result in zero_level { | ||||
| //                         match result { | ||||
| //                             Ok((normalized, original, mut docids)) => { | ||||
| //                                 docids &= &*documents_ids; | ||||
| //                                 if !docids.is_empty() { | ||||
| //                                     if self.must_reduce { | ||||
| //                                         *documents_ids -= &docids; | ||||
| //                                     } | ||||
| //                                     return Some(Ok((normalized, original, docids))); | ||||
| //                                 } | ||||
| //                             } | ||||
| //                             Err(e) => return Some(Err(e)), | ||||
| //                         } | ||||
| //                     } | ||||
| //                 } | ||||
| //             } | ||||
|  | ||||
| //             self.level_iters.pop(); | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
| @@ -1,9 +1,79 @@ | ||||
| use heed::types::ByteSlice; | ||||
| use heed::{BytesDecode, RoTxn}; | ||||
|  | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; | ||||
|  | ||||
| pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; | ||||
| // pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; | ||||
| // pub use self::facet_string::FacetStringIter; | ||||
| pub use self::filter::Filter; | ||||
|  | ||||
| mod facet_distribution; | ||||
| mod facet_number; | ||||
| mod facet_string; | ||||
| mod facet_distribution_iter; | ||||
| mod facet_sort_ascending; | ||||
| mod facet_sort_descending; | ||||
| mod filter; | ||||
|  | ||||
| fn get_first_facet_value<'t, BoundCodec>( | ||||
|     txn: &'t RoTxn, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
| ) -> Option<BoundCodec::DItem> | ||||
| where | ||||
|     BoundCodec: BytesDecode<'t>, | ||||
| { | ||||
|     let mut level0prefix = vec![]; | ||||
|     level0prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|     level0prefix.push(0); | ||||
|     let mut level0_iter_forward = db | ||||
|         .as_polymorph() | ||||
|         .prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice()) | ||||
|         .unwrap(); | ||||
|     if let Some(first) = level0_iter_forward.next() { | ||||
|         let (first_key, _) = first.unwrap(); | ||||
|         let first_key = FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).unwrap(); | ||||
|         Some(first_key.left_bound) | ||||
|     } else { | ||||
|         None | ||||
|     } | ||||
| } | ||||
| fn get_last_facet_value<'t, BoundCodec>( | ||||
|     txn: &'t RoTxn, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
| ) -> Option<BoundCodec::DItem> | ||||
| where | ||||
|     BoundCodec: BytesDecode<'t>, | ||||
| { | ||||
|     let mut level0prefix = vec![]; | ||||
|     level0prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|     level0prefix.push(0); | ||||
|     let mut level0_iter_backward = db | ||||
|         .as_polymorph() | ||||
|         .rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice()) | ||||
|         .unwrap(); | ||||
|     if let Some(last) = level0_iter_backward.next() { | ||||
|         let (last_key, _) = last.unwrap(); | ||||
|         let last_key = FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).unwrap(); | ||||
|         Some(last_key.left_bound) | ||||
|     } else { | ||||
|         None | ||||
|     } | ||||
| } | ||||
| fn get_highest_level<'t>( | ||||
|     txn: &'t RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
| ) -> u8 { | ||||
|     let field_id_prefix = &field_id.to_be_bytes(); | ||||
|     db.as_polymorph() | ||||
|         .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&txn, field_id_prefix) | ||||
|         .unwrap() | ||||
|         .next() | ||||
|         .map(|el| { | ||||
|             let (key, _) = el.unwrap(); | ||||
|             let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap(); | ||||
|             key.level | ||||
|         }) | ||||
|         .unwrap_or(0) | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user