mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Refactor facet-related codecs
This commit is contained in:
		
				
					committed by
					
						 Loïc Lecrenier
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							9b55e582cd
						
					
				
				
					commit
					485a72306d
				
			| @@ -1,25 +1,19 @@ | ||||
| // mod facet_level_value_f64_codec; | ||||
| // mod facet_level_value_u32_codec; | ||||
| // mod facet_string_level_zero_codec; | ||||
| // mod facet_string_level_zero_value_codec; | ||||
| // mod facet_string_zero_bounds_value_codec; | ||||
| mod field_doc_id_facet_f64_codec; | ||||
| mod field_doc_id_facet_string_codec; | ||||
| mod ordered_f64_codec; | ||||
| mod str_ref; | ||||
|  | ||||
| pub mod new; | ||||
|  | ||||
| use heed::types::OwnedType; | ||||
|  | ||||
| // pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; | ||||
| // pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec; | ||||
| // pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec; | ||||
| // pub use self::facet_string_level_zero_value_codec::{ | ||||
| //     decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec, | ||||
| // }; | ||||
| // pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec; | ||||
| pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; | ||||
| pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; | ||||
| use crate::BEU16; | ||||
| pub use self::ordered_f64_codec::OrderedF64Codec; | ||||
| pub use self::str_ref::StrRefCodec; | ||||
| use crate::{CboRoaringBitmapCodec, BEU16}; | ||||
| use heed::types::OwnedType; | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use roaring::RoaringBitmap; | ||||
| use std::borrow::Cow; | ||||
| use std::convert::TryFrom; | ||||
| use std::marker::PhantomData; | ||||
|  | ||||
| /// Codec alias for a bare field id, handled through heed's `OwnedType` over the crate's `BEU16`. | ||||
| pub type FieldIdCodec = OwnedType<BEU16>; | ||||
|  | ||||
| @@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> { | ||||
|         None | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Key of an entry in the facet databases: a field id, a level and a bound. | ||||
| /// | ||||
| /// `FacetGroupKeyCodec` serializes the fields in declaration order: the field id as two | ||||
| /// big-endian bytes, the level as one byte, then the bound encoded by the inner codec. | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | ||||
| pub struct FacetGroupKey<T> { | ||||
|     /// Id of the field this key belongs to. | ||||
|     pub field_id: u16, | ||||
|     /// Level of the group. NOTE(review): presumably level 0 holds the individual facet | ||||
|     /// values and higher levels hold groups of them; confirm against the indexing code. | ||||
|     pub level: u8, | ||||
|     /// Bound of the group; its concrete type is chosen by the codec in use. | ||||
|     pub left_bound: T, | ||||
| } | ||||
| impl<'a> FacetGroupKey<&'a [u8]> { | ||||
|     /// Turns a key borrowing its bound into one that owns it. | ||||
|     /// | ||||
|     /// The field id and level are carried over unchanged; the bound bytes are copied | ||||
|     /// into a freshly allocated `Vec<u8>`. | ||||
|     pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> { | ||||
|         let FacetGroupKey { field_id, level, left_bound } = self; | ||||
|         FacetGroupKey { field_id, level, left_bound: left_bound.to_vec() } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // No lifetime parameter on this impl: the returned key borrows from `&self`, | ||||
| // so the elided lifetime of `as_ref` is all that is needed. | ||||
| impl FacetGroupKey<Vec<u8>> { | ||||
|     /// Returns a key with the same field id and level whose bound borrows | ||||
|     /// the bytes owned by `self`, without copying them. | ||||
|     pub fn as_ref(&self) -> FacetGroupKey<&[u8]> { | ||||
|         FacetGroupKey { | ||||
|             field_id: self.field_id, | ||||
|             level: self.level, | ||||
|             left_bound: self.left_bound.as_slice(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Value stored next to a `FacetGroupKey`: a one-byte size and a bitmap. | ||||
| /// | ||||
| /// `FacetGroupValueCodec` writes `size` as the first byte, followed by the serialized bitmap. | ||||
| #[derive(Debug)] | ||||
| pub struct FacetGroupValue { | ||||
|     /// NOTE(review): presumably the number of lower-level entries covered by this group; | ||||
|     /// confirm against the code that builds the levels. | ||||
|     pub size: u8, | ||||
|     /// Bitmap attached to the entry; callers in this crate read it as document ids. | ||||
|     pub bitmap: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| /// Codec for `FacetGroupKey`, generic over the codec `T` used for the bound. | ||||
| /// | ||||
| /// The type only carries `T` at the type level; it has no constructor | ||||
| /// and is used through its `BytesEncode` / `BytesDecode` implementations. | ||||
| pub struct FacetGroupKeyCodec<T> { | ||||
|     // Marker tying the inner bound codec to this type without storing a value. | ||||
|     _phantom: PhantomData<T>, | ||||
| } | ||||
|  | ||||
| impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T> | ||||
| where | ||||
|     T: BytesEncode<'a>, | ||||
|     T::EItem: Sized, | ||||
| { | ||||
|     type EItem = FacetGroupKey<T::EItem>; | ||||
|  | ||||
|     /// Serializes a key as `field_id` (2 bytes, big-endian), `level` (1 byte), | ||||
|     /// then the bound as encoded by `T`. | ||||
|     /// | ||||
|     /// Returns `None` when `T` fails to encode the bound. | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         // Encode the bound first so the output buffer can be sized in one go. | ||||
|         let encoded_bound = T::bytes_encode(&value.left_bound)?; | ||||
|  | ||||
|         let mut key_bytes = Vec::with_capacity(3 + encoded_bound.len()); | ||||
|         key_bytes.extend_from_slice(&value.field_id.to_be_bytes()); | ||||
|         key_bytes.push(value.level); | ||||
|         key_bytes.extend_from_slice(&encoded_bound); | ||||
|  | ||||
|         Some(Cow::Owned(key_bytes)) | ||||
|     } | ||||
| } | ||||
| impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> | ||||
| where | ||||
|     T: BytesDecode<'a>, | ||||
| { | ||||
|     type DItem = FacetGroupKey<T::DItem>; | ||||
|  | ||||
|     /// Parses a key laid out as `field_id` (2 bytes, big-endian), `level` (1 byte), | ||||
|     /// then the bound, which is handed to `T` for decoding. | ||||
|     /// | ||||
|     /// Returns `None` when `bytes` holds fewer than three bytes or when `T` | ||||
|     /// cannot decode the remaining bytes. | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         // Use checked accessors: plain indexing would panic on a truncated key, | ||||
|         // whereas this trait reports malformed input by returning `None`. | ||||
|         let fid = u16::from_be_bytes(<[u8; 2]>::try_from(bytes.get(..2)?).ok()?); | ||||
|         let level = *bytes.get(2)?; | ||||
|         // `get(3..)` yields an empty slice for a key of exactly three bytes. | ||||
|         let bound = T::bytes_decode(bytes.get(3..)?)?; | ||||
|         Some(FacetGroupKey { field_id: fid, level, left_bound: bound }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Codec for `FacetGroupValue`: one size byte followed by the serialized bitmap. | ||||
| pub struct FacetGroupValueCodec; | ||||
| impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | ||||
|     type EItem = FacetGroupValue; | ||||
|  | ||||
|     /// Writes `value.size` as the first byte, then appends the bitmap | ||||
|     /// using `CboRoaringBitmapCodec::serialize_into`. Always returns `Some`. | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let mut encoded = vec![value.size]; | ||||
|         CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut encoded); | ||||
|         Some(Cow::Owned(encoded)) | ||||
|     } | ||||
| } | ||||
| impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | ||||
|     type DItem = FacetGroupValue; | ||||
|  | ||||
|     /// Reads the size from the first byte and deserializes the rest as a bitmap | ||||
|     /// with `CboRoaringBitmapCodec::deserialize_from`. | ||||
|     /// | ||||
|     /// Returns `None` when `bytes` is empty or the bitmap cannot be deserialized. | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         // `split_first` replaces `bytes[0]` / `bytes[1..]`, which would panic on an empty value. | ||||
|         let (&size, bitmap_bytes) = bytes.split_first()?; | ||||
|         let bitmap = CboRoaringBitmapCodec::deserialize_from(bitmap_bytes).ok()?; | ||||
|         Some(FacetGroupValue { size, bitmap }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Pass-through codec: both directions hand back the byte slice they were given, | ||||
| /// borrowed for `'a`, without copying or validating it. | ||||
| pub struct ByteSliceRef; | ||||
|  | ||||
| impl<'a> BytesEncode<'a> for ByteSliceRef { | ||||
|     type EItem = &'a [u8]; | ||||
|  | ||||
|     /// Borrows the slice as-is; never fails and never allocates. | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let slice: &'a [u8] = *item; | ||||
|         Some(Cow::Borrowed(slice)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> BytesDecode<'a> for ByteSliceRef { | ||||
|     type DItem = &'a [u8]; | ||||
|  | ||||
|     /// Returns the stored bytes untouched; never fails. | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         Some(bytes) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,120 +0,0 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::convert::TryFrom; | ||||
| use std::marker::PhantomData; | ||||
|  | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::CboRoaringBitmapCodec; | ||||
|  | ||||
| pub mod ordered_f64_codec; | ||||
| pub mod str_ref; | ||||
| // TODO: these codecs were quickly written and not fast/resilient enough | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | ||||
| pub struct FacetKey<T> { | ||||
|     pub field_id: u16, | ||||
|     pub level: u8, | ||||
|     pub left_bound: T, | ||||
| } | ||||
| impl<'a> FacetKey<&'a [u8]> { | ||||
|     pub fn into_owned(self) -> FacetKey<Vec<u8>> { | ||||
|         FacetKey { | ||||
|             field_id: self.field_id, | ||||
|             level: self.level, | ||||
|             left_bound: self.left_bound.to_vec(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> FacetKey<Vec<u8>> { | ||||
|     pub fn as_ref(&self) -> FacetKey<&[u8]> { | ||||
|         FacetKey { | ||||
|             field_id: self.field_id, | ||||
|             level: self.level, | ||||
|             left_bound: self.left_bound.as_slice(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct FacetGroupValue { | ||||
|     pub size: u8, | ||||
|     pub bitmap: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| pub struct FacetKeyCodec<T> { | ||||
|     _phantom: PhantomData<T>, | ||||
| } | ||||
|  | ||||
| impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T> | ||||
| where | ||||
|     T: BytesEncode<'a>, | ||||
|     T::EItem: Sized, | ||||
| { | ||||
|     type EItem = FacetKey<T::EItem>; | ||||
|  | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let mut v = vec![]; | ||||
|         v.extend_from_slice(&value.field_id.to_be_bytes()); | ||||
|         v.extend_from_slice(&[value.level]); | ||||
|  | ||||
|         let bound = T::bytes_encode(&value.left_bound)?; | ||||
|         v.extend_from_slice(&bound); | ||||
|  | ||||
|         Some(Cow::Owned(v)) | ||||
|     } | ||||
| } | ||||
| impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T> | ||||
| where | ||||
|     T: BytesDecode<'a>, | ||||
| { | ||||
|     type DItem = FacetKey<T::DItem>; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?); | ||||
|         let level = bytes[2]; | ||||
|         let bound = T::bytes_decode(&bytes[3..])?; | ||||
|         Some(FacetKey { field_id: fid, level, left_bound: bound }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct FacetGroupValueCodec; | ||||
| impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | ||||
|     type EItem = FacetGroupValue; | ||||
|  | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let mut v = vec![]; | ||||
|         v.push(value.size); | ||||
|         CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); | ||||
|         Some(Cow::Owned(v)) | ||||
|     } | ||||
| } | ||||
| impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | ||||
|     type DItem = FacetGroupValue; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let size = bytes[0]; | ||||
|         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?; | ||||
|         Some(FacetGroupValue { size, bitmap }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO: get rid of this codec as it is named confusingly + should really be part of heed | ||||
| // or even replace the current ByteSlice codec | ||||
| pub struct MyByteSlice; | ||||
|  | ||||
| impl<'a> BytesEncode<'a> for MyByteSlice { | ||||
|     type EItem = &'a [u8]; | ||||
|  | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         Some(Cow::Borrowed(item)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> BytesDecode<'a> for MyByteSlice { | ||||
|     type DItem = &'a [u8]; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         Some(bytes) | ||||
|     } | ||||
| } | ||||
| @@ -14,15 +14,10 @@ use time::OffsetDateTime; | ||||
| use crate::error::{InternalError, UserError}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::FieldsIdsMap; | ||||
| use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::new::str_ref::StrRefCodec; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec}; | ||||
| use crate::heed_codec::facet::{ | ||||
|     // FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, | ||||
|     FieldDocIdFacetF64Codec, | ||||
|     FieldDocIdFacetStringCodec, | ||||
|     FieldIdCodec, | ||||
| }; | ||||
| use crate::heed_codec::facet::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::StrRefCodec; | ||||
| use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec}; | ||||
| use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec}; | ||||
| use crate::{ | ||||
|     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, | ||||
|     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, | ||||
| @@ -130,9 +125,9 @@ pub struct Index { | ||||
|     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||
|  | ||||
|     /// Maps the facet field id and ranges of numbers with the docids that correspond to them. | ||||
|     pub facet_id_f64_docids: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
|     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
|     /// Maps the facet field id and ranges of strings with the docids that correspond to them. | ||||
|     pub facet_id_string_docids: Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>, | ||||
|     pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>, | ||||
|  | ||||
|     /// Maps the document id, the facet field id and the numbers. | ||||
|     pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>, | ||||
|   | ||||
| @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{Criterion, CriterionParameters, CriterionResult}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef}; | ||||
| use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; | ||||
| use crate::search::facet::facet_sort_ascending::ascending_facet_sort; | ||||
| use crate::search::facet::facet_sort_descending::descending_facet_sort; | ||||
| @@ -196,14 +196,14 @@ fn facet_ordered<'t>( | ||||
|  | ||||
|         let number_iter = make_iter( | ||||
|             rtxn, | ||||
|             index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), | ||||
|             index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), | ||||
|             field_id, | ||||
|             candidates.clone(), | ||||
|         )?; | ||||
|  | ||||
|         let string_iter = make_iter( | ||||
|             rtxn, | ||||
|             index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), | ||||
|             index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), | ||||
|             field_id, | ||||
|             candidates, | ||||
|         )?; | ||||
|   | ||||
| @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{Distinct, DocIter}; | ||||
| use crate::error::InternalError; | ||||
| use crate::heed_codec::facet::new::FacetKey; | ||||
| use crate::heed_codec::facet::FacetGroupKey; | ||||
| use crate::heed_codec::facet::*; | ||||
| use crate::index::db_name; | ||||
| use crate::{DocumentId, FieldId, Index, Result}; | ||||
| @@ -48,7 +48,7 @@ impl<'a> FacetDistinctIter<'a> { | ||||
|     fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> { | ||||
|         self.index | ||||
|             .facet_id_string_docids | ||||
|             .get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key }) | ||||
|             .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key }) | ||||
|             .map(|opt| opt.map(|v| v.bitmap)) | ||||
|     } | ||||
|  | ||||
| @@ -56,7 +56,7 @@ impl<'a> FacetDistinctIter<'a> { | ||||
|         // get facet docids on level 0 | ||||
|         self.index | ||||
|             .facet_id_f64_docids | ||||
|             .get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key }) | ||||
|             .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key }) | ||||
|             .map(|opt| opt.map(|v| v.bitmap)) | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -8,12 +8,11 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::error::UserError; | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::new::str_ref::StrRefCodec; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::facet::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::StrRefCodec; | ||||
| use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||
| use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; | ||||
| use crate::search::facet::facet_distribution_iter; | ||||
| // use crate::search::facet::FacetStringIter; | ||||
| use crate::{FieldId, Index, Result}; | ||||
|  | ||||
| /// The default number of values by facets that will | ||||
| @@ -138,7 +137,7 @@ impl<'a> FacetDistribution<'a> { | ||||
|     ) -> heed::Result<()> { | ||||
|         facet_distribution_iter::iterate_over_facet_distribution( | ||||
|             self.rtxn, | ||||
|             self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), | ||||
|             self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), | ||||
|             field_id, | ||||
|             candidates, | ||||
|             |facet_key, nbr_docids| { | ||||
| @@ -161,7 +160,7 @@ impl<'a> FacetDistribution<'a> { | ||||
|     ) -> heed::Result<()> { | ||||
|         facet_distribution_iter::iterate_over_facet_distribution( | ||||
|             self.rtxn, | ||||
|             self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), | ||||
|             self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), | ||||
|             field_id, | ||||
|             candidates, | ||||
|             |facet_key, nbr_docids| { | ||||
| @@ -191,7 +190,7 @@ impl<'a> FacetDistribution<'a> { | ||||
|         let iter = db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? | ||||
|             .remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>(); | ||||
|             .remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>(); | ||||
|  | ||||
|         for result in iter { | ||||
|             let (key, value) = result?; | ||||
| @@ -206,7 +205,7 @@ impl<'a> FacetDistribution<'a> { | ||||
|             .facet_id_string_docids | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? | ||||
|             .remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>(); | ||||
|             .remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>(); | ||||
|  | ||||
|         // TODO: get the original value of the facet somewhere (in the documents DB?) | ||||
|         for result in iter { | ||||
|   | ||||
| @@ -4,11 +4,11 @@ use heed::Result; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level}; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec}; | ||||
|  | ||||
| pub fn iterate_over_facet_distribution<'t, CB>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     candidates: &RoaringBitmap, | ||||
|     callback: CB, | ||||
| @@ -18,9 +18,9 @@ where | ||||
| { | ||||
|     let mut fd = FacetDistribution { rtxn, db, field_id, callback }; | ||||
|     let highest_level = | ||||
|         get_highest_level(rtxn, db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?; | ||||
|         get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?; | ||||
|  | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { | ||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { | ||||
|         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; | ||||
|         return Ok(()); | ||||
|     } else { | ||||
| @@ -33,7 +33,7 @@ where | ||||
|     CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, | ||||
| { | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     callback: CB, | ||||
| } | ||||
| @@ -49,7 +49,7 @@ where | ||||
|         group_size: usize, | ||||
|     ) -> Result<ControlFlow<()>> { | ||||
|         let starting_key = | ||||
|             FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; | ||||
|             FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; | ||||
|         let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size); | ||||
|         for el in iter { | ||||
|             let (key, value) = el?; | ||||
| @@ -78,7 +78,7 @@ where | ||||
|         if level == 0 { | ||||
|             return self.iterate_level_0(candidates, starting_bound, group_size); | ||||
|         } | ||||
|         let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound }; | ||||
|         let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound }; | ||||
|         let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size); | ||||
|  | ||||
|         for el in iter { | ||||
| @@ -116,7 +116,7 @@ mod tests { | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use super::iterate_over_facet_distribution; | ||||
|     use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::milli_snap; | ||||
|     use crate::search::facet::test::FacetIndex; | ||||
|  | ||||
|   | ||||
| @@ -4,12 +4,12 @@ use heed::BytesEncode; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef}; | ||||
| use crate::Result; | ||||
|  | ||||
| pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||
|     right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||
| @@ -42,13 +42,13 @@ where | ||||
|         } | ||||
|         Bound::Unbounded => Bound::Unbounded, | ||||
|     }; | ||||
|     let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(); | ||||
|     let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(); | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids }; | ||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||
|  | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { | ||||
|         let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap(); | ||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { | ||||
|         let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap(); | ||||
|         f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; | ||||
|         Ok(docids) | ||||
|     } else { | ||||
| @@ -59,7 +59,7 @@ where | ||||
| /// Fetch the document ids that have a facet with a value between the two given bounds | ||||
| struct FacetRangeSearch<'t, 'b, 'bitmap> { | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     left: Bound<&'b [u8]>, | ||||
|     right: Bound<&'b [u8]>, | ||||
| @@ -68,7 +68,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> { | ||||
| impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | ||||
|     fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { | ||||
|         let left_key = | ||||
|             FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound }; | ||||
|             FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound }; | ||||
|         let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); | ||||
|         for el in iter { | ||||
|             let (key, value) = el?; | ||||
| @@ -117,7 +117,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | ||||
|             return self.run_level_0(starting_left_bound, group_size); | ||||
|         } | ||||
|  | ||||
|         let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound }; | ||||
|         let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound }; | ||||
|         let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); | ||||
|  | ||||
|         let (mut previous_key, mut previous_value) = iter.next().unwrap()?; | ||||
| @@ -258,8 +258,8 @@ mod tests { | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use super::find_docids_of_facet_within_bounds; | ||||
|     use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::new::FacetKeyCodec; | ||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::FacetGroupKeyCodec; | ||||
|     use crate::milli_snap; | ||||
|     use crate::search::facet::test::FacetIndex; | ||||
|     use crate::snapshot_tests::display_bitmap; | ||||
| @@ -310,7 +310,7 @@ mod tests { | ||||
|                 let end = Bound::Included(i); | ||||
|                 let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|                     &txn, | ||||
|                     index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), | ||||
|                     index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), | ||||
|                     0, | ||||
|                     &start, | ||||
|                     &end, | ||||
| @@ -326,7 +326,7 @@ mod tests { | ||||
|                 let end = Bound::Excluded(i); | ||||
|                 let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|                     &txn, | ||||
|                     index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), | ||||
|                     index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), | ||||
|                     0, | ||||
|                     &start, | ||||
|                     &end, | ||||
| @@ -352,7 +352,7 @@ mod tests { | ||||
|                 let end = Bound::Included(255.); | ||||
|                 let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|                     &txn, | ||||
|                     index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), | ||||
|                     index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), | ||||
|                     0, | ||||
|                     &start, | ||||
|                     &end, | ||||
| @@ -371,7 +371,7 @@ mod tests { | ||||
|                 let end = Bound::Excluded(255.); | ||||
|                 let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|                     &txn, | ||||
|                     index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), | ||||
|                     index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), | ||||
|                     0, | ||||
|                     &start, | ||||
|                     &end, | ||||
| @@ -399,7 +399,7 @@ mod tests { | ||||
|                 let end = Bound::Included(255. - i); | ||||
|                 let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|                     &txn, | ||||
|                     index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), | ||||
|                     index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), | ||||
|                     0, | ||||
|                     &start, | ||||
|                     &end, | ||||
| @@ -418,7 +418,7 @@ mod tests { | ||||
|                 let end = Bound::Excluded(255. - i); | ||||
|                 let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|                     &txn, | ||||
|                     index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), | ||||
|                     index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), | ||||
|                     0, | ||||
|                     &start, | ||||
|                     &end, | ||||
|   | ||||
| @@ -2,19 +2,19 @@ use heed::Result; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level}; | ||||
| use crate::heed_codec::facet::new::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, | ||||
| }; | ||||
|  | ||||
| pub fn ascending_facet_sort<'t>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { | ||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { | ||||
|         let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; | ||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { | ||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||
|         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); | ||||
|  | ||||
|         Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })) | ||||
| @@ -25,11 +25,11 @@ pub fn ascending_facet_sort<'t>( | ||||
|  | ||||
| struct AscendingFacetSort<'t, 'e> { | ||||
|     rtxn: &'t heed::RoTxn<'e>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     stack: Vec<( | ||||
|         RoaringBitmap, | ||||
|         std::iter::Take<heed::RoRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>, | ||||
|         std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>, | ||||
|     )>, | ||||
| } | ||||
|  | ||||
| @@ -41,7 +41,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { | ||||
|             let (documents_ids, deepest_iter) = self.stack.last_mut()?; | ||||
|             for result in deepest_iter { | ||||
|                 let ( | ||||
|                     FacetKey { level, left_bound, field_id }, | ||||
|                     FacetGroupKey { level, left_bound, field_id }, | ||||
|                     FacetGroupValue { size: group_size, mut bitmap }, | ||||
|                 ) = result.unwrap(); | ||||
|                 // The range is unbounded on the right and the group size for the highest level is MAX, | ||||
| @@ -65,7 +65,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { | ||||
|                         return Some(Ok(bitmap)); | ||||
|                     } | ||||
|                     let starting_key_below = | ||||
|                         FacetKey { field_id: self.field_id, level: level - 1, left_bound }; | ||||
|                         FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound }; | ||||
|                     let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) { | ||||
|                         Ok(iter) => iter, | ||||
|                         Err(e) => return Some(Err(e.into())), | ||||
| @@ -86,7 +86,7 @@ mod tests { | ||||
|     use rand::{Rng, SeedableRng}; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::milli_snap; | ||||
|     use crate::search::facet::facet_sort_ascending::ascending_facet_sort; | ||||
|     use crate::search::facet::test::FacetIndex; | ||||
|   | ||||
| @@ -4,21 +4,21 @@ use heed::Result; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||
| use crate::heed_codec::facet::new::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, | ||||
| }; | ||||
|  | ||||
| pub fn descending_facet_sort<'t>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { | ||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { | ||||
|         let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; | ||||
|         let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap(); | ||||
|         let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound }; | ||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { | ||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||
|         let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap(); | ||||
|         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; | ||||
|         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); | ||||
|         Ok(Box::new(DescendingFacetSort { | ||||
|             rtxn, | ||||
| @@ -33,11 +33,11 @@ pub fn descending_facet_sort<'t>( | ||||
|  | ||||
| struct DescendingFacetSort<'t> { | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     stack: Vec<( | ||||
|         RoaringBitmap, | ||||
|         std::iter::Take<heed::RoRevRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>, | ||||
|         std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>, | ||||
|         Bound<&'t [u8]>, | ||||
|     )>, | ||||
| } | ||||
| @@ -50,7 +50,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | ||||
|             let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?; | ||||
|             while let Some(result) = deepest_iter.next() { | ||||
|                 let ( | ||||
|                     FacetKey { level, left_bound, field_id }, | ||||
|                     FacetGroupKey { level, left_bound, field_id }, | ||||
|                     FacetGroupValue { size: group_size, mut bitmap }, | ||||
|                 ) = result.unwrap(); | ||||
|                 // The range is unbounded on the right and the group size for the highest level is MAX, | ||||
| @@ -72,15 +72,15 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | ||||
|                     if level == 0 { | ||||
|                         return Some(Ok(bitmap)); | ||||
|                     } | ||||
|                     let starting_key_below = FacetKey { field_id, level: level - 1, left_bound }; | ||||
|                     let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound }; | ||||
|  | ||||
|                     let end_key_kelow = match *right_bound { | ||||
|                         Bound::Included(right) => Bound::Included(FacetKey { | ||||
|                         Bound::Included(right) => Bound::Included(FacetGroupKey { | ||||
|                             field_id, | ||||
|                             level: level - 1, | ||||
|                             left_bound: right, | ||||
|                         }), | ||||
|                         Bound::Excluded(right) => Bound::Excluded(FacetKey { | ||||
|                         Bound::Excluded(right) => Bound::Excluded(FacetGroupKey { | ||||
|                             field_id, | ||||
|                             level: level - 1, | ||||
|                             left_bound: right, | ||||
| @@ -90,7 +90,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | ||||
|                     let prev_right_bound = *right_bound; | ||||
|                     *right_bound = Bound::Excluded(left_bound); | ||||
|                     let iter = | ||||
|                         match self.db.remap_key_type::<FacetKeyCodec<MyByteSlice>>().rev_range( | ||||
|                         match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range( | ||||
|                             &self.rtxn, | ||||
|                             &(Bound::Included(starting_key_below), end_key_kelow), | ||||
|                         ) { | ||||
| @@ -114,8 +114,8 @@ mod tests { | ||||
|     use rand::{Rng, SeedableRng}; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; | ||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef}; | ||||
|     use crate::milli_snap; | ||||
|     use crate::search::facet::facet_sort_descending::descending_facet_sort; | ||||
|     use crate::search::facet::test::FacetIndex; | ||||
| @@ -162,7 +162,7 @@ mod tests { | ||||
|             let txn = index.env.read_txn().unwrap(); | ||||
|             let candidates = (200..=300).into_iter().collect::<RoaringBitmap>(); | ||||
|             let mut results = String::new(); | ||||
|             let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>(); | ||||
|             let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(); | ||||
|             let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); | ||||
|             for el in iter { | ||||
|                 let docids = el.unwrap(); | ||||
|   | ||||
| @@ -9,8 +9,8 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::facet_range_search; | ||||
| use crate::error::{Error, UserError}; | ||||
| use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec}; | ||||
| use crate::heed_codec::facet::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||
| use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; | ||||
|  | ||||
| /// The maximum number of filters the filter AST can process. | ||||
| @@ -180,7 +180,11 @@ impl<'a> Filter<'a> { | ||||
|                 let string_docids = strings_db | ||||
|                     .get( | ||||
|                         rtxn, | ||||
|                         &FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() }, | ||||
|                         &FacetGroupKey { | ||||
|                             field_id, | ||||
|                             level: 0, | ||||
|                             left_bound: &val.value().to_lowercase(), | ||||
|                         }, | ||||
|                     )? | ||||
|                     .map(|v| v.bitmap) | ||||
|                     .unwrap_or_default(); | ||||
| @@ -218,10 +222,10 @@ impl<'a> Filter<'a> { | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .get_lower_than_or_equal_to( | ||||
|                 rtxn, | ||||
|                 &FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX }, | ||||
|                 &FacetGroupKey { field_id, level: u8::MAX, left_bound: f64::MAX }, | ||||
|             )? | ||||
|             .and_then( | ||||
|                 |(FacetKey { field_id: id, level, .. }, _)| { | ||||
|                 |(FacetGroupKey { field_id: id, level, .. }, _)| { | ||||
|                     if id == field_id { | ||||
|                         Some(level) | ||||
|                     } else { | ||||
| @@ -252,7 +256,7 @@ impl<'a> Filter<'a> { | ||||
|     /// going deeper through the levels. | ||||
|     fn explore_facet_number_levels( | ||||
|         rtxn: &heed::RoTxn, | ||||
|         db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
|         db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
|         field_id: FieldId, | ||||
|         level: u8, | ||||
|         left: Bound<f64>, | ||||
|   | ||||
| @@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn}; | ||||
|  | ||||
| pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; | ||||
| pub use self::filter::Filter; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; | ||||
|  | ||||
| mod facet_distribution; | ||||
| mod facet_distribution_iter; | ||||
| @@ -14,7 +14,7 @@ mod filter; | ||||
|  | ||||
| pub(crate) fn get_first_facet_value<'t, BoundCodec>( | ||||
|     txn: &'t RoTxn, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ||||
| where | ||||
| @@ -28,7 +28,7 @@ where | ||||
|     if let Some(first) = level0_iter_forward.next() { | ||||
|         let (first_key, _) = first?; | ||||
|         let first_key = | ||||
|             FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?; | ||||
|             FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?; | ||||
|         Ok(Some(first_key.left_bound)) | ||||
|     } else { | ||||
|         Ok(None) | ||||
| @@ -36,7 +36,7 @@ where | ||||
| } | ||||
| pub(crate) fn get_last_facet_value<'t, BoundCodec>( | ||||
|     txn: &'t RoTxn, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ||||
| where | ||||
| @@ -51,7 +51,7 @@ where | ||||
|     if let Some(last) = level0_iter_backward.next() { | ||||
|         let (last_key, _) = last?; | ||||
|         let last_key = | ||||
|             FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?; | ||||
|             FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?; | ||||
|         Ok(Some(last_key.left_bound)) | ||||
|     } else { | ||||
|         Ok(None) | ||||
| @@ -59,7 +59,7 @@ where | ||||
| } | ||||
| pub(crate) fn get_highest_level<'t>( | ||||
|     txn: &'t RoTxn<'t>, | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
| ) -> heed::Result<u8> { | ||||
|     let field_id_prefix = &field_id.to_be_bytes(); | ||||
| @@ -69,7 +69,7 @@ pub(crate) fn get_highest_level<'t>( | ||||
|         .next() | ||||
|         .map(|el| { | ||||
|             let (key, _) = el.unwrap(); | ||||
|             let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap(); | ||||
|             let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap(); | ||||
|             key.level | ||||
|         }) | ||||
|         .unwrap_or(0)) | ||||
| @@ -84,8 +84,8 @@ pub mod test { | ||||
|     use heed::{BytesDecode, BytesEncode, Env, RwTxn}; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use crate::heed_codec::facet::new::{ | ||||
|         FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
|     use crate::heed_codec::facet::{ | ||||
|         FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, | ||||
|     }; | ||||
|     use crate::snapshot_tests::display_bitmap; | ||||
|     use crate::update::FacetsUpdateIncremental; | ||||
| @@ -101,7 +101,7 @@ pub mod test { | ||||
|     } | ||||
|  | ||||
|     pub struct Database { | ||||
|         pub content: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|         pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|         pub group_size: usize, | ||||
|         pub max_group_size: usize, | ||||
|         _tempdir: Rc<tempfile::TempDir>, | ||||
| @@ -184,7 +184,7 @@ pub mod test { | ||||
|             let mut iter = self.db.content.iter(&txn).unwrap(); | ||||
|             while let Some(el) = iter.next() { | ||||
|                 let (key, value) = el.unwrap(); | ||||
|                 let FacetKey { field_id, level, left_bound: bound } = key; | ||||
|                 let FacetGroupKey { field_id, level, left_bound: bound } = key; | ||||
|                 let bound = BoundCodec::bytes_decode(bound).unwrap(); | ||||
|                 let FacetGroupValue { size, bitmap } = value; | ||||
|                 writeln!( | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use std::path::Path; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey}; | ||||
| use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey}; | ||||
| use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index}; | ||||
|  | ||||
| #[track_caller] | ||||
| @@ -280,7 +280,7 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String { | ||||
| } | ||||
| pub fn snap_facet_id_f64_docids(index: &Index) -> String { | ||||
|     let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |( | ||||
|         FacetKey { field_id, level, left_bound }, | ||||
|         FacetGroupKey { field_id, level, left_bound }, | ||||
|         FacetGroupValue { size, bitmap }, | ||||
|     )| { | ||||
|         &format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap)) | ||||
| @@ -289,7 +289,7 @@ pub fn snap_facet_id_f64_docids(index: &Index) -> String { | ||||
| } | ||||
| pub fn snap_facet_id_string_docids(index: &Index) -> String { | ||||
|     let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |( | ||||
|         FacetKey { field_id, level, left_bound }, | ||||
|         FacetGroupKey { field_id, level, left_bound }, | ||||
|         FacetGroupValue { size, bitmap }, | ||||
|     )| { | ||||
|         &format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap)) | ||||
|   | ||||
| @@ -11,7 +11,7 @@ use time::OffsetDateTime; | ||||
| use super::{ClearDocuments, FacetsUpdateBulk}; | ||||
| use crate::error::{InternalError, UserError}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; | ||||
| use crate::heed_codec::CboRoaringBitmapCodec; | ||||
| use crate::index::{db_name, main_key}; | ||||
| use crate::{ | ||||
| @@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>( | ||||
| ) -> Result<()> { | ||||
|     let db = match facet_type { | ||||
|         FacetType::String => { | ||||
|             index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|             index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|         } | ||||
|         FacetType::Number => { | ||||
|             index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|             index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|         } | ||||
|     }; | ||||
|     let mut modified = false; | ||||
|   | ||||
| @@ -12,8 +12,8 @@ use time::OffsetDateTime; | ||||
|  | ||||
| use crate::error::InternalError; | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::new::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, | ||||
| }; | ||||
| use crate::update::index_documents::{ | ||||
|     create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader, | ||||
| @@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | ||||
|  | ||||
| pub struct FacetsUpdateBulk<'i> { | ||||
|     index: &'i Index, | ||||
|     database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     level_group_size: usize, | ||||
|     min_level_size: usize, | ||||
|     facet_type: FacetType, | ||||
| @@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|             index, | ||||
|             database: match facet_type { | ||||
|                 FacetType::String => { | ||||
|                     index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|                     index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|                 } | ||||
|                 FacetType::Number => { | ||||
|                     index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|                     index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|                 } | ||||
|             }, | ||||
|             level_group_size: 4, | ||||
| @@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|             index, | ||||
|             database: match facet_type { | ||||
|                 FacetType::String => { | ||||
|                     index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|                     index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|                 } | ||||
|                 FacetType::Number => { | ||||
|                     index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|                     index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|                 } | ||||
|             }, | ||||
|             level_group_size: 4, | ||||
| @@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|     } | ||||
|  | ||||
|     fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> { | ||||
|         let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; | ||||
|         let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; | ||||
|         let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; | ||||
|         let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; | ||||
|         let range = left..=right; | ||||
|         self.database.delete_range(wtxn, &range).map(drop)?; | ||||
|         Ok(()) | ||||
| @@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|             for level_reader in level_readers { | ||||
|                 let mut cursor = level_reader.into_cursor()?; | ||||
|                 while let Some((k, v)) = cursor.move_on_next()? { | ||||
|                     let key = FacetKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap(); | ||||
|                     let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap(); | ||||
|                     let value = FacetGroupValueCodec::bytes_decode(v).unwrap(); | ||||
|                     println!("inserting {key:?} {value:?}"); | ||||
|  | ||||
| @@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|  | ||||
| struct ComputeHigherLevels<'t> { | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     level_group_size: usize, | ||||
|     min_level_size: usize, | ||||
| @@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> { | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())? | ||||
|             .remap_types::<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>(); | ||||
|             .remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>(); | ||||
|  | ||||
|         let mut left_bound: &[u8] = &[]; | ||||
|         let mut first_iteration_for_new_group = true; | ||||
| @@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> { | ||||
|                 for ((bitmap, left_bound), group_size) in | ||||
|                     bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) | ||||
|                 { | ||||
|                     let key = FacetKey { field_id: self.field_id, level, left_bound }; | ||||
|                     let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; | ||||
|                     let key = | ||||
|                         FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?; | ||||
|                         FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?; | ||||
|                     let value = FacetGroupValue { size: group_size, bitmap }; | ||||
|                     let value = | ||||
|                         FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||
| @@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> { | ||||
|             for ((bitmap, left_bound), group_size) in | ||||
|                 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) | ||||
|             { | ||||
|                 let key = FacetKey { field_id: self.field_id, level, left_bound }; | ||||
|                 let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; | ||||
|                 let key = | ||||
|                     FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?; | ||||
|                     FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?; | ||||
|                 let value = FacetGroupValue { size: group_size, bitmap }; | ||||
|                 let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||
|                 cur_writer.insert(key, value)?; | ||||
|   | ||||
| @@ -2,8 +2,8 @@ use heed::types::ByteSlice; | ||||
| use heed::{BytesDecode, Error, RoTxn, RwTxn}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::facet::new::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, | ||||
| }; | ||||
| use crate::search::facet::get_highest_level; | ||||
| use crate::Result; | ||||
| @@ -19,13 +19,13 @@ enum DeletionResult { | ||||
| } | ||||
|  | ||||
| pub struct FacetsUpdateIncremental { | ||||
|     db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     group_size: usize, | ||||
|     min_level_size: usize, | ||||
|     max_group_size: usize, | ||||
| } | ||||
| impl FacetsUpdateIncremental { | ||||
|     pub fn new(db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>) -> Self { | ||||
|     pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self { | ||||
|         Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 } | ||||
|     } | ||||
| } | ||||
| @@ -36,7 +36,7 @@ impl FacetsUpdateIncremental { | ||||
|         level: u8, | ||||
|         search_key: &[u8], | ||||
|         txn: &RoTxn, | ||||
|     ) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> { | ||||
|     ) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> { | ||||
|         let mut prefix = vec![]; | ||||
|         prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|         prefix.push(level); | ||||
| @@ -45,17 +45,17 @@ impl FacetsUpdateIncremental { | ||||
|         let mut prefix_iter = self | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?; | ||||
|             .prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?; | ||||
|         if let Some(e) = prefix_iter.next() { | ||||
|             let (key_bytes, value) = e?; | ||||
|             Ok(( | ||||
|                 FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes) | ||||
|                 FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) | ||||
|                     .ok_or(Error::Encoding)? | ||||
|                     .into_owned(), | ||||
|                 value, | ||||
|             )) | ||||
|         } else { | ||||
|             let key = FacetKey { field_id, level, left_bound: search_key }; | ||||
|             let key = FacetGroupKey { field_id, level, left_bound: search_key }; | ||||
|             match self.db.get_lower_than(txn, &key)? { | ||||
|                 Some((key, value)) => { | ||||
|                     if key.level != level || key.field_id != field_id { | ||||
| @@ -66,13 +66,13 @@ impl FacetsUpdateIncremental { | ||||
|                         let mut iter = self | ||||
|                             .db | ||||
|                             .as_polymorph() | ||||
|                             .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>( | ||||
|                             .prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>( | ||||
|                                 txn, | ||||
|                                 &prefix.as_slice(), | ||||
|                             )?; | ||||
|                         let (key_bytes, value) = iter.next().unwrap()?; | ||||
|                         Ok(( | ||||
|                             FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes) | ||||
|                             FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) | ||||
|                                 .ok_or(Error::Encoding)? | ||||
|                                 .into_owned(), | ||||
|                             value, | ||||
| @@ -93,7 +93,7 @@ impl FacetsUpdateIncremental { | ||||
|         new_key: &[u8], | ||||
|         new_values: &RoaringBitmap, | ||||
|     ) -> Result<InsertionResult> { | ||||
|         let key = FacetKey { field_id, level: 0, left_bound: new_key }; | ||||
|         let key = FacetGroupKey { field_id, level: 0, left_bound: new_key }; | ||||
|         let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 }; | ||||
|  | ||||
|         let mut level0_prefix = vec![]; | ||||
| @@ -193,7 +193,7 @@ impl FacetsUpdateIncremental { | ||||
|                 .db | ||||
|                 .get_greater_than_or_equal_to( | ||||
|                     &txn, | ||||
|                     &FacetKey { | ||||
|                     &FacetGroupKey { | ||||
|                         field_id, | ||||
|                         level: level_below, | ||||
|                         left_bound: insertion_key.left_bound.as_slice(), | ||||
| @@ -217,7 +217,7 @@ impl FacetsUpdateIncremental { | ||||
|                 } | ||||
|  | ||||
|                 let key = | ||||
|                     FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; | ||||
|                     FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; | ||||
|                 let value = FacetGroupValue { size: size_left as u8, bitmap: values_left }; | ||||
|                 (key, value) | ||||
|             }; | ||||
| @@ -235,7 +235,7 @@ impl FacetsUpdateIncremental { | ||||
|                 } | ||||
|  | ||||
|                 let key = | ||||
|                     FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; | ||||
|                     FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; | ||||
|                 let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; | ||||
|                 (key, value) | ||||
|             }; | ||||
| @@ -303,7 +303,7 @@ impl FacetsUpdateIncremental { | ||||
|             let mut values = RoaringBitmap::new(); | ||||
|             for _ in 0..group_size { | ||||
|                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; | ||||
|                 let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes) | ||||
|                 let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) | ||||
|                     .ok_or(Error::Encoding)?; | ||||
|  | ||||
|                 if first_key.is_none() { | ||||
| @@ -311,7 +311,7 @@ impl FacetsUpdateIncremental { | ||||
|                 } | ||||
|                 values |= value_i.bitmap; | ||||
|             } | ||||
|             let key = FacetKey { | ||||
|             let key = FacetGroupKey { | ||||
|                 field_id, | ||||
|                 level: highest_level + 1, | ||||
|                 left_bound: first_key.unwrap().left_bound, | ||||
| @@ -384,7 +384,7 @@ impl FacetsUpdateIncremental { | ||||
|         key: &[u8], | ||||
|         value: u32, | ||||
|     ) -> Result<DeletionResult> { | ||||
|         let key = FacetKey { field_id, level: 0, left_bound: key }; | ||||
|         let key = FacetGroupKey { field_id, level: 0, left_bound: key }; | ||||
|         let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap; | ||||
|         bitmap.remove(value); | ||||
|  | ||||
| @@ -415,7 +415,7 @@ impl FacetsUpdateIncremental { | ||||
|         key: &[u8], | ||||
|         value: u32, | ||||
|     ) -> Result<()> { | ||||
|         if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() { | ||||
|         if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() { | ||||
|             return Ok(()); | ||||
|         } | ||||
|         let highest_level = get_highest_level(&txn, self.db, field_id)?; | ||||
| @@ -450,7 +450,7 @@ impl FacetsUpdateIncremental { | ||||
|         while let Some(el) = iter.next() { | ||||
|             let (k, _) = el?; | ||||
|             to_delete.push( | ||||
|                 FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), | ||||
|                 FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), | ||||
|             ); | ||||
|         } | ||||
|         drop(iter); | ||||
| @@ -469,9 +469,9 @@ mod tests { | ||||
|     use rand::{Rng, SeedableRng}; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::new::str_ref::StrRefCodec; | ||||
|     use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; | ||||
|     use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; | ||||
|     use crate::heed_codec::facet::str_ref::StrRefCodec; | ||||
|     use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; | ||||
|     use crate::milli_snap; | ||||
|     use crate::search::facet::get_highest_level; | ||||
|     use crate::search::facet::test::FacetIndex; | ||||
| @@ -502,7 +502,7 @@ mod tests { | ||||
|                 .unwrap(); | ||||
|             while let Some(el) = iter.next() { | ||||
|                 let (key, value) = el.unwrap(); | ||||
|                 let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key).unwrap(); | ||||
|                 let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap(); | ||||
|  | ||||
|                 let mut prefix_start_below = vec![]; | ||||
|                 prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); | ||||
| @@ -519,7 +519,7 @@ mod tests { | ||||
|                         ) | ||||
|                         .unwrap(); | ||||
|                     let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); | ||||
|                     FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes).unwrap() | ||||
|                     FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap() | ||||
|                 }; | ||||
|  | ||||
|                 assert!(value.size > 0 && (value.size as usize) < db.max_group_size); | ||||
| @@ -996,7 +996,7 @@ mod tests { | ||||
|  | ||||
| //             for ((key, values), group) in values_field_id.iter().zip(level0iter) { | ||||
| //                 let (group_key, group_values) = group.unwrap(); | ||||
| //                 let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap(); | ||||
| //                 let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap(); | ||||
| //                 assert_eq!(key, &group_key.left_bound); | ||||
| //                 assert_eq!(values, &group_values.bitmap); | ||||
| //             } | ||||
| @@ -1014,7 +1014,7 @@ mod tests { | ||||
|  | ||||
| //             for ((key, values), group) in values_field_id.iter().zip(level0iter) { | ||||
| //                 let (group_key, group_values) = group.unwrap(); | ||||
| //                 let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap(); | ||||
| //                 let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap(); | ||||
| //                 assert_eq!(key, &group_key.left_bound); | ||||
| //                 assert_eq!(values, &group_values.bitmap); | ||||
| //             } | ||||
|   | ||||
| @@ -1,23 +1,20 @@ | ||||
| use std::{collections::HashMap, fs::File}; | ||||
|  | ||||
| use super::{FacetsUpdateBulk, FacetsUpdateIncremental}; | ||||
| use crate::{ | ||||
|     facet::FacetType, | ||||
|     heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}, | ||||
|     CboRoaringBitmapCodec, FieldId, Index, Result, | ||||
| }; | ||||
| use grenad::CompressionType; | ||||
| use heed::BytesDecode; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::{ | ||||
|     facet::FacetType, | ||||
|     heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}, | ||||
|     CboRoaringBitmapCodec, FieldId, Index, Result, | ||||
| }; | ||||
|  | ||||
| use super::{FacetsUpdateBulk, FacetsUpdateIncremental}; | ||||
| use std::{collections::HashMap, fs::File}; | ||||
|  | ||||
| pub mod bulk; | ||||
| pub mod incremental; | ||||
|  | ||||
| pub struct FacetsUpdate<'i> { | ||||
|     index: &'i Index, | ||||
|     database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, | ||||
|     level_group_size: u8, | ||||
|     max_level_group_size: u8, | ||||
|     min_level_size: u8, | ||||
| @@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> { | ||||
|     pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self { | ||||
|         let database = match facet_type { | ||||
|             FacetType::String => { | ||||
|                 index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|                 index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|             } | ||||
|             FacetType::Number => { | ||||
|                 index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() | ||||
|                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() | ||||
|             } | ||||
|         }; | ||||
|         Self { | ||||
| @@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> { | ||||
|  | ||||
|             let mut cursor = self.new_data.into_cursor()?; | ||||
|             while let Some((key, value)) = cursor.move_on_next()? { | ||||
|                 let key = | ||||
|                     FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?; | ||||
|                 let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key) | ||||
|                     .ok_or(heed::Error::Encoding)?; | ||||
|                 let docids = | ||||
|                     CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; | ||||
|                 indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?; | ||||
|   | ||||
| @@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode}; | ||||
| use super::helpers::{ | ||||
|     create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters, | ||||
| }; | ||||
| use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; | ||||
| use crate::heed_codec::facet::FieldDocIdFacetF64Codec; | ||||
| use crate::heed_codec::facet::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; | ||||
| use crate::Result; | ||||
|  | ||||
| /// Extracts the facet number and the documents ids where this facet number appear. | ||||
| @@ -36,8 +36,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>( | ||||
|         let (field_id, document_id, number) = | ||||
|             FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap(); | ||||
|  | ||||
|         let key = FacetKey { field_id, level: 0, left_bound: number }; | ||||
|         let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap(); | ||||
|         let key = FacetGroupKey { field_id, level: 0, left_bound: number }; | ||||
|         let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap(); | ||||
|  | ||||
|         facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?; | ||||
|     } | ||||
|   | ||||
| @@ -4,8 +4,8 @@ use std::io; | ||||
| use heed::BytesEncode; | ||||
|  | ||||
| use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; | ||||
| use crate::heed_codec::facet::new::str_ref::StrRefCodec; | ||||
| use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; | ||||
| use crate::heed_codec::facet::StrRefCodec; | ||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; | ||||
| use crate::update::index_documents::merge_cbo_roaring_bitmaps; | ||||
| use crate::{FieldId, Result}; | ||||
|  | ||||
| @@ -43,8 +43,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>( | ||||
|         let document_id = u32::from_be_bytes(document_id_bytes); | ||||
|  | ||||
|         let normalised_value = std::str::from_utf8(normalized_value_bytes)?; | ||||
|         let key = FacetKey { field_id, level: 0, left_bound: normalised_value }; | ||||
|         let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap(); | ||||
|         let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value }; | ||||
|         let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap(); | ||||
|  | ||||
|         facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?; | ||||
|     } | ||||
|   | ||||
| @@ -0,0 +1,4 @@ | ||||
| --- | ||||
| source: milli/src/update/word_prefix_pair_proximity_docids.rs | ||||
| --- | ||||
| 6873ff1f78d08f2b1a13bb9e37349c01 | ||||
		Reference in New Issue
	
	Block a user