mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Move StrRefCodec and ByteSliceRefCodec to their own files
This commit is contained in:
		| @@ -57,7 +57,7 @@ md5 = "0.7.0" | |||||||
| rand = {version = "0.8.5", features = ["small_rng"] } | rand = {version = "0.8.5", features = ["small_rng"] } | ||||||
|  |  | ||||||
| [target.'cfg(fuzzing)'.dev-dependencies] | [target.'cfg(fuzzing)'.dev-dependencies] | ||||||
| fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } | fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } # TODO: use released version | ||||||
|  |  | ||||||
| [features] | [features] | ||||||
| default = [ "charabia/default" ] | default = [ "charabia/default" ] | ||||||
|   | |||||||
							
								
								
									
										23
									
								
								milli/src/heed_codec/byte_slice_ref.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								milli/src/heed_codec/byte_slice_ref.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | |||||||
|  | use std::borrow::Cow; | ||||||
|  |  | ||||||
|  | use heed::{BytesDecode, BytesEncode}; | ||||||
|  |  | ||||||
|  | /// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated | ||||||
|  | /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. | ||||||
|  | pub struct ByteSliceRefCodec; | ||||||
|  |  | ||||||
|  | impl<'a> BytesEncode<'a> for ByteSliceRefCodec { | ||||||
|  |     type EItem = &'a [u8]; | ||||||
|  |  | ||||||
|  |     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||||
|  |         Some(Cow::Borrowed(item)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> BytesDecode<'a> for ByteSliceRefCodec { | ||||||
|  |     type DItem = &'a [u8]; | ||||||
|  |  | ||||||
|  |     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||||
|  |         Some(bytes) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,6 +1,5 @@ | |||||||
| mod field_doc_id_facet_codec; | mod field_doc_id_facet_codec; | ||||||
| mod ordered_f64_codec; | mod ordered_f64_codec; | ||||||
| mod str_ref; |  | ||||||
|  |  | ||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::convert::TryFrom; | use std::convert::TryFrom; | ||||||
| @@ -12,9 +11,10 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec; | pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec; | ||||||
| pub use self::ordered_f64_codec::OrderedF64Codec; | pub use self::ordered_f64_codec::OrderedF64Codec; | ||||||
| pub use self::str_ref::StrRefCodec; |  | ||||||
| use crate::{CboRoaringBitmapCodec, BEU16}; | use crate::{CboRoaringBitmapCodec, BEU16}; | ||||||
|  |  | ||||||
|  | use super::StrRefCodec; | ||||||
|  |  | ||||||
| pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>; | pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>; | ||||||
| pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>; | pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>; | ||||||
| pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>; | pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>; | ||||||
| @@ -33,7 +33,7 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> { | |||||||
|  |  | ||||||
| /// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`] | /// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`] | ||||||
| /// databases. | /// databases. | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord | ||||||
| pub struct FacetGroupKey<T> { | pub struct FacetGroupKey<T> { | ||||||
|     pub field_id: u16, |     pub field_id: u16, | ||||||
|     pub level: u8, |     pub level: u8, | ||||||
| @@ -103,23 +103,3 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | |||||||
|         Some(FacetGroupValue { size, bitmap }) |         Some(FacetGroupValue { size, bitmap }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated |  | ||||||
| /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. |  | ||||||
| pub struct ByteSliceRef; |  | ||||||
|  |  | ||||||
| impl<'a> BytesEncode<'a> for ByteSliceRef { |  | ||||||
|     type EItem = &'a [u8]; |  | ||||||
|  |  | ||||||
|     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { |  | ||||||
|         Some(Cow::Borrowed(item)) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'a> BytesDecode<'a> for ByteSliceRef { |  | ||||||
|     type DItem = &'a [u8]; |  | ||||||
|  |  | ||||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { |  | ||||||
|         Some(bytes) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -1,10 +1,12 @@ | |||||||
| mod beu32_str_codec; | mod beu32_str_codec; | ||||||
|  | mod byte_slice_ref; | ||||||
| pub mod facet; | pub mod facet; | ||||||
| mod field_id_word_count_codec; | mod field_id_word_count_codec; | ||||||
| mod obkv_codec; | mod obkv_codec; | ||||||
| mod roaring_bitmap; | mod roaring_bitmap; | ||||||
| mod roaring_bitmap_length; | mod roaring_bitmap_length; | ||||||
| mod str_beu32_codec; | mod str_beu32_codec; | ||||||
|  | mod str_ref; | ||||||
| mod str_str_u8_codec; | mod str_str_u8_codec; | ||||||
|  |  | ||||||
| pub use self::beu32_str_codec::BEU32StrCodec; | pub use self::beu32_str_codec::BEU32StrCodec; | ||||||
| @@ -16,3 +18,5 @@ pub use self::roaring_bitmap_length::{ | |||||||
| }; | }; | ||||||
| pub use self::str_beu32_codec::StrBEU32Codec; | pub use self::str_beu32_codec::StrBEU32Codec; | ||||||
| pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; | pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; | ||||||
|  | pub use byte_slice_ref::ByteSliceRefCodec; | ||||||
|  | pub use str_ref::StrRefCodec; | ||||||
|   | |||||||
| @@ -16,8 +16,9 @@ use crate::facet::FacetType; | |||||||
| use crate::fields_ids_map::FieldsIdsMap; | use crate::fields_ids_map::FieldsIdsMap; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, |     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, | ||||||
|     FieldIdCodec, OrderedF64Codec, StrRefCodec, |     FieldIdCodec, OrderedF64Codec, | ||||||
| }; | }; | ||||||
|  | use crate::heed_codec::StrRefCodec; | ||||||
| use crate::{ | use crate::{ | ||||||
|     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, |     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, | ||||||
|     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, |     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, | ||||||
|   | |||||||
| @@ -7,7 +7,8 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::{Criterion, CriterionParameters, CriterionResult}; | use super::{Criterion, CriterionParameters, CriterionResult}; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; | use crate::heed_codec::facet::FacetGroupKeyCodec; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; | use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; | ||||||
| use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; | use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; | ||||||
| use crate::search::query_tree::Operation; | use crate::search::query_tree::Operation; | ||||||
| @@ -194,14 +195,14 @@ fn facet_ordered<'t>( | |||||||
|  |  | ||||||
|         let number_iter = make_iter( |         let number_iter = make_iter( | ||||||
|             rtxn, |             rtxn, | ||||||
|             index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), |             index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             field_id, |             field_id, | ||||||
|             candidates.clone(), |             candidates.clone(), | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|         let string_iter = make_iter( |         let string_iter = make_iter( | ||||||
|             rtxn, |             rtxn, | ||||||
|             index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), |             index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             field_id, |             field_id, | ||||||
|             candidates, |             candidates, | ||||||
|         )?; |         )?; | ||||||
|   | |||||||
| @@ -9,9 +9,10 @@ use roaring::RoaringBitmap; | |||||||
| use crate::error::UserError; | use crate::error::UserError; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, |     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, | ||||||
|     FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec, |     OrderedF64Codec, | ||||||
| }; | }; | ||||||
|  | use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; | ||||||
| use crate::search::facet::facet_distribution_iter; | use crate::search::facet::facet_distribution_iter; | ||||||
| use crate::{FieldId, Index, Result}; | use crate::{FieldId, Index, Result}; | ||||||
|  |  | ||||||
| @@ -137,7 +138,9 @@ impl<'a> FacetDistribution<'a> { | |||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         facet_distribution_iter::iterate_over_facet_distribution( |         facet_distribution_iter::iterate_over_facet_distribution( | ||||||
|             self.rtxn, |             self.rtxn, | ||||||
|             self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), |             self.index | ||||||
|  |                 .facet_id_f64_docids | ||||||
|  |                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             field_id, |             field_id, | ||||||
|             candidates, |             candidates, | ||||||
|             |facet_key, nbr_docids, _| { |             |facet_key, nbr_docids, _| { | ||||||
| @@ -160,7 +163,9 @@ impl<'a> FacetDistribution<'a> { | |||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         facet_distribution_iter::iterate_over_facet_distribution( |         facet_distribution_iter::iterate_over_facet_distribution( | ||||||
|             self.rtxn, |             self.rtxn, | ||||||
|             self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), |             self.index | ||||||
|  |                 .facet_id_string_docids | ||||||
|  |                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             field_id, |             field_id, | ||||||
|             candidates, |             candidates, | ||||||
|             |facet_key, nbr_docids, any_docid| { |             |facet_key, nbr_docids, any_docid| { | ||||||
|   | |||||||
| @@ -4,9 +4,8 @@ use heed::Result; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level}; | use super::{get_first_facet_value, get_highest_level}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||||
|     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| }; |  | ||||||
| use crate::DocumentId; | use crate::DocumentId; | ||||||
|  |  | ||||||
| /// Call the given closure on the facet distribution of the candidate documents. | /// Call the given closure on the facet distribution of the candidate documents. | ||||||
| @@ -22,7 +21,7 @@ use crate::DocumentId; | |||||||
| /// keep iterating over the different facet values or stop. | /// keep iterating over the different facet values or stop. | ||||||
| pub fn iterate_over_facet_distribution<'t, CB>( | pub fn iterate_over_facet_distribution<'t, CB>( | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     candidates: &RoaringBitmap, |     candidates: &RoaringBitmap, | ||||||
|     callback: CB, |     callback: CB, | ||||||
| @@ -31,10 +30,13 @@ where | |||||||
|     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, |     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, | ||||||
| { | { | ||||||
|     let mut fd = FacetDistribution { rtxn, db, field_id, callback }; |     let mut fd = FacetDistribution { rtxn, db, field_id, callback }; | ||||||
|     let highest_level = |     let highest_level = get_highest_level( | ||||||
|         get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?; |         rtxn, | ||||||
|  |         db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|  |         field_id, | ||||||
|  |     )?; | ||||||
|  |  | ||||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { | ||||||
|         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; |         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; | ||||||
|         return Ok(()); |         return Ok(()); | ||||||
|     } else { |     } else { | ||||||
| @@ -47,7 +49,7 @@ where | |||||||
|     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, |     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, | ||||||
| { | { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     callback: CB, |     callback: CB, | ||||||
| } | } | ||||||
| @@ -72,11 +74,13 @@ where | |||||||
|             if key.field_id != self.field_id { |             if key.field_id != self.field_id { | ||||||
|                 return Ok(ControlFlow::Break(())); |                 return Ok(ControlFlow::Break(())); | ||||||
|             } |             } | ||||||
|  |             // TODO: use real intersection and then take min()? | ||||||
|             let docids_in_common = value.bitmap.intersection_len(candidates); |             let docids_in_common = value.bitmap.intersection_len(candidates); | ||||||
|             if docids_in_common > 0 { |             if docids_in_common > 0 { | ||||||
|  |                 // TODO: use min() | ||||||
|                 let any_docid = value.bitmap.iter().next().unwrap(); |                 let any_docid = value.bitmap.iter().next().unwrap(); | ||||||
|                 match (self.callback)(key.left_bound, docids_in_common, any_docid)? { |                 match (self.callback)(key.left_bound, docids_in_common, any_docid)? { | ||||||
|                     ControlFlow::Continue(_) => {} |                     ControlFlow::Continue(_) => (), // TODO use unit instead of empty scope | ||||||
|                     ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), |                     ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|   | |||||||
| @@ -4,9 +4,8 @@ use heed::BytesEncode; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||||
|     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| }; |  | ||||||
| use crate::Result; | use crate::Result; | ||||||
|  |  | ||||||
| /// Find all the document ids for which the given field contains a value contained within | /// Find all the document ids for which the given field contains a value contained within | ||||||
| @@ -47,13 +46,16 @@ where | |||||||
|         } |         } | ||||||
|         Bound::Unbounded => Bound::Unbounded, |         Bound::Unbounded => Bound::Unbounded, | ||||||
|     }; |     }; | ||||||
|     let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(); |     let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); | ||||||
|     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; |     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|  |  | ||||||
|     if let Some(starting_left_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { |     if let Some(starting_left_bound) = | ||||||
|         let rightmost_bound = |         get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? | ||||||
|             Bound::Included(get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded |     { | ||||||
|  |         let rightmost_bound = Bound::Included( | ||||||
|  |             get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(), | ||||||
|  |         ); // will not fail because get_first_facet_value succeeded | ||||||
|         let group_size = usize::MAX; |         let group_size = usize::MAX; | ||||||
|         f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; |         f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -65,7 +67,7 @@ where | |||||||
| /// Fetch the document ids that have a facet with a value between the two given bounds | /// Fetch the document ids that have a facet with a value between the two given bounds | ||||||
| struct FacetRangeSearch<'t, 'b, 'bitmap> { | struct FacetRangeSearch<'t, 'b, 'bitmap> { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     left: Bound<&'b [u8]>, |     left: Bound<&'b [u8]>, | ||||||
|     right: Bound<&'b [u8]>, |     right: Bound<&'b [u8]>, | ||||||
|   | |||||||
| @@ -3,8 +3,9 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level}; | use super::{get_first_facet_value, get_highest_level}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
|  |  | ||||||
| /// Return an iterator which iterates over the given candidate documents in | /// Return an iterator which iterates over the given candidate documents in | ||||||
| /// ascending order of their facet value for the given field id. | /// ascending order of their facet value for the given field id. | ||||||
| @@ -30,12 +31,12 @@ use crate::heed_codec::facet::{ | |||||||
| /// Note that once a document id is returned by the iterator, it is never returned again. | /// Note that once a document id is returned by the iterator, it is never returned again. | ||||||
| pub fn ascending_facet_sort<'t>( | pub fn ascending_facet_sort<'t>( | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { | ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { | ||||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); |         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); | ||||||
|  |  | ||||||
| @@ -47,11 +48,13 @@ pub fn ascending_facet_sort<'t>( | |||||||
|  |  | ||||||
| struct AscendingFacetSort<'t, 'e> { | struct AscendingFacetSort<'t, 'e> { | ||||||
|     rtxn: &'t heed::RoTxn<'e>, |     rtxn: &'t heed::RoTxn<'e>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     stack: Vec<( |     stack: Vec<( | ||||||
|         RoaringBitmap, |         RoaringBitmap, | ||||||
|         std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>, |         std::iter::Take< | ||||||
|  |             heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|  |         >, | ||||||
|     )>, |     )>, | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,22 +5,23 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
|  |  | ||||||
| /// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort). | /// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort). | ||||||
| /// | /// | ||||||
| /// This function does the same thing, but in the opposite order. | /// This function does the same thing, but in the opposite order. | ||||||
| pub fn descending_facet_sort<'t>( | pub fn descending_facet_sort<'t>( | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { | ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|     if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { | ||||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|         let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap(); |         let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(); | ||||||
|         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; |         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; | ||||||
|         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); |         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); | ||||||
|         Ok(Box::new(DescendingFacetSort { |         Ok(Box::new(DescendingFacetSort { | ||||||
| @@ -36,12 +37,12 @@ pub fn descending_facet_sort<'t>( | |||||||
|  |  | ||||||
| struct DescendingFacetSort<'t> { | struct DescendingFacetSort<'t> { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     stack: Vec<( |     stack: Vec<( | ||||||
|         RoaringBitmap, |         RoaringBitmap, | ||||||
|         std::iter::Take< |         std::iter::Take< | ||||||
|             heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |             heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|         >, |         >, | ||||||
|         Bound<&'t [u8]>, |         Bound<&'t [u8]>, | ||||||
|     )>, |     )>, | ||||||
| @@ -97,7 +98,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | |||||||
|                     *right_bound = Bound::Excluded(left_bound); |                     *right_bound = Bound::Excluded(left_bound); | ||||||
|                     let iter = match self |                     let iter = match self | ||||||
|                         .db |                         .db | ||||||
|                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() | ||||||
|                         .rev_range( |                         .rev_range( | ||||||
|                             &self.rtxn, |                             &self.rtxn, | ||||||
|                             &(Bound::Included(starting_key_below), end_key_kelow), |                             &(Bound::Included(starting_key_below), end_key_kelow), | ||||||
| @@ -121,7 +122,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> { | |||||||
| mod tests { | mod tests { | ||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|     use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; |     use crate::heed_codec::facet::FacetGroupKeyCodec; | ||||||
|  |     use crate::heed_codec::ByteSliceRefCodec; | ||||||
|     use crate::milli_snap; |     use crate::milli_snap; | ||||||
|     use crate::search::facet::facet_sort_descending::descending_facet_sort; |     use crate::search::facet::facet_sort_descending::descending_facet_sort; | ||||||
|     use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; |     use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; | ||||||
| @@ -134,7 +136,7 @@ mod tests { | |||||||
|             let txn = index.env.read_txn().unwrap(); |             let txn = index.env.read_txn().unwrap(); | ||||||
|             let candidates = (200..=300).into_iter().collect::<RoaringBitmap>(); |             let candidates = (200..=300).into_iter().collect::<RoaringBitmap>(); | ||||||
|             let mut results = String::new(); |             let mut results = String::new(); | ||||||
|             let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(); |             let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); | ||||||
|             let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); |             let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); | ||||||
|             for el in iter { |             for el in iter { | ||||||
|                 let docids = el.unwrap(); |                 let docids = el.unwrap(); | ||||||
|   | |||||||
| @@ -5,8 +5,8 @@ use heed::{BytesDecode, RoTxn}; | |||||||
|  |  | ||||||
| pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; | pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; | ||||||
| pub use self::filter::Filter; | pub use self::filter::Filter; | ||||||
| use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; | use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| mod facet_distribution; | mod facet_distribution; | ||||||
| mod facet_distribution_iter; | mod facet_distribution_iter; | ||||||
| mod facet_range_search; | mod facet_range_search; | ||||||
| @@ -17,7 +17,7 @@ mod filter; | |||||||
| /// Get the first facet value in the facet database | /// Get the first facet value in the facet database | ||||||
| pub(crate) fn get_first_facet_value<'t, BoundCodec>( | pub(crate) fn get_first_facet_value<'t, BoundCodec>( | ||||||
|     txn: &'t RoTxn, |     txn: &'t RoTxn, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ) -> heed::Result<Option<BoundCodec::DItem>> | ||||||
| where | where | ||||||
| @@ -42,7 +42,7 @@ where | |||||||
| /// Get the last facet value in the facet database | /// Get the last facet value in the facet database | ||||||
| pub(crate) fn get_last_facet_value<'t, BoundCodec>( | pub(crate) fn get_last_facet_value<'t, BoundCodec>( | ||||||
|     txn: &'t RoTxn, |     txn: &'t RoTxn, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ) -> heed::Result<Option<BoundCodec::DItem>> | ||||||
| where | where | ||||||
| @@ -67,7 +67,7 @@ where | |||||||
| /// Get the height of the highest level in the facet database | /// Get the height of the highest level in the facet database | ||||||
| pub(crate) fn get_highest_level<'t>( | pub(crate) fn get_highest_level<'t>( | ||||||
|     txn: &'t RoTxn<'t>, |     txn: &'t RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<u8> { | ) -> heed::Result<u8> { | ||||||
|     let field_id_prefix = &field_id.to_be_bytes(); |     let field_id_prefix = &field_id.to_be_bytes(); | ||||||
| @@ -77,7 +77,7 @@ pub(crate) fn get_highest_level<'t>( | |||||||
|         .next() |         .next() | ||||||
|         .map(|el| { |         .map(|el| { | ||||||
|             let (key, _) = el.unwrap(); |             let (key, _) = el.unwrap(); | ||||||
|             let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap(); |             let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap(); | ||||||
|             key.level |             key.level | ||||||
|         }) |         }) | ||||||
|         .unwrap_or(0)) |         .unwrap_or(0)) | ||||||
|   | |||||||
| @@ -11,8 +11,9 @@ use time::OffsetDateTime; | |||||||
| use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| use crate::update::index_documents::{create_writer, writer_into_reader}; | use crate::update::index_documents::{create_writer, writer_into_reader}; | ||||||
| use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | ||||||
|  |  | ||||||
| @@ -75,11 +76,11 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|         let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self; |         let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self; | ||||||
|  |  | ||||||
|         let db = match facet_type { |         let db = match facet_type { | ||||||
|             FacetType::String => { |             FacetType::String => index | ||||||
|                 index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                 .facet_id_string_docids | ||||||
|             } |                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             FacetType::Number => { |             FacetType::Number => { | ||||||
|                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -98,7 +99,7 @@ impl<'i> FacetsUpdateBulk<'i> { | |||||||
|  |  | ||||||
| /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type | /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type | ||||||
| pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> { | pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> { | ||||||
|     pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     pub new_data: Option<grenad::Reader<R>>, |     pub new_data: Option<grenad::Reader<R>>, | ||||||
|     pub group_size: u8, |     pub group_size: u8, | ||||||
|     pub min_level_size: u8, |     pub min_level_size: u8, | ||||||
| @@ -216,7 +217,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | |||||||
|             .db |             .db | ||||||
|             .as_polymorph() |             .as_polymorph() | ||||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? |             .prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? | ||||||
|             .remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>(); |             .remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>(); | ||||||
|  |  | ||||||
|         let mut left_bound: &[u8] = &[]; |         let mut left_bound: &[u8] = &[]; | ||||||
|         let mut first_iteration_for_new_group = true; |         let mut first_iteration_for_new_group = true; | ||||||
| @@ -299,7 +300,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | |||||||
|                     bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) |                     bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) | ||||||
|                 { |                 { | ||||||
|                     let key = FacetGroupKey { field_id, level, left_bound }; |                     let key = FacetGroupKey { field_id, level, left_bound }; | ||||||
|                     let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key) |                     let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key) | ||||||
|                         .ok_or(Error::Encoding)?; |                         .ok_or(Error::Encoding)?; | ||||||
|                     let value = FacetGroupValue { size: group_size, bitmap }; |                     let value = FacetGroupValue { size: group_size, bitmap }; | ||||||
|                     let value = |                     let value = | ||||||
| @@ -328,7 +329,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | |||||||
|                 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) |                 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) | ||||||
|             { |             { | ||||||
|                 let key = FacetGroupKey { field_id, level, left_bound }; |                 let key = FacetGroupKey { field_id, level, left_bound }; | ||||||
|                 let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key) |                 let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key) | ||||||
|                     .ok_or(Error::Encoding)?; |                     .ok_or(Error::Encoding)?; | ||||||
|                 let value = FacetGroupValue { size: group_size, bitmap }; |                 let value = FacetGroupValue { size: group_size, bitmap }; | ||||||
|                 let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; |                 let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||||
|   | |||||||
| @@ -1,7 +1,8 @@ | |||||||
| use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     facet::FacetType, |     facet::FacetType, | ||||||
|     heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, |     heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, | ||||||
|  |     heed_codec::ByteSliceRefCodec, | ||||||
|     update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner}, |     update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner}, | ||||||
|     FieldId, Index, Result, |     FieldId, Index, Result, | ||||||
| }; | }; | ||||||
| @@ -11,7 +12,7 @@ use std::collections::{HashMap, HashSet}; | |||||||
|  |  | ||||||
| pub struct FacetsDelete<'i, 'b> { | pub struct FacetsDelete<'i, 'b> { | ||||||
|     index: &'i Index, |     index: &'i Index, | ||||||
|     database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     facet_type: FacetType, |     facet_type: FacetType, | ||||||
|     affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>, |     affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>, | ||||||
|     docids_to_delete: &'b RoaringBitmap, |     docids_to_delete: &'b RoaringBitmap, | ||||||
| @@ -27,11 +28,11 @@ impl<'i, 'b> FacetsDelete<'i, 'b> { | |||||||
|         docids_to_delete: &'b RoaringBitmap, |         docids_to_delete: &'b RoaringBitmap, | ||||||
|     ) -> Self { |     ) -> Self { | ||||||
|         let database = match facet_type { |         let database = match facet_type { | ||||||
|             FacetType::String => { |             FacetType::String => index | ||||||
|                 index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                 .facet_id_string_docids | ||||||
|             } |                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             FacetType::Number => { |             FacetType::Number => { | ||||||
|                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|         Self { |         Self { | ||||||
|   | |||||||
| @@ -7,8 +7,9 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
| }; | }; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| use crate::search::facet::get_highest_level; | use crate::search::facet::get_highest_level; | ||||||
| use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | ||||||
|  |  | ||||||
| @@ -50,10 +51,10 @@ impl<'i> FacetsUpdateIncremental<'i> { | |||||||
|                 db: match facet_type { |                 db: match facet_type { | ||||||
|                     FacetType::String => index |                     FacetType::String => index | ||||||
|                         .facet_id_string_docids |                         .facet_id_string_docids | ||||||
|                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), |                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|                     FacetType::Number => index |                     FacetType::Number => index | ||||||
|                         .facet_id_f64_docids |                         .facet_id_f64_docids | ||||||
|                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), |                         .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|                 }, |                 }, | ||||||
|                 group_size, |                 group_size, | ||||||
|                 max_group_size, |                 max_group_size, | ||||||
| @@ -69,7 +70,7 @@ impl<'i> FacetsUpdateIncremental<'i> { | |||||||
|  |  | ||||||
|         let mut cursor = self.new_data.into_cursor()?; |         let mut cursor = self.new_data.into_cursor()?; | ||||||
|         while let Some((key, value)) = cursor.move_on_next()? { |         while let Some((key, value)) = cursor.move_on_next()? { | ||||||
|             let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key) |             let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key) | ||||||
|                 .ok_or(heed::Error::Encoding)?; |                 .ok_or(heed::Error::Encoding)?; | ||||||
|             let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; |             let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; | ||||||
|             self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?; |             self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?; | ||||||
| @@ -87,7 +88,7 @@ impl<'i> FacetsUpdateIncremental<'i> { | |||||||
|  |  | ||||||
| /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type | /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type | ||||||
| pub struct FacetsUpdateIncrementalInner { | pub struct FacetsUpdateIncrementalInner { | ||||||
|     pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     pub group_size: u8, |     pub group_size: u8, | ||||||
|     pub min_level_size: u8, |     pub min_level_size: u8, | ||||||
|     pub max_group_size: u8, |     pub max_group_size: u8, | ||||||
| @@ -126,7 +127,7 @@ impl FacetsUpdateIncrementalInner { | |||||||
|         if let Some(e) = prefix_iter.next() { |         if let Some(e) = prefix_iter.next() { | ||||||
|             let (key_bytes, value) = e?; |             let (key_bytes, value) = e?; | ||||||
|             Ok(( |             Ok(( | ||||||
|                 FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) |                 FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes) | ||||||
|                     .ok_or(Error::Encoding)? |                     .ok_or(Error::Encoding)? | ||||||
|                     .into_owned(), |                     .into_owned(), | ||||||
|                 value, |                 value, | ||||||
| @@ -149,7 +150,7 @@ impl FacetsUpdateIncrementalInner { | |||||||
|                             )?; |                             )?; | ||||||
|                         let (key_bytes, value) = iter.next().unwrap()?; |                         let (key_bytes, value) = iter.next().unwrap()?; | ||||||
|                         Ok(( |                         Ok(( | ||||||
|                             FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) |                             FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes) | ||||||
|                                 .ok_or(Error::Encoding)? |                                 .ok_or(Error::Encoding)? | ||||||
|                                 .into_owned(), |                                 .into_owned(), | ||||||
|                             value, |                             value, | ||||||
| @@ -411,7 +412,7 @@ impl FacetsUpdateIncrementalInner { | |||||||
|             let mut values = RoaringBitmap::new(); |             let mut values = RoaringBitmap::new(); | ||||||
|             for _ in 0..group_size { |             for _ in 0..group_size { | ||||||
|                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; |                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; | ||||||
|                 let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) |                 let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes) | ||||||
|                     .ok_or(Error::Encoding)?; |                     .ok_or(Error::Encoding)?; | ||||||
|  |  | ||||||
|                 if first_key.is_none() { |                 if first_key.is_none() { | ||||||
| @@ -434,7 +435,7 @@ impl FacetsUpdateIncrementalInner { | |||||||
|             let mut values = RoaringBitmap::new(); |             let mut values = RoaringBitmap::new(); | ||||||
|             for _ in 0..nbr_leftover_elements { |             for _ in 0..nbr_leftover_elements { | ||||||
|                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; |                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; | ||||||
|                 let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes) |                 let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes) | ||||||
|                     .ok_or(Error::Encoding)?; |                     .ok_or(Error::Encoding)?; | ||||||
|  |  | ||||||
|                 if first_key.is_none() { |                 if first_key.is_none() { | ||||||
| @@ -616,7 +617,7 @@ impl FacetsUpdateIncrementalInner { | |||||||
|         while let Some(el) = iter.next() { |         while let Some(el) = iter.next() { | ||||||
|             let (k, _) = el?; |             let (k, _) = el?; | ||||||
|             to_delete.push( |             to_delete.push( | ||||||
|                 FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k) |                 FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k) | ||||||
|                     .ok_or(Error::Encoding)? |                     .ok_or(Error::Encoding)? | ||||||
|                     .into_owned(), |                     .into_owned(), | ||||||
|             ); |             ); | ||||||
| @@ -655,7 +656,8 @@ mod tests { | |||||||
|     use rand::{Rng, SeedableRng}; |     use rand::{Rng, SeedableRng}; | ||||||
|     use roaring::RoaringBitmap; |     use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|     use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec}; |     use crate::heed_codec::facet::OrderedF64Codec; | ||||||
|  |     use crate::heed_codec::StrRefCodec; | ||||||
|     use crate::milli_snap; |     use crate::milli_snap; | ||||||
|     use crate::update::facet::tests::FacetIndex; |     use crate::update::facet::tests::FacetIndex; | ||||||
|  |  | ||||||
| @@ -1019,6 +1021,7 @@ mod tests { | |||||||
|  |  | ||||||
|     // fuzz tests |     // fuzz tests | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(all(test, fuzzing))] | #[cfg(all(test, fuzzing))] | ||||||
| mod fuzz { | mod fuzz { | ||||||
|     use std::borrow::Cow; |     use std::borrow::Cow; | ||||||
|   | |||||||
| @@ -77,7 +77,8 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5; | |||||||
| use self::incremental::FacetsUpdateIncremental; | use self::incremental::FacetsUpdateIncremental; | ||||||
| use super::FacetsUpdateBulk; | use super::FacetsUpdateBulk; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; | use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; | ||||||
|  | use crate::heed_codec::ByteSliceRefCodec; | ||||||
| use crate::{Index, Result}; | use crate::{Index, Result}; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
|  |  | ||||||
| @@ -87,7 +88,7 @@ pub mod incremental; | |||||||
|  |  | ||||||
| pub struct FacetsUpdate<'i> { | pub struct FacetsUpdate<'i> { | ||||||
|     index: &'i Index, |     index: &'i Index, | ||||||
|     database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |     database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     facet_type: FacetType, |     facet_type: FacetType, | ||||||
|     new_data: grenad::Reader<File>, |     new_data: grenad::Reader<File>, | ||||||
|     group_size: u8, |     group_size: u8, | ||||||
| @@ -97,11 +98,11 @@ pub struct FacetsUpdate<'i> { | |||||||
| impl<'i> FacetsUpdate<'i> { | impl<'i> FacetsUpdate<'i> { | ||||||
|     pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self { |     pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self { | ||||||
|         let database = match facet_type { |         let database = match facet_type { | ||||||
|             FacetType::String => { |             FacetType::String => index | ||||||
|                 index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                 .facet_id_string_docids | ||||||
|             } |                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
|             FacetType::Number => { |             FacetType::Number => { | ||||||
|                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>() |                 index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|         Self { |         Self { | ||||||
| @@ -159,8 +160,9 @@ pub(crate) mod tests { | |||||||
|  |  | ||||||
|     use super::bulk::FacetsUpdateBulkInner; |     use super::bulk::FacetsUpdateBulkInner; | ||||||
|     use crate::heed_codec::facet::{ |     use crate::heed_codec::facet::{ | ||||||
|         ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, |         FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||||
|     }; |     }; | ||||||
|  |     use crate::heed_codec::ByteSliceRefCodec; | ||||||
|     use crate::search::facet::get_highest_level; |     use crate::search::facet::get_highest_level; | ||||||
|     use crate::snapshot_tests::display_bitmap; |     use crate::snapshot_tests::display_bitmap; | ||||||
|     use crate::update::FacetsUpdateIncrementalInner; |     use crate::update::FacetsUpdateIncrementalInner; | ||||||
| @@ -173,7 +175,7 @@ pub(crate) mod tests { | |||||||
|             BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, |             BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, | ||||||
|     { |     { | ||||||
|         pub env: Env, |         pub env: Env, | ||||||
|         pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, |         pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|         pub group_size: Cell<u8>, |         pub group_size: Cell<u8>, | ||||||
|         pub min_level_size: Cell<u8>, |         pub min_level_size: Cell<u8>, | ||||||
|         pub max_group_size: Cell<u8>, |         pub max_group_size: Cell<u8>, | ||||||
| @@ -327,7 +329,7 @@ pub(crate) mod tests { | |||||||
|                 let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned(); |                 let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned(); | ||||||
|                 let key: FacetGroupKey<&[u8]> = |                 let key: FacetGroupKey<&[u8]> = | ||||||
|                     FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes }; |                     FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes }; | ||||||
|                 let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).unwrap(); |                 let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap(); | ||||||
|                 let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap(); |                 let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap(); | ||||||
|                 writer.insert(&key, &value).unwrap(); |                 writer.insert(&key, &value).unwrap(); | ||||||
|             } |             } | ||||||
| @@ -362,7 +364,7 @@ pub(crate) mod tests { | |||||||
|                     .unwrap(); |                     .unwrap(); | ||||||
|                 while let Some(el) = iter.next() { |                 while let Some(el) = iter.next() { | ||||||
|                     let (key, value) = el.unwrap(); |                     let (key, value) = el.unwrap(); | ||||||
|                     let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap(); |                     let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key).unwrap(); | ||||||
|  |  | ||||||
|                     let mut prefix_start_below = vec![]; |                     let mut prefix_start_below = vec![]; | ||||||
|                     prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); |                     prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); | ||||||
| @@ -379,7 +381,7 @@ pub(crate) mod tests { | |||||||
|                             ) |                             ) | ||||||
|                             .unwrap(); |                             .unwrap(); | ||||||
|                         let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); |                         let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); | ||||||
|                         FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap() |                         FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes).unwrap() | ||||||
|                     }; |                     }; | ||||||
|  |  | ||||||
|                     assert!(value.size > 0); |                     assert!(value.size > 0); | ||||||
|   | |||||||
| @@ -4,7 +4,8 @@ use std::io; | |||||||
| use heed::BytesEncode; | use heed::BytesEncode; | ||||||
|  |  | ||||||
| use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; | use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec}; | use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; | ||||||
|  | use crate::heed_codec::StrRefCodec; | ||||||
| use crate::update::index_documents::merge_cbo_roaring_bitmaps; | use crate::update::index_documents::merge_cbo_roaring_bitmaps; | ||||||
| use crate::{FieldId, Result}; | use crate::{FieldId, Result}; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user