Move StrRefCodec and ByteSliceRefCodec to their own files

This commit is contained in:
Loïc Lecrenier
2022-10-12 09:42:55 +02:00
parent 1165ba2171
commit a034a1e628
18 changed files with 140 additions and 107 deletions

View File

@ -11,8 +11,9 @@ use time::OffsetDateTime;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
@ -75,11 +76,11 @@ impl<'i> FacetsUpdateBulk<'i> {
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
let db = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
@ -98,7 +99,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub new_data: Option<grenad::Reader<R>>,
pub group_size: u8,
pub min_level_size: u8,
@ -216,7 +217,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true;
@ -299,7 +300,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
@ -328,7 +329,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;

View File

@ -1,7 +1,8 @@
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::{
facet::FacetType,
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
heed_codec::ByteSliceRefCodec,
update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner},
FieldId, Index, Result,
};
@ -11,7 +12,7 @@ use std::collections::{HashMap, HashSet};
pub struct FacetsDelete<'i, 'b> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
docids_to_delete: &'b RoaringBitmap,
@ -27,11 +28,11 @@ impl<'i, 'b> FacetsDelete<'i, 'b> {
docids_to_delete: &'b RoaringBitmap,
) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
Self {

View File

@ -7,8 +7,9 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
@ -50,10 +51,10 @@ impl<'i> FacetsUpdateIncremental<'i> {
db: match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
},
group_size,
max_group_size,
@ -69,7 +70,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
let mut cursor = self.new_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
@ -87,7 +88,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
pub struct FacetsUpdateIncrementalInner {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub group_size: u8,
pub min_level_size: u8,
pub max_group_size: u8,
@ -126,7 +127,7 @@ impl FacetsUpdateIncrementalInner {
if let Some(e) = prefix_iter.next() {
let (key_bytes, value) = e?;
Ok((
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
@ -149,7 +150,7 @@ impl FacetsUpdateIncrementalInner {
)?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
@ -411,7 +412,7 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new();
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?;
if first_key.is_none() {
@ -434,7 +435,7 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new();
for _ in 0..nbr_leftover_elements {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?;
if first_key.is_none() {
@ -616,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
while let Some(el) = iter.next() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k)
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
.ok_or(Error::Encoding)?
.into_owned(),
);
@ -655,7 +656,8 @@ mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::StrRefCodec;
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex;
@ -1019,6 +1021,7 @@ mod tests {
// fuzz tests
}
#[cfg(all(test, fuzzing))]
mod fuzz {
use std::borrow::Cow;

View File

@ -77,7 +77,8 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result};
use std::fs::File;
@ -87,7 +88,7 @@ pub mod incremental;
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
new_data: grenad::Reader<File>,
group_size: u8,
@ -97,11 +98,11 @@ pub struct FacetsUpdate<'i> {
impl<'i> FacetsUpdate<'i> {
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
Self {
@ -159,8 +160,9 @@ pub(crate) mod tests {
use super::bulk::FacetsUpdateBulkInner;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::FacetsUpdateIncrementalInner;
@ -173,7 +175,7 @@ pub(crate) mod tests {
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
pub env: Env,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>,
pub max_group_size: Cell<u8>,
@ -327,7 +329,7 @@ pub(crate) mod tests {
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap();
writer.insert(&key, &value).unwrap();
}
@ -362,7 +364,7 @@ pub(crate) mod tests {
.unwrap();
while let Some(el) = iter.next() {
let (key, value) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key).unwrap();
let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -379,7 +381,7 @@ pub(crate) mod tests {
)
.unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes).unwrap()
};
assert!(value.size > 0);

View File

@ -4,7 +4,8 @@ use std::io;
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::StrRefCodec;
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result};