Move crates under a sub folder to clean up the code

This commit is contained in:
Clément Renault
2024-10-21 08:18:43 +02:00
parent 30f3c30389
commit 9c1e54a2c8
1062 changed files with 19 additions and 20 deletions

View File

@ -0,0 +1,47 @@
use std::borrow::Cow;
use std::marker::PhantomData;
use heed::{BoxedError, BytesDecode, BytesEncode};
use crate::heed_codec::SliceTooShortError;
use crate::{try_split_array_at, DocumentId, FieldId};
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
where
C: BytesDecode<'a>,
{
type DItem = (FieldId, DocumentId, C::DItem);
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let field_id = u16::from_be_bytes(field_id_bytes);
let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let document_id = u32::from_be_bytes(document_id_bytes);
let value = C::bytes_decode(bytes)?;
Ok((field_id, document_id, value))
}
}
impl<'a, C> BytesEncode<'a> for FieldDocIdFacetCodec<C>
where
C: BytesEncode<'a>,
{
type EItem = (FieldId, DocumentId, C::EItem);
fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
let value_bytes = C::bytes_encode(value)?;
// variable length, if f64 -> 16 bytes, if string -> large, potentially
bytes.extend_from_slice(&value_bytes);
Ok(Cow::Owned(bytes))
}
}

View File

@ -0,0 +1,123 @@
mod field_doc_id_facet_codec;
mod ordered_f64_codec;
use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
use heed::types::DecodeIgnore;
use heed::{BoxedError, BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
use super::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
pub type FieldIdCodec = BEU16;
/// Tries to split a slice in half at the given middle point,
/// `None` if the slice is too short.
pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
if slice.len() >= mid {
Some(slice.split_at(mid))
} else {
None
}
}
/// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
/// databases.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord
pub struct FacetGroupKey<T> {
pub field_id: u16,
pub level: u8,
pub left_bound: T,
}
/// The value in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
/// databases.
#[derive(Debug)]
pub struct FacetGroupValue {
pub size: u8,
pub bitmap: RoaringBitmap,
}
#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
pub size: u8,
pub bitmap_bytes: &'b [u8],
}
pub struct FacetGroupKeyCodec<T> {
_phantom: PhantomData<T>,
}
impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
where
T: BytesEncode<'a>,
T::EItem: Sized,
{
type EItem = FacetGroupKey<T::EItem>;
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![];
v.extend_from_slice(&value.field_id.to_be_bytes());
v.extend_from_slice(&[value.level]);
let bound = T::bytes_encode(&value.left_bound)?;
v.extend_from_slice(&bound);
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
T: BytesDecode<'a>,
{
type DItem = FacetGroupKey<T::DItem>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
let level = bytes[2];
let bound = T::bytes_decode(&bytes[3..])?;
Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
}
}
pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}
pub struct FacetGroupLazyValueCodec;
impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
type DItem = FacetGroupLazyValue<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
}
}

View File

@ -0,0 +1,45 @@
use std::borrow::Cow;
use std::convert::TryInto;
use heed::{BoxedError, BytesDecode};
use thiserror::Error;
use crate::facet::value_encoding::f64_into_bytes;
use crate::heed_codec::SliceTooShortError;
pub struct OrderedF64Codec;
impl<'a> BytesDecode<'a> for OrderedF64Codec {
type DItem = f64;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() < 16 {
Err(SliceTooShortError.into())
} else {
bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
}
}
}
impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut buffer = [0u8; 16];
// write the globally ordered float
let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
buffer[..8].copy_from_slice(&bytes[..]);
// Then the f64 value just to be able to read it back
let bytes = f.to_be_bytes();
buffer[8..16].copy_from_slice(&bytes[..]);
Ok(Cow::Owned(buffer.to_vec()))
}
}
#[derive(Error, Debug)]
#[error("the float {float} cannot be converted to a globally ordered representation")]
pub struct InvalidGloballyOrderedFloatError {
float: f64,
}