Store the field id instead of the field name in the facets database

This commit is contained in:
Clément Renault
2020-11-13 14:04:24 +01:00
parent cf9ddd293d
commit 8ae9888959
5 changed files with 33 additions and 63 deletions

View File

@@ -1,26 +1,24 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use std::str;
use crate::heed_codec::StrBytesCodec;
use crate::facet::value_encoding::f64_into_bytes; use crate::facet::value_encoding::f64_into_bytes;
/// Key codec pairing a facet field with an `f64` value. In the left (pre-commit) column the
/// field is a `&str` name; in the right (post-commit) column it is a one-byte `u8` id.
pub struct FacetValueF64Codec; pub struct FacetValueF64Codec;
/// Decodes a stored key into its field part and `f64` value.
///
/// Left column: the field name is split off by `StrBytesCodec`.
/// Right column: the first byte is taken as the field id via `split_first`.
/// Yields `None` when the field part cannot be split off, or when the slice starting at
/// offset 8 of the remainder is not exactly 8 bytes long.
impl<'a> heed::BytesDecode<'a> for FacetValueF64Codec { impl<'a> heed::BytesDecode<'a> for FacetValueF64Codec {
type DItem = (&'a str, f64); type DItem = (u8, f64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (name, buffer) = StrBytesCodec::bytes_decode(bytes)?; let (field_id, buffer) = bytes.split_first()?;
// Only the bytes from offset 8 onwards are read back, as a big-endian `f64`.
// NOTE(review): `buffer[8..]` panics when fewer than 8 bytes follow the field part, instead
// of yielding `None` like the other failure paths; `buffer.get(8..)?` would keep the
// failure inside the `Option`. Confirm whether stored keys are guaranteed full-length.
let value = buffer[8..].try_into().ok().map(f64::from_be_bytes)?; let value = buffer[8..].try_into().ok().map(f64::from_be_bytes)?;
Some((name, value)) Some((*field_id, value))
} }
} }
/// Encodes a field part and an `f64` value into key bytes.
///
/// Left column: the field name and the 16-byte buffer are handed to `StrBytesCodec`.
/// Right column: the key is the field id byte followed by the 16-byte buffer.
impl<'a> heed::BytesEncode<'a> for FacetValueF64Codec { impl heed::BytesEncode<'_> for FacetValueF64Codec {
type EItem = (&'a str, f64); type EItem = (u8, f64);
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
// 16-byte scratch buffer; the visible code below fills bytes 8.. with the raw big-endian value.
let mut buffer = [0u8; 16]; let mut buffer = [0u8; 16];
// Write the globally ordered float. // Write the globally ordered float.
// NOTE(review): the diff hunk boundary on the next line hides the statements that follow the
// comment above; presumably they write the order-preserving encoding into `buffer[..8]`
// (`f64_into_bytes` is imported) — TODO confirm against the full file.
@@ -31,8 +29,10 @@ impl<'a> heed::BytesEncode<'a> for FacetValueF64Codec {
let bytes = value.to_be_bytes(); let bytes = value.to_be_bytes();
buffer[8..].copy_from_slice(&bytes[..]); buffer[8..].copy_from_slice(&bytes[..]);
// Right column: allocate once for the id byte plus the buffer, then append both in order.
let tuple = (*name, &buffer[..]); let mut bytes = Vec::with_capacity(buffer.len() + 1);
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned) bytes.push(*field_id);
bytes.extend_from_slice(&buffer[..]);
Some(Cow::Owned(bytes))
} }
} }
@@ -43,8 +43,8 @@ mod tests {
// Round-trip check: a (field, -32.0) pair is encoded and decoded again, and the result must
// equal the input pair.
// NOTE(review): in the right-hand column the first tuple element is a field id (`3`), yet the
// decoded binding is still called `name`; renaming it to `field_id` would match the codec.
#[test] #[test]
fn globally_ordered_f64() { fn globally_ordered_f64() {
let bytes = FacetValueF64Codec::bytes_encode(&("hello", -32.0)).unwrap(); let bytes = FacetValueF64Codec::bytes_encode(&(3, -32.0)).unwrap();
let (name, value) = FacetValueF64Codec::bytes_decode(&bytes).unwrap(); let (name, value) = FacetValueF64Codec::bytes_decode(&bytes).unwrap();
assert_eq!((name, value), ("hello", -32.0)); assert_eq!((name, value), (3, -32.0));
} }
} }

View File

@@ -1,28 +1,28 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use std::str;
use crate::heed_codec::StrBytesCodec;
use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes}; use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes};
/// Key codec pairing a facet field with an `i64` value. In the left (pre-commit) column the
/// field is a `&str` name; in the right (post-commit) column it is a one-byte `u8` id.
pub struct FacetValueI64Codec; pub struct FacetValueI64Codec;
/// Decodes a stored key into its field part and `i64` value.
///
/// Left column: the field name is split off by `StrBytesCodec`.
/// Right column: the first byte is taken as the field id via `split_first`.
/// Yields `None` when the field part cannot be split off or when the remaining bytes do not
/// convert (`try_into`) to the input type expected by `i64_from_bytes`.
impl<'a> heed::BytesDecode<'a> for FacetValueI64Codec { impl<'a> heed::BytesDecode<'a> for FacetValueI64Codec {
type DItem = (&'a str, i64); type DItem = (u8, i64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?; let (field_id, buffer) = bytes.split_first()?;
// Everything after the field part is the encoded integer (presumably 8 bytes — the exact
// width is fixed by `i64_from_bytes`, which is defined outside this view).
let value = bytes.try_into().map(i64_from_bytes).ok()?; let value = buffer.try_into().map(i64_from_bytes).ok()?;
Some((name, value)) Some((*field_id, value))
} }
} }
/// Encodes a field part and an `i64` value into key bytes.
///
/// Left column: the field name and the encoded integer are handed to `StrBytesCodec`.
/// Right column: the key is the field id byte followed by the output of `i64_into_bytes`.
impl<'a> heed::BytesEncode<'a> for FacetValueI64Codec { impl heed::BytesEncode<'_> for FacetValueI64Codec {
type EItem = (&'a str, i64); type EItem = (u8, i64);
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
// Shadows the integer with its byte encoding produced by `i64_into_bytes`.
let value = i64_into_bytes(*value); let value = i64_into_bytes(*value);
// Right column: allocate once for the id byte plus the encoded value, then append both.
let tuple = (*name, &value[..]); let mut bytes = Vec::with_capacity(value.len() + 1);
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned) bytes.push(*field_id);
bytes.extend_from_slice(&value[..]);
Some(Cow::Owned(bytes))
} }
} }

View File

@@ -1,25 +1,25 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::str; use std::str;
use crate::heed_codec::StrBytesCodec;
/// Key codec pairing a facet field with a string value. In the left (pre-commit) column the
/// field is a `&str` name; in the right (post-commit) column it is a one-byte `u8` id.
pub struct FacetValueStringCodec; pub struct FacetValueStringCodec;
/// Decodes a stored key into its field part and a borrowed string value.
///
/// Left column: the field name is split off by `StrBytesCodec`.
/// Right column: the first byte is taken as the field id via `split_first`.
/// Yields `None` when the field part cannot be split off or when the remaining bytes are
/// not valid UTF-8.
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec { impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
type DItem = (&'a str, &'a str); type DItem = (u8, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?; let (field_id, bytes) = bytes.split_first()?;
// The value borrows from the input; no copy is made.
let value = str::from_utf8(bytes).ok()?; let value = str::from_utf8(bytes).ok()?;
Some((name, value)) Some((*field_id, value))
} }
} }
/// Encodes a field part and a string value into key bytes.
///
/// Left column: the field name and the value's bytes are handed to `StrBytesCodec`.
/// Right column: the key is the field id byte followed by the value's UTF-8 bytes; this
/// path always returns `Some`.
impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec { impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
type EItem = (&'a str, &'a str); type EItem = (u8, &'a str);
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
// Right column: allocate once for the id byte plus the string bytes, then append both.
let tuple = (*name, value.as_bytes()); let mut bytes = Vec::with_capacity(value.len() + 1);
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned) bytes.push(*field_id);
bytes.extend_from_slice(value.as_bytes());
Some(Cow::Owned(bytes))
} }
} }

View File

@@ -1,16 +1,14 @@
mod beu32_str_codec; mod beu32_str_codec;
mod bo_roaring_bitmap_codec; mod bo_roaring_bitmap_codec;
mod cbo_roaring_bitmap_codec; mod cbo_roaring_bitmap_codec;
mod facet;
mod obkv_codec; mod obkv_codec;
mod roaring_bitmap_codec; mod roaring_bitmap_codec;
mod str_bytes_codec;
mod str_str_u8_codec; mod str_str_u8_codec;
pub mod facet;
pub use self::beu32_str_codec::BEU32StrCodec; pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec; pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec; pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::obkv_codec::ObkvCodec; pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap_codec::RoaringBitmapCodec; pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
pub use self::str_bytes_codec::StrBytesCodec;
pub use self::str_str_u8_codec::StrStrU8Codec; pub use self::str_str_u8_codec::StrStrU8Codec;

View File

@@ -1,28 +0,0 @@
use std::borrow::Cow;
use std::str;
/// Codec for a `(&str, &[u8])` pair laid out as the string's UTF-8 bytes, a single `0` byte
/// separator, then the raw bytes.
pub struct StrBytesCodec;
impl<'a> heed::BytesDecode<'a> for StrBytesCodec {
    type DItem = (&'a str, &'a [u8]);

    /// Splits `bytes` at the first `0` byte into a UTF-8 string and the bytes that follow.
    ///
    /// Returns `None` when `bytes` contains no `0` byte or when the part before it is not
    /// valid UTF-8. The separator itself is not part of either half.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let separator = bytes.iter().position(|&byte| byte == 0)?;
        let text = str::from_utf8(&bytes[..separator]).ok()?;
        // `separator` indexes an existing byte, so `separator + 1` is at most `bytes.len()`.
        let payload = &bytes[separator + 1..];
        Some((text, payload))
    }
}
impl<'a> heed::BytesEncode<'a> for StrBytesCodec {
    type EItem = (&'a str, &'a [u8]);

    /// Encodes the pair as `s1`'s UTF-8 bytes, a `0` separator byte, then `s2`.
    ///
    /// Returns `None` when `s1` itself contains a `0` byte: decoding splits on the first
    /// `0`, so such a key could never be decoded back into the pair it was built from.
    fn bytes_encode((s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
        if s1.as_bytes().contains(&0) {
            return None;
        }

        // One allocation sized for both parts plus the separator.
        let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
        bytes.extend_from_slice(s1.as_bytes());
        bytes.push(0);
        bytes.extend_from_slice(s2);
        Some(Cow::Owned(bytes))
    }
}