mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-11 23:25:41 +00:00
Compare commits
2 Commits
delta-enco
...
v1.29.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2dd7f29edf | ||
|
|
ff680d29a8 |
601
Cargo.lock
generated
601
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ members = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "1.28.2"
|
version = "1.29.0"
|
||||||
authors = [
|
authors = [
|
||||||
"Quentin de Quelen <quentin@dequelen.me>",
|
"Quentin de Quelen <quentin@dequelen.me>",
|
||||||
"Clément Renault <clement@meilisearch.com>",
|
"Clément Renault <clement@meilisearch.com>",
|
||||||
|
|||||||
@@ -300,7 +300,6 @@ impl Infos {
|
|||||||
max_indexing_memory,
|
max_indexing_memory,
|
||||||
max_indexing_threads,
|
max_indexing_threads,
|
||||||
skip_index_budget: _,
|
skip_index_budget: _,
|
||||||
experimental_disable_delta_encoding: _,
|
|
||||||
experimental_no_edition_2024_for_settings,
|
experimental_no_edition_2024_for_settings,
|
||||||
experimental_no_edition_2024_for_dumps,
|
experimental_no_edition_2024_for_dumps,
|
||||||
experimental_no_edition_2024_for_prefix_post_processing,
|
experimental_no_edition_2024_for_prefix_post_processing,
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ use meilisearch::{
|
|||||||
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
|
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
|
||||||
};
|
};
|
||||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||||
use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
|
|
||||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||||
use tracing::level_filters::LevelFilter;
|
use tracing::level_filters::LevelFilter;
|
||||||
use tracing_subscriber::layer::SubscriberExt as _;
|
use tracing_subscriber::layer::SubscriberExt as _;
|
||||||
@@ -96,14 +95,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
|
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
|
||||||
let (opt, config_read_from) = Opt::try_build()?;
|
let (opt, config_read_from) = Opt::try_build()?;
|
||||||
|
|
||||||
// Disables the delta encoding of bitmaps as soon as possible
|
|
||||||
if opt.indexer_options.experimental_disable_delta_encoding {
|
|
||||||
DELTA_ENCODING_STATUS.set_to_disabled()
|
|
||||||
} else {
|
|
||||||
DELTA_ENCODING_STATUS.set_to_enabled()
|
|
||||||
}
|
|
||||||
.expect("the delta-encoding status to be set only once");
|
|
||||||
|
|
||||||
std::panic::set_hook(Box::new(on_panic));
|
std::panic::set_hook(Box::new(on_panic));
|
||||||
|
|
||||||
anyhow::ensure!(
|
anyhow::ensure!(
|
||||||
|
|||||||
@@ -60,7 +60,6 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
|
|||||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
|
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
|
||||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
|
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
|
||||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
|
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
|
||||||
const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
|
|
||||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||||
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
|
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
|
||||||
@@ -846,14 +845,6 @@ pub struct IndexerOpts {
|
|||||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
|
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||||
|
|
||||||
/// Experimental disable delta-encoding for bitmaps. For more information,
|
|
||||||
/// see: <https://github.com/orgs/meilisearch/discussions/875>
|
|
||||||
///
|
|
||||||
/// Enables the experimental disable delta-encoding for bitmaps feature.
|
|
||||||
#[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
|
|
||||||
#[serde(default)]
|
|
||||||
pub experimental_disable_delta_encoding: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IndexerOpts {
|
impl IndexerOpts {
|
||||||
@@ -867,7 +858,6 @@ impl IndexerOpts {
|
|||||||
experimental_no_edition_2024_for_dumps,
|
experimental_no_edition_2024_for_dumps,
|
||||||
experimental_no_edition_2024_for_prefix_post_processing,
|
experimental_no_edition_2024_for_prefix_post_processing,
|
||||||
experimental_no_edition_2024_for_facet_post_processing,
|
experimental_no_edition_2024_for_facet_post_processing,
|
||||||
experimental_disable_delta_encoding,
|
|
||||||
} = self;
|
} = self;
|
||||||
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
||||||
export_to_env_if_not_present(
|
export_to_env_if_not_present(
|
||||||
@@ -905,12 +895,6 @@ impl IndexerOpts {
|
|||||||
experimental_no_edition_2024_for_facet_post_processing.to_string(),
|
experimental_no_edition_2024_for_facet_post_processing.to_string(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if experimental_disable_delta_encoding {
|
|
||||||
export_to_env_if_not_present(
|
|
||||||
MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
|
|
||||||
experimental_disable_delta_encoding.to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -926,7 +910,6 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
|||||||
experimental_no_edition_2024_for_dumps,
|
experimental_no_edition_2024_for_dumps,
|
||||||
experimental_no_edition_2024_for_prefix_post_processing,
|
experimental_no_edition_2024_for_prefix_post_processing,
|
||||||
experimental_no_edition_2024_for_facet_post_processing,
|
experimental_no_edition_2024_for_facet_post_processing,
|
||||||
experimental_disable_delta_encoding: _, // managed in try_main
|
|
||||||
} = other;
|
} = other;
|
||||||
|
|
||||||
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
|
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
|
||||||
@@ -1262,7 +1245,7 @@ where
|
|||||||
T: AsRef<OsStr>,
|
T: AsRef<OsStr>,
|
||||||
{
|
{
|
||||||
if let Err(VarError::NotPresent) = std::env::var(key) {
|
if let Err(VarError::NotPresent) = std::env::var(key) {
|
||||||
unsafe { std::env::set_var(key, value) }
|
std::env::set_var(key, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -43,9 +43,9 @@ impl Server<Owned> {
|
|||||||
let dir = TempDir::new().unwrap();
|
let dir = TempDir::new().unwrap();
|
||||||
|
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
|
std::env::set_var("TMP", TEST_TEMP_DIR.path());
|
||||||
} else {
|
} else {
|
||||||
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
|
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
|
||||||
}
|
}
|
||||||
|
|
||||||
let options = default_settings(dir.path());
|
let options = default_settings(dir.path());
|
||||||
@@ -58,9 +58,9 @@ impl Server<Owned> {
|
|||||||
|
|
||||||
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
|
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
|
std::env::set_var("TMP", TEST_TEMP_DIR.path());
|
||||||
} else {
|
} else {
|
||||||
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
|
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
|
||||||
}
|
}
|
||||||
|
|
||||||
options.master_key = Some("MASTER_KEY".to_string());
|
options.master_key = Some("MASTER_KEY".to_string());
|
||||||
@@ -215,9 +215,9 @@ impl Server<Shared> {
|
|||||||
let dir = TempDir::new().unwrap();
|
let dir = TempDir::new().unwrap();
|
||||||
|
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
|
std::env::set_var("TMP", TEST_TEMP_DIR.path());
|
||||||
} else {
|
} else {
|
||||||
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
|
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
|
||||||
}
|
}
|
||||||
|
|
||||||
let options = default_settings(dir.path());
|
let options = default_settings(dir.path());
|
||||||
@@ -508,8 +508,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
|||||||
experimental_no_edition_2024_for_dumps: false,
|
experimental_no_edition_2024_for_dumps: false,
|
||||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||||
// It has no effect to set the delta encoding here as the toggle is done in try_main
|
|
||||||
experimental_disable_delta_encoding: false,
|
|
||||||
},
|
},
|
||||||
experimental_enable_metrics: false,
|
experimental_enable_metrics: false,
|
||||||
..Parser::parse_from(None as Option<&str>)
|
..Parser::parse_from(None as Option<&str>)
|
||||||
|
|||||||
@@ -120,16 +120,14 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
|
|||||||
] }
|
] }
|
||||||
geo-types = "0.7.17"
|
geo-types = "0.7.17"
|
||||||
zerometry = "0.3.0"
|
zerometry = "0.3.0"
|
||||||
bitpacking = "0.9.2"
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
mimalloc = { version = "0.1.48", default-features = false }
|
||||||
# fixed version due to format breakages in v1.40
|
# fixed version due to format breakages in v1.40
|
||||||
insta = "=1.39.0"
|
insta = "=1.39.0"
|
||||||
mimalloc = { version = "0.1.48", default-features = false }
|
|
||||||
maplit = "1.0.2"
|
maplit = "1.0.2"
|
||||||
md5 = "0.8.0"
|
md5 = "0.8.0"
|
||||||
meili-snap = { path = "../meili-snap" }
|
meili-snap = { path = "../meili-snap" }
|
||||||
quickcheck = "1.0.3"
|
|
||||||
rand = { version = "0.8.5", features = ["small_rng"] }
|
rand = { version = "0.8.5", features = ["small_rng"] }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
|||||||
@@ -22,10 +22,7 @@ pub use self::beu32_str_codec::BEU32StrCodec;
|
|||||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||||
pub use self::fst_set_codec::FstSetCodec;
|
pub use self::fst_set_codec::FstSetCodec;
|
||||||
pub use self::obkv_codec::ObkvCodec;
|
pub use self::obkv_codec::ObkvCodec;
|
||||||
pub use self::roaring_bitmap::{
|
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||||
BoRoaringBitmapCodec, CboRoaringBitmapCodec, DeCboRoaringBitmapCodec, RoaringBitmapCodec,
|
|
||||||
DELTA_ENCODING_STATUS,
|
|
||||||
};
|
|
||||||
pub use self::roaring_bitmap_length::{
|
pub use self::roaring_bitmap_length::{
|
||||||
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
|
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -19,19 +19,8 @@ pub const THRESHOLD: usize = 7;
|
|||||||
pub struct CboRoaringBitmapCodec;
|
pub struct CboRoaringBitmapCodec;
|
||||||
|
|
||||||
impl CboRoaringBitmapCodec {
|
impl CboRoaringBitmapCodec {
|
||||||
/// If the number of items (u32s) to encode is less than or equal to the threshold
|
|
||||||
/// it means that it would weigh the same or less than the RoaringBitmap
|
|
||||||
/// header, so we directly encode them using ByteOrder instead.
|
|
||||||
pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
|
|
||||||
roaring.len() <= THRESHOLD as u64
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
|
|
||||||
bytes.len() <= THRESHOLD * size_of::<u32>()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
|
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
|
||||||
if Self::bitmap_serialize_as_raw_u32s(roaring) {
|
if roaring.len() <= THRESHOLD as u64 {
|
||||||
roaring.len() as usize * size_of::<u32>()
|
roaring.len() as usize * size_of::<u32>()
|
||||||
} else {
|
} else {
|
||||||
roaring.serialized_size()
|
roaring.serialized_size()
|
||||||
@@ -46,7 +35,10 @@ impl CboRoaringBitmapCodec {
|
|||||||
roaring: &RoaringBitmap,
|
roaring: &RoaringBitmap,
|
||||||
mut writer: W,
|
mut writer: W,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
if Self::bitmap_serialize_as_raw_u32s(roaring) {
|
if roaring.len() <= THRESHOLD as u64 {
|
||||||
|
// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||||
|
// it means that it would weigh the same or less than the RoaringBitmap
|
||||||
|
// header, so we directly encode them using ByteOrder instead.
|
||||||
for integer in roaring {
|
for integer in roaring {
|
||||||
writer.write_u32::<NativeEndian>(integer)?;
|
writer.write_u32::<NativeEndian>(integer)?;
|
||||||
}
|
}
|
||||||
@@ -59,7 +51,7 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
|
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
|
||||||
if Self::bytes_deserialize_as_raw_u32s(bytes) {
|
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||||
// If there is threshold or less than threshold integers that can fit into this array
|
// If there is threshold or less than threshold integers that can fit into this array
|
||||||
// of bytes it means that we used the ByteOrder codec serializer.
|
// of bytes it means that we used the ByteOrder codec serializer.
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@@ -79,7 +71,7 @@ impl CboRoaringBitmapCodec {
|
|||||||
other: &RoaringBitmap,
|
other: &RoaringBitmap,
|
||||||
) -> io::Result<RoaringBitmap> {
|
) -> io::Result<RoaringBitmap> {
|
||||||
// See above `deserialize_from` method for implementation details.
|
// See above `deserialize_from` method for implementation details.
|
||||||
if Self::bytes_deserialize_as_raw_u32s(bytes) {
|
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||||
if other.contains(integer) {
|
if other.contains(integer) {
|
||||||
@@ -106,7 +98,7 @@ impl CboRoaringBitmapCodec {
|
|||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
|
|
||||||
for bytes in slices {
|
for bytes in slices {
|
||||||
if Self::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
|
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
|
||||||
let mut reader = bytes.as_ref();
|
let mut reader = bytes.as_ref();
|
||||||
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
||||||
vec.push(integer);
|
vec.push(integer);
|
||||||
@@ -120,8 +112,6 @@ impl CboRoaringBitmapCodec {
|
|||||||
vec.sort_unstable();
|
vec.sort_unstable();
|
||||||
vec.dedup();
|
vec.dedup();
|
||||||
|
|
||||||
// Be careful when modifying this condition,
|
|
||||||
// the rule must be the same everywhere
|
|
||||||
if vec.len() <= THRESHOLD {
|
if vec.len() <= THRESHOLD {
|
||||||
for integer in vec {
|
for integer in vec {
|
||||||
buffer.extend_from_slice(&integer.to_ne_bytes());
|
buffer.extend_from_slice(&integer.to_ne_bytes());
|
||||||
|
|||||||
@@ -1,153 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::io::{self, ErrorKind};
|
|
||||||
use std::sync::OnceLock;
|
|
||||||
|
|
||||||
use heed::BoxedError;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
|
||||||
use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
|
|
||||||
use crate::heed_codec::BytesDecodeOwned;
|
|
||||||
|
|
||||||
/// Defines the status of the delta encoding on whether we have enabled it or not.
|
|
||||||
pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();
|
|
||||||
|
|
||||||
pub struct DeCboRoaringBitmapCodec;
|
|
||||||
|
|
||||||
impl DeCboRoaringBitmapCodec {
|
|
||||||
pub fn serialized_size_with_tmp_buffer(
|
|
||||||
bitmap: &RoaringBitmap,
|
|
||||||
tmp_buffer: &mut Vec<u32>,
|
|
||||||
) -> usize {
|
|
||||||
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
|
|
||||||
// raw and unencoded u32s, without a header when there is at most THRESHOLD elements.
|
|
||||||
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|
|
||||||
&& DELTA_ENCODING_STATUS.is_disabled()
|
|
||||||
{
|
|
||||||
CboRoaringBitmapCodec::serialized_size(bitmap)
|
|
||||||
} else {
|
|
||||||
DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Writes the delta-encoded compressed version of
|
|
||||||
/// the given roaring bitmap into the provided writer.
|
|
||||||
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: W) -> io::Result<()> {
|
|
||||||
let mut tmp_buffer = Vec::new();
|
|
||||||
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
|
|
||||||
///
|
|
||||||
/// Note that we always serialize the bitmap with the delta-encoded compressed version.
|
|
||||||
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
|
|
||||||
bitmap: &RoaringBitmap,
|
|
||||||
writer: W,
|
|
||||||
tmp_buffer: &mut Vec<u32>,
|
|
||||||
) -> io::Result<()> {
|
|
||||||
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
|
|
||||||
// raw and unencoded u32s, without a header when there is at most THRESHOLD elements.
|
|
||||||
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|
|
||||||
&& DELTA_ENCODING_STATUS.is_disabled()
|
|
||||||
{
|
|
||||||
CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
|
|
||||||
} else {
|
|
||||||
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
|
|
||||||
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
|
|
||||||
let mut tmp_buffer = Vec::new();
|
|
||||||
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
|
|
||||||
///
|
|
||||||
/// It tries to decode the input by using the delta-decoded version and
|
|
||||||
/// if it fails, falls back to the CboRoaringBitmap version.
|
|
||||||
pub fn deserialize_from_with_tmp_buffer(
|
|
||||||
input: &[u8],
|
|
||||||
tmp_buffer: &mut Vec<u32>,
|
|
||||||
) -> io::Result<RoaringBitmap> {
|
|
||||||
// The input is too short to be a valid delta-decoded bitmap.
|
|
||||||
// We fall back to the CboRoaringBitmap version with raw u32s.
|
|
||||||
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
|
|
||||||
return CboRoaringBitmapCodec::deserialize_from(input);
|
|
||||||
}
|
|
||||||
|
|
||||||
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(input, tmp_buffer) {
|
|
||||||
Ok(bitmap) => Ok(bitmap),
|
|
||||||
// If the error kind is Other it means that the delta-decoder found
|
|
||||||
// an invalid magic header. We fall back to the CboRoaringBitmap version.
|
|
||||||
Err(e) if e.kind() == ErrorKind::Other => {
|
|
||||||
CboRoaringBitmapCodec::deserialize_from(input)
|
|
||||||
}
|
|
||||||
Err(e) => Err(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
|
|
||||||
type DItem = RoaringBitmap;
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
|
||||||
Self::deserialize_from(bytes).map_err(Into::into)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
|
|
||||||
type DItem = RoaringBitmap;
|
|
||||||
|
|
||||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
|
||||||
Self::deserialize_from(bytes).map_err(Into::into)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
|
|
||||||
type EItem = RoaringBitmap;
|
|
||||||
|
|
||||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
|
||||||
let mut tmp_buffer = Vec::new();
|
|
||||||
let capacity = Self::serialized_size_with_tmp_buffer(&item, &mut tmp_buffer);
|
|
||||||
let mut output = Vec::with_capacity(capacity);
|
|
||||||
Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
|
|
||||||
Ok(Cow::Owned(output))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Manages the global status of the delta encoding.
|
|
||||||
///
|
|
||||||
/// Whether we must use delta encoding or not when encoding roaring bitmaps.
|
|
||||||
pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);
|
|
||||||
|
|
||||||
impl DeltaEncodingStatusLock {
|
|
||||||
pub const fn new() -> Self {
|
|
||||||
Self(OnceLock::new())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
enum DeltaEncodingStatus {
|
|
||||||
Enabled,
|
|
||||||
#[default]
|
|
||||||
Disabled,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DeltaEncodingStatusLock {
|
|
||||||
pub fn set_to_enabled(&self) -> Result<(), ()> {
|
|
||||||
self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_to_disabled(&self) -> Result<(), ()> {
|
|
||||||
self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_enabled(&self) -> bool {
|
|
||||||
matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_disabled(&self) -> bool {
|
|
||||||
!self.is_enabled()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,377 +0,0 @@
|
|||||||
use std::io::{self, ErrorKind};
|
|
||||||
use std::mem::{self, size_of, size_of_val};
|
|
||||||
|
|
||||||
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
/// The magic header for our custom encoding format
|
|
||||||
const MAGIC_HEADER: u16 = 36869;
|
|
||||||
|
|
||||||
pub struct DeRoaringBitmapCodec;
|
|
||||||
|
|
||||||
// TODO reintroduce:
|
|
||||||
// - serialized_size?
|
|
||||||
// - serialize_into_vec
|
|
||||||
// - intersection_with_serialized
|
|
||||||
// - merge_into
|
|
||||||
// - merge_deladd_into
|
|
||||||
impl DeRoaringBitmapCodec {
|
|
||||||
/// Returns the serialized size of the given roaring bitmap with the delta encoding format.
|
|
||||||
pub fn serialized_size_with_tmp_buffer(
|
|
||||||
bitmap: &RoaringBitmap,
|
|
||||||
tmp_buffer: &mut Vec<u32>,
|
|
||||||
) -> usize {
|
|
||||||
let mut size = 2; // u16 magic header
|
|
||||||
|
|
||||||
let bitpacker8x = BitPacker8x::new();
|
|
||||||
let bitpacker4x = BitPacker4x::new();
|
|
||||||
let bitpacker1x = BitPacker1x::new();
|
|
||||||
|
|
||||||
// This temporary buffer is used to store each chunk of decompressed u32s.
|
|
||||||
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
|
|
||||||
let decompressed = &mut tmp_buffer[..];
|
|
||||||
|
|
||||||
let mut buffer_index = 0;
|
|
||||||
let mut initial = None;
|
|
||||||
// We initially collect all the integers into a flat buffer of the size
|
|
||||||
// of the largest bitpacker. We encode them with it until we don't have
|
|
||||||
// enough of them...
|
|
||||||
for n in bitmap {
|
|
||||||
decompressed[buffer_index] = n;
|
|
||||||
buffer_index += 1;
|
|
||||||
if buffer_index == BitPacker8x::BLOCK_LEN {
|
|
||||||
let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
|
|
||||||
let compressed_len = BitPacker8x::compressed_block_size(num_bits);
|
|
||||||
size += 1; // u8 chunk header
|
|
||||||
size += compressed_len; // compressed data length
|
|
||||||
initial = Some(n);
|
|
||||||
buffer_index = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
|
|
||||||
let decompressed = &decompressed[..buffer_index];
|
|
||||||
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
|
|
||||||
for decompressed in chunks.by_ref() {
|
|
||||||
let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
|
|
||||||
let compressed_len = BitPacker4x::compressed_block_size(num_bits);
|
|
||||||
size += 1; // u8 chunk header
|
|
||||||
size += compressed_len; // compressed data length
|
|
||||||
initial = decompressed.iter().last().copied();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ...And so on...
|
|
||||||
let decompressed = chunks.remainder();
|
|
||||||
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
|
|
||||||
for decompressed in chunks.by_ref() {
|
|
||||||
let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
|
|
||||||
let compressed_len = BitPacker1x::compressed_block_size(num_bits);
|
|
||||||
size += 1; // u8 chunk header
|
|
||||||
size += compressed_len; // compressed data length
|
|
||||||
initial = decompressed.iter().last().copied();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ...Until we don't have any small enough bitpacker. We put them raw
|
|
||||||
// at the end of out buffer with a header indicating the matter.
|
|
||||||
let decompressed = chunks.remainder();
|
|
||||||
if !decompressed.is_empty() {
|
|
||||||
size += 1; // u8 chunk header
|
|
||||||
size += mem::size_of_val(decompressed); // remaining uncompressed u32s
|
|
||||||
}
|
|
||||||
|
|
||||||
size
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Writes the delta-encoded compressed version of
|
|
||||||
/// the given roaring bitmap into the provided writer.
|
|
||||||
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: W) -> io::Result<()> {
|
|
||||||
let mut tmp_buffer = Vec::new();
|
|
||||||
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
|
|
||||||
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
|
|
||||||
bitmap: &RoaringBitmap,
|
|
||||||
mut writer: W,
|
|
||||||
tmp_buffer: &mut Vec<u32>,
|
|
||||||
) -> io::Result<()> {
|
|
||||||
// Insert the magic header
|
|
||||||
writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;
|
|
||||||
|
|
||||||
let bitpacker8x = BitPacker8x::new();
|
|
||||||
let bitpacker4x = BitPacker4x::new();
|
|
||||||
let bitpacker1x = BitPacker1x::new();
|
|
||||||
|
|
||||||
// This temporary buffer is used to store each chunk of decompressed and
|
|
||||||
// compressed and delta-encoded u32s. We need room for the decompressed
|
|
||||||
// u32s coming from the roaring bitmap, the compressed output that can
|
|
||||||
// be as large as the decompressed u32s, and the chunk header.
|
|
||||||
tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
|
|
||||||
let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
|
|
||||||
let compressed = bytemuck::cast_slice_mut(compressed);
|
|
||||||
|
|
||||||
let mut buffer_index = 0;
|
|
||||||
let mut initial = None;
|
|
||||||
// We initially collect all the integers into a flat buffer of the size
|
|
||||||
// of the largest bitpacker. We encode them with it until we don't have
|
|
||||||
// enough of them...
|
|
||||||
for n in bitmap {
|
|
||||||
decompressed[buffer_index] = n;
|
|
||||||
buffer_index += 1;
|
|
||||||
if buffer_index == BitPacker8x::BLOCK_LEN {
|
|
||||||
let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
|
|
||||||
writer.write_all(output)?;
|
|
||||||
initial = Some(n);
|
|
||||||
buffer_index = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
|
|
||||||
let decompressed = &decompressed[..buffer_index];
|
|
||||||
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
|
|
||||||
for decompressed in chunks.by_ref() {
|
|
||||||
let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
|
|
||||||
writer.write_all(output)?;
|
|
||||||
initial = decompressed.iter().last().copied();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ...And so on...
|
|
||||||
let decompressed = chunks.remainder();
|
|
||||||
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
|
|
||||||
for decompressed in chunks.by_ref() {
|
|
||||||
let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
|
|
||||||
writer.write_all(output)?;
|
|
||||||
initial = decompressed.iter().last().copied();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ...Until we don't have any small enough bitpacker. We put them raw
|
|
||||||
// at the end of out buffer with a header indicating the matter.
|
|
||||||
let decompressed = chunks.remainder();
|
|
||||||
if !decompressed.is_empty() {
|
|
||||||
let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
|
|
||||||
// Note: Not convinced about the performance of writing a single
|
|
||||||
// byte followed by a larger write. However, we will use this
|
|
||||||
// codec with a BufWriter or directly with a Vec of bytes.
|
|
||||||
writer.write_all(&[header])?;
|
|
||||||
writer.write_all(bytemuck::cast_slice(decompressed))?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
|
|
||||||
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
|
|
||||||
let mut tmp_buffer = Vec::new();
|
|
||||||
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
|
|
||||||
pub fn deserialize_from_with_tmp_buffer(
|
|
||||||
input: &[u8],
|
|
||||||
tmp_buffer: &mut Vec<u32>,
|
|
||||||
) -> io::Result<RoaringBitmap> {
|
|
||||||
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
|
|
||||||
else {
|
|
||||||
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
|
|
||||||
};
|
|
||||||
|
|
||||||
// Safety: This unwrap cannot happen as the header buffer is the right size
|
|
||||||
let header = u16::from_ne_bytes(header.try_into().unwrap());
|
|
||||||
|
|
||||||
if header != MAGIC_HEADER {
|
|
||||||
return Err(io::Error::other("invalid header value"));
|
|
||||||
}
|
|
||||||
|
|
||||||
let bitpacker8x = BitPacker8x::new();
|
|
||||||
let bitpacker4x = BitPacker4x::new();
|
|
||||||
let bitpacker1x = BitPacker1x::new();
|
|
||||||
|
|
||||||
let mut bitmap = RoaringBitmap::new();
|
|
||||||
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
|
|
||||||
let decompressed = &mut tmp_buffer[..];
|
|
||||||
let mut initial = None;
|
|
||||||
|
|
||||||
while let Some((&chunk_header, encoded)) = compressed.split_first() {
|
|
||||||
let (level, num_bits) = decode_chunk_header(chunk_header);
|
|
||||||
let (bytes_read, decompressed) = match level {
|
|
||||||
BitPackerLevel::None => {
|
|
||||||
if num_bits != u32::BITS as u8 {
|
|
||||||
return Err(io::Error::new(
|
|
||||||
ErrorKind::InvalidData,
|
|
||||||
"invalid number of bits to encode non-compressed u32s",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let chunks = encoded.chunks_exact(size_of::<u32>());
|
|
||||||
if !chunks.remainder().is_empty() {
|
|
||||||
return Err(io::Error::new(
|
|
||||||
io::ErrorKind::InvalidData,
|
|
||||||
"expecting last chunk to be a multiple of the size of an u32",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let integers = chunks
|
|
||||||
// safety: This unwrap cannot happen as
|
|
||||||
// the size of u32 is set correctly.
|
|
||||||
.map(|b| b.try_into().unwrap())
|
|
||||||
.map(u32::from_ne_bytes);
|
|
||||||
|
|
||||||
bitmap
|
|
||||||
.append(integers)
|
|
||||||
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
|
|
||||||
|
|
||||||
// This is basically always the last chunk that exists in
|
|
||||||
// this delta-encoded format as the raw u32s are appended
|
|
||||||
// when there is not enough of them to fit in a bitpacker.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
BitPackerLevel::BitPacker1x => {
|
|
||||||
decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
|
|
||||||
}
|
|
||||||
BitPackerLevel::BitPacker4x => {
|
|
||||||
decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
|
|
||||||
}
|
|
||||||
BitPackerLevel::BitPacker8x => {
|
|
||||||
decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
initial = decompressed.iter().last().copied();
|
|
||||||
// TODO investigate perf
|
|
||||||
// Safety: Bitpackers cannot output unsorter integers when
|
|
||||||
// used with the compress_strictly_sorted function.
|
|
||||||
bitmap.append(decompressed.iter().copied()).unwrap();
|
|
||||||
// What the delta-decoding read plus the chunk header size
|
|
||||||
compressed = &compressed[bytes_read + 1..];
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(bitmap)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Takes a strickly sorted list of u32s and outputs delta-encoded
|
|
||||||
/// bytes with a chunk header. We expect the output buffer to be
|
|
||||||
/// at least BLOCK_LEN + 1.
|
|
||||||
fn encode_with_packer<'c, B: BitPackerExt>(
|
|
||||||
bitpacker: &B,
|
|
||||||
decompressed: &[u32],
|
|
||||||
initial: Option<u32>,
|
|
||||||
output: &'c mut [u8],
|
|
||||||
) -> &'c [u8] {
|
|
||||||
let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
|
|
||||||
let compressed_len = B::compressed_block_size(num_bits);
|
|
||||||
let chunk_header = encode_chunk_header(B::level(), num_bits);
|
|
||||||
let buffer = &mut output[..compressed_len + 1];
|
|
||||||
// Safety: The buffer is at least one byte
|
|
||||||
let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
|
|
||||||
*header_in_buffer = chunk_header;
|
|
||||||
bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
|
|
||||||
buffer
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the number of bytes read and the decoded unsigned integers.
|
|
||||||
fn decode_with_packer<'d, B: BitPacker>(
|
|
||||||
bitpacker: &B,
|
|
||||||
decompressed: &'d mut [u32],
|
|
||||||
initial: Option<u32>,
|
|
||||||
compressed: &[u8],
|
|
||||||
num_bits: u8,
|
|
||||||
) -> (usize, &'d [u32]) {
|
|
||||||
let decompressed = &mut decompressed[..B::BLOCK_LEN];
|
|
||||||
let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
|
|
||||||
(read, decompressed)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An identifier for the bitpacker to be able
/// to correctly decode the compressed integers.
///
/// The discriminant is what `encode_chunk_header` stores in the two most
/// significant bits of a chunk header and what `decode_chunk_header` maps
/// back to a variant, so the values below are part of the serialized format
/// and must not be reordered.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum BitPackerLevel {
    /// The remaining bytes are raw u32s in native byte order (the decoder
    /// reads them with `u32::from_ne_bytes`).
    None = 0,
    /// The remaining bits are encoded using a `BitPacker1x`.
    BitPacker1x = 1,
    /// The remaining bits are encoded using a `BitPacker4x`.
    BitPacker4x = 2,
    /// The remaining bits are encoded using a `BitPacker8x`.
    BitPacker8x = 3,
}
|
|
||||||
|
|
||||||
/// Returns the chunk header based on the bitpacker level
|
|
||||||
/// and the number of bits to encode the list of integers.
|
|
||||||
fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
|
|
||||||
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
|
|
||||||
let level = level as u8;
|
|
||||||
debug_assert!(level <= 3);
|
|
||||||
num_bits | (level << 6)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Decodes the chunk header and output the bitpacker level
|
|
||||||
/// and the number of bits to decode the following bytes.
|
|
||||||
fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
|
|
||||||
let num_bits = data & 0b00111111;
|
|
||||||
let level = match data >> 6 {
|
|
||||||
0 => BitPackerLevel::None,
|
|
||||||
1 => BitPackerLevel::BitPacker1x,
|
|
||||||
2 => BitPackerLevel::BitPacker4x,
|
|
||||||
3 => BitPackerLevel::BitPacker8x,
|
|
||||||
invalid => panic!("Invalid bitpacker level: {invalid}"),
|
|
||||||
};
|
|
||||||
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
|
|
||||||
(level, num_bits)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A simple helper trait to get the BitPackerLevel
|
|
||||||
/// and correctly generate the chunk header.
|
|
||||||
trait BitPackerExt: BitPacker {
|
|
||||||
/// Returns the level of the bitpacker: an identifier to be
|
|
||||||
/// able to decode the numbers with the right bitpacker.
|
|
||||||
fn level() -> BitPackerLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BitPackerExt for BitPacker8x {
|
|
||||||
fn level() -> BitPackerLevel {
|
|
||||||
BitPackerLevel::BitPacker8x
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BitPackerExt for BitPacker4x {
|
|
||||||
fn level() -> BitPackerLevel {
|
|
||||||
BitPackerLevel::BitPacker4x
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BitPackerExt for BitPacker1x {
|
|
||||||
fn level() -> BitPackerLevel {
|
|
||||||
BitPackerLevel::BitPacker1x
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use quickcheck::quickcheck;
    use roaring::RoaringBitmap;

    use super::DeRoaringBitmapCodec;

    quickcheck! {
        // Round trip: decoding the encoded bytes must give back the bitmap.
        fn qc_random(xs: Vec<u32>) -> bool {
            let original: RoaringBitmap = xs.into_iter().collect();
            let mut bytes = Vec::new();
            DeRoaringBitmapCodec::serialize_into(&original, &mut bytes).unwrap();
            let round_tripped = DeRoaringBitmapCodec::deserialize_from(&bytes[..]).unwrap();
            round_tripped == original
        }
    }

    quickcheck! {
        // The predicted serialized size must match the bytes actually written.
        fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
            let original: RoaringBitmap = xs.into_iter().collect();
            let mut bytes = Vec::new();
            let mut scratch = Vec::new();
            DeRoaringBitmapCodec::serialize_into(&original, &mut bytes).unwrap();
            let predicted =
                DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&original, &mut scratch);
            bytes.len() == predicted
        }
    }
}
|
|
||||||
@@ -1,10 +1,7 @@
|
|||||||
mod bo_roaring_bitmap_codec;
|
mod bo_roaring_bitmap_codec;
|
||||||
pub mod cbo_roaring_bitmap_codec;
|
pub mod cbo_roaring_bitmap_codec;
|
||||||
pub mod de_cbo_roaring_bitmap_codec;
|
|
||||||
mod de_roaring_bitmap_codec;
|
|
||||||
mod roaring_bitmap_codec;
|
mod roaring_bitmap_codec;
|
||||||
|
|
||||||
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
|
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
|
||||||
// pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
||||||
pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
|
|
||||||
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
||||||
|
|||||||
Reference in New Issue
Block a user