From 67b08a180fb64b94e5da2ccd4cf96fa3a4257c64 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Jun 2025 12:13:06 +0200 Subject: [PATCH] Patch the CBO roaring encoders to use the main branch --- Cargo.lock | 3 +-- Cargo.toml | 3 +++ .../heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs | 4 +++- .../heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs | 8 +++++--- .../src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs | 2 ++ 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f50638bb..4da22e544 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5168,8 +5168,7 @@ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" [[package]] name = "roaring" version = "0.10.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +source = "git+https://github.com/RoaringBitmap/roaring-rs.git#6535a822358fce546eb021da8b02d52e4906fe7a" dependencies = [ "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 835ef497c..b3973d6eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,3 +49,6 @@ opt-level = 3 opt-level = 3 [profile.dev.package.roaring] opt-level = 3 + +[patch.crates-io] +roaring = { git = "https://github.com/RoaringBitmap/roaring-rs.git" } diff --git a/crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs index a89b35fa6..edfb8b97e 100644 --- a/crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs +++ b/crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs @@ -43,8 +43,10 @@ impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { type EItem = RoaringBitmap; fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + let mut item = item.clone(); + item.optimize(); let mut out = Vec::new(); - BoRoaringBitmapCodec::serialize_into(item, &mut out); + BoRoaringBitmapCodec::serialize_into(&item, &mut out); Ok(Cow::Owned(out)) } } diff --git a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 0ab162880..c9addd598 100644 --- a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -12,7 +12,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd}; /// This is the limit where using a byteorder became less size efficient /// than using a direct roaring encoding, it is also the point where we are able /// to determine the encoding used only by using the array of bytes length. -pub const THRESHOLD: usize = 7; +pub const THRESHOLD: usize = 3; /// A conditionnal codec that either use the RoaringBitmap /// or a lighter ByteOrder en/decoding method. @@ -177,8 +177,10 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { type EItem = RoaringBitmap; fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { - let mut vec = Vec::with_capacity(Self::serialized_size(item)); - Self::serialize_into_vec(item, &mut vec); + let mut item = item.clone(); + item.optimize(); + let mut vec = Vec::with_capacity(Self::serialized_size(&item)); + Self::serialize_into_vec(&item, &mut vec); Ok(Cow::Owned(vec)) } } diff --git a/crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs index 764e10cd6..2c054b645 100644 --- a/crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs +++ b/crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs @@ -27,6 +27,8 @@ impl heed::BytesEncode<'_> for RoaringBitmapCodec { type EItem = RoaringBitmap; fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + let mut item = item.clone(); + item.optimize(); let mut bytes = Vec::with_capacity(item.serialized_size()); item.serialize_into(&mut bytes)?; Ok(Cow::Owned(bytes))