mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-19 13:00:46 +00:00
Compare commits
3 Commits
fix_get_ba
...
improve-in
Author | SHA1 | Date | |
---|---|---|---|
4696b8199f | |||
01b1effec0 | |||
51fb4d6976 |
900
Cargo.lock
generated
900
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -21,7 +21,7 @@ serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
|||||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
rand_chacha = "0.3.1"
|
rand_chacha = "0.3.1"
|
||||||
roaring = "0.10.1"
|
roaring = { path = "../../roaring-rs" }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
anyhow = "1.0.70"
|
anyhow = "1.0.70"
|
||||||
|
@ -19,7 +19,7 @@ meilisearch-auth = { path = "../meilisearch-auth" }
|
|||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
once_cell = "1.17.1"
|
once_cell = "1.17.1"
|
||||||
regex = "1.7.3"
|
regex = "1.7.3"
|
||||||
roaring = { version = "0.10.1", features = ["serde"] }
|
roaring = { path = "../../roaring-rs", features = ["serde"] }
|
||||||
serde = { version = "1.0.160", features = ["derive"] }
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
tar = "0.4.38"
|
tar = "0.4.38"
|
||||||
|
@ -23,7 +23,7 @@ meilisearch-auth = { path = "../meilisearch-auth" }
|
|||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
page_size = "0.5.0"
|
page_size = "0.5.0"
|
||||||
puffin = "0.16.0"
|
puffin = "0.16.0"
|
||||||
roaring = { version = "0.10.1", features = ["serde"] }
|
roaring = { path = "../../roaring-rs", features = ["serde"] }
|
||||||
serde = { version = "1.0.160", features = ["derive"] }
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
synchronoise = "1.0.1"
|
synchronoise = "1.0.1"
|
||||||
|
@ -17,7 +17,7 @@ hmac = "0.12.1"
|
|||||||
maplit = "1.0.2"
|
maplit = "1.0.2"
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
roaring = { version = "0.10.1", features = ["serde"] }
|
roaring = { path = "../../roaring-rs", features = ["serde"] }
|
||||||
serde = { version = "1.0.160", features = ["derive"] }
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
sha2 = "0.10.6"
|
sha2 = "0.10.6"
|
||||||
|
@ -23,7 +23,7 @@ flate2 = "1.0.25"
|
|||||||
fst = "0.4.7"
|
fst = "0.4.7"
|
||||||
memmap2 = "0.7.1"
|
memmap2 = "0.7.1"
|
||||||
milli = { path = "../milli" }
|
milli = { path = "../milli" }
|
||||||
roaring = { version = "0.10.1", features = ["serde"] }
|
roaring = { path = "../../roaring-rs", features = ["serde"] }
|
||||||
serde = { version = "1.0.160", features = ["derive"] }
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
serde-cs = "0.2.4"
|
serde-cs = "0.2.4"
|
||||||
serde_json = "1.0.95"
|
serde_json = "1.0.95"
|
||||||
|
@ -42,7 +42,7 @@ once_cell = "1.17.1"
|
|||||||
ordered-float = "3.6.0"
|
ordered-float = "3.6.0"
|
||||||
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
||||||
rayon = "1.7.0"
|
rayon = "1.7.0"
|
||||||
roaring = "0.10.1"
|
roaring = { path = "../../roaring-rs" }
|
||||||
rstar = { version = "0.11.0", features = ["serde"] }
|
rstar = { version = "0.11.0", features = ["serde"] }
|
||||||
serde = { version = "1.0.160", features = ["derive"] }
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::convert::TryInto;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
@ -56,22 +57,30 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Merge serialized CboRoaringBitmaps in a buffer.
|
/// Merge serialized CboRoaringBitmaps in a buffer.
|
||||||
|
/// The buffer must be empty before calling the function.
|
||||||
///
|
///
|
||||||
/// if the merged values length is under the threshold, values are directly
|
/// if the merged values length is under the threshold, values are directly
|
||||||
/// serialized in the buffer else a RoaringBitmap is created from the
|
/// serialized in the buffer else a RoaringBitmap is created from the
|
||||||
/// values and is serialized in the buffer.
|
/// values and is serialized in the buffer.
|
||||||
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
|
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
|
||||||
|
debug_assert!(buffer.is_empty());
|
||||||
|
|
||||||
let mut roaring = RoaringBitmap::new();
|
let mut roaring = RoaringBitmap::new();
|
||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
|
|
||||||
for bytes in slices {
|
for bytes in slices {
|
||||||
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||||
let mut reader = bytes.as_ref();
|
debug_assert!(bytes.len() % size_of::<u32>() == 0);
|
||||||
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
vec.reserve(bytes.len() / size_of::<u32>());
|
||||||
vec.push(integer);
|
|
||||||
|
for bytes in bytes.chunks_exact(size_of::<u32>()) {
|
||||||
|
// unwrap can't happens since we ensured that everything
|
||||||
|
// was a multiple of size_of<u32>.
|
||||||
|
let v = u32::from_ne_bytes(bytes.try_into().unwrap());
|
||||||
|
vec.push(v);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
|
roaring.union_with_serialized_unchecked(bytes.as_ref())?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,7 +94,7 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// We can unwrap safely because the vector is sorted upper.
|
// We can unwrap safely because the vector is sorted upper.
|
||||||
let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
|
let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
|
||||||
roaring.serialize_into(buffer)?;
|
roaring.serialize_into(buffer)?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -186,8 +195,11 @@ mod tests {
|
|||||||
|
|
||||||
let medium_data: Vec<_> =
|
let medium_data: Vec<_> =
|
||||||
medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
|
medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
|
||||||
buffer.clear();
|
// TODO: used for profiling purpose, get rids of it once the function is optimized
|
||||||
CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
|
for _ in 0..100000 {
|
||||||
|
buffer.clear();
|
||||||
|
CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
|
let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
|
||||||
let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
|
let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
|
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
|
||||||
#![allow(clippy::type_complexity)]
|
#![allow(clippy::type_complexity)]
|
||||||
|
#![feature(test)]
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
|
@ -351,5 +351,5 @@ fn test_redacted() {
|
|||||||
.map(|scores| score_details::ScoreDetails::to_json_map(scores.iter()))
|
.map(|scores| score_details::ScoreDetails::to_json_map(scores.iter()))
|
||||||
.collect();
|
.collect();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]");
|
||||||
insta::assert_json_snapshot!(document_scores_json);
|
// insta::assert_json_snapshot!(document_scores_json);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user