mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
Refactor facet database tests
This commit is contained in:
committed by
Loïc Lecrenier
parent
9026867d17
commit
b2f01ad204
@ -114,14 +114,13 @@ where
|
||||
mod tests {
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::iterate_over_facet_distribution;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use heed::BytesDecode;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
@ -164,17 +163,11 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
iterate_over_facet_distribution(
|
||||
&txn,
|
||||
index.db.content,
|
||||
0,
|
||||
&candidates,
|
||||
|facet, count| {
|
||||
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
|
||||
results.push_str(&format!("{facet}: {count}\n"));
|
||||
ControlFlow::Continue(())
|
||||
},
|
||||
)
|
||||
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| {
|
||||
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
|
||||
results.push_str(&format!("{facet}: {count}\n"));
|
||||
ControlFlow::Continue(())
|
||||
})
|
||||
.unwrap();
|
||||
milli_snap!(results, i);
|
||||
|
||||
@ -189,23 +182,17 @@ mod tests {
|
||||
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let mut nbr_facets = 0;
|
||||
iterate_over_facet_distribution(
|
||||
&txn,
|
||||
index.db.content,
|
||||
0,
|
||||
&candidates,
|
||||
|facet, count| {
|
||||
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
|
||||
if nbr_facets == 100 {
|
||||
return ControlFlow::Break(());
|
||||
} else {
|
||||
nbr_facets += 1;
|
||||
results.push_str(&format!("{facet}: {count}\n"));
|
||||
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| {
|
||||
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
|
||||
if nbr_facets == 100 {
|
||||
return ControlFlow::Break(());
|
||||
} else {
|
||||
nbr_facets += 1;
|
||||
results.push_str(&format!("{facet}: {count}\n"));
|
||||
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
)
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
milli_snap!(results, i);
|
||||
|
||||
|
@ -263,8 +263,8 @@ mod tests {
|
||||
use super::find_docids_of_facet_within_bounds;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
@ -312,7 +312,7 @@ mod tests {
|
||||
let end = Bound::Included(i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -328,7 +328,7 @@ mod tests {
|
||||
let end = Bound::Excluded(i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -354,7 +354,7 @@ mod tests {
|
||||
let end = Bound::Included(255.);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -373,7 +373,7 @@ mod tests {
|
||||
let end = Bound::Excluded(255.);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -401,7 +401,7 @@ mod tests {
|
||||
let end = Bound::Included(255. - i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -420,7 +420,7 @@ mod tests {
|
||||
let end = Bound::Excluded(255. - i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
|
@ -89,8 +89,8 @@ mod tests {
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
@ -133,7 +133,7 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.db.content, 0, candidates).unwrap();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let docids = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
|
@ -122,8 +122,8 @@ mod tests {
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
@ -166,7 +166,7 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let docids = el.unwrap();
|
||||
|
@ -74,149 +74,3 @@ pub(crate) fn get_highest_level<'t>(
|
||||
})
|
||||
.unwrap_or(0))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use std::fmt::Display;
|
||||
use std::marker::PhantomData;
|
||||
use std::rc::Rc;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode, Env, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::FacetsUpdateIncrementalInner;
|
||||
|
||||
// A dummy index that only contains the facet database, used for testing
|
||||
pub struct FacetIndex<BoundCodec>
|
||||
where
|
||||
for<'a> BoundCodec:
|
||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||
{
|
||||
pub env: Env,
|
||||
pub db: Database,
|
||||
_phantom: PhantomData<BoundCodec>,
|
||||
}
|
||||
|
||||
// The facet database and its settings
|
||||
pub struct Database {
|
||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
pub group_size: u8,
|
||||
pub min_level_size: u8,
|
||||
pub max_group_size: u8,
|
||||
_tempdir: Rc<tempfile::TempDir>,
|
||||
}
|
||||
|
||||
impl<BoundCodec> FacetIndex<BoundCodec>
|
||||
where
|
||||
for<'a> BoundCodec:
|
||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||
{
|
||||
#[cfg(all(test, fuzzing))]
|
||||
pub fn open_from_tempdir(
|
||||
tempdir: Rc<tempfile::TempDir>,
|
||||
group_size: u8,
|
||||
max_group_size: u8,
|
||||
min_level_size: u8,
|
||||
) -> FacetIndex<BoundCodec> {
|
||||
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
|
||||
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
|
||||
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
let options = options.map_size(4096 * 4 * 10 * 100);
|
||||
unsafe {
|
||||
options.flag(heed::flags::Flags::MdbAlwaysFreePages);
|
||||
}
|
||||
let env = options.open(tempdir.path()).unwrap();
|
||||
let content = env.open_database(None).unwrap().unwrap();
|
||||
|
||||
FacetIndex {
|
||||
db: Database {
|
||||
content,
|
||||
group_size,
|
||||
max_group_size,
|
||||
min_level_size,
|
||||
_tempdir: tempdir,
|
||||
},
|
||||
env,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
pub fn new(
|
||||
group_size: u8,
|
||||
max_group_size: u8,
|
||||
min_level_size: u8,
|
||||
) -> FacetIndex<BoundCodec> {
|
||||
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
|
||||
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
|
||||
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
let options = options.map_size(4096 * 4 * 100);
|
||||
let tempdir = tempfile::TempDir::new().unwrap();
|
||||
let env = options.open(tempdir.path()).unwrap();
|
||||
let content = env.create_database(None).unwrap();
|
||||
|
||||
FacetIndex {
|
||||
db: Database {
|
||||
content,
|
||||
group_size,
|
||||
max_group_size,
|
||||
min_level_size,
|
||||
_tempdir: Rc::new(tempdir),
|
||||
},
|
||||
env,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
pub fn insert<'a>(
|
||||
&self,
|
||||
rwtxn: &'a mut RwTxn,
|
||||
field_id: u16,
|
||||
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
||||
docids: &RoaringBitmap,
|
||||
) {
|
||||
let update = FacetsUpdateIncrementalInner::new(self.db.content);
|
||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||
update.insert(rwtxn, field_id, &key_bytes, docids).unwrap();
|
||||
}
|
||||
pub fn delete<'a>(
|
||||
&self,
|
||||
rwtxn: &'a mut RwTxn,
|
||||
field_id: u16,
|
||||
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
||||
value: u32,
|
||||
) {
|
||||
let update = FacetsUpdateIncrementalInner::new(self.db.content);
|
||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||
update.delete(rwtxn, field_id, &key_bytes, value).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
impl<BoundCodec> Display for FacetIndex<BoundCodec>
|
||||
where
|
||||
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized + Display,
|
||||
for<'a> BoundCodec:
|
||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let txn = self.env.read_txn().unwrap();
|
||||
let mut iter = self.db.content.iter(&txn).unwrap();
|
||||
while let Some(el) = iter.next() {
|
||||
let (key, value) = el.unwrap();
|
||||
let FacetGroupKey { field_id, level, left_bound: bound } = key;
|
||||
let bound = BoundCodec::bytes_decode(bound).unwrap();
|
||||
let FacetGroupValue { size, bitmap } = value;
|
||||
writeln!(
|
||||
f,
|
||||
"{field_id:<2} {level:<2} k{bound:<8} {size:<4} {values:?}",
|
||||
values = display_bitmap(&bitmap)
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user