Refactor facet database tests

This commit is contained in:
Loïc Lecrenier
2022-09-06 11:52:57 +02:00
committed by Loïc Lecrenier
parent 9026867d17
commit b2f01ad204
28 changed files with 568 additions and 644 deletions

View File

@ -114,14 +114,13 @@ where
mod tests {
use std::ops::ControlFlow;
use heed::BytesDecode;
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use super::iterate_over_facet_distribution;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::test::FacetIndex;
use crate::update::facet::tests::FacetIndex;
use heed::BytesDecode;
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
@ -164,17 +163,11 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
iterate_over_facet_distribution(
&txn,
index.db.content,
0,
&candidates,
|facet, count| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
results.push_str(&format!("{facet}: {count}\n"));
ControlFlow::Continue(())
},
)
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
results.push_str(&format!("{facet}: {count}\n"));
ControlFlow::Continue(())
})
.unwrap();
milli_snap!(results, i);
@ -189,23 +182,17 @@ mod tests {
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let mut nbr_facets = 0;
iterate_over_facet_distribution(
&txn,
index.db.content,
0,
&candidates,
|facet, count| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
if nbr_facets == 100 {
return ControlFlow::Break(());
} else {
nbr_facets += 1;
results.push_str(&format!("{facet}: {count}\n"));
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
if nbr_facets == 100 {
return ControlFlow::Break(());
} else {
nbr_facets += 1;
results.push_str(&format!("{facet}: {count}\n"));
ControlFlow::Continue(())
}
},
)
ControlFlow::Continue(())
}
})
.unwrap();
milli_snap!(results, i);

View File

@ -263,8 +263,8 @@ mod tests {
use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap;
use crate::search::facet::test::FacetIndex;
use crate::snapshot_tests::display_bitmap;
use crate::update::facet::tests::FacetIndex;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
@ -312,7 +312,7 @@ mod tests {
let end = Bound::Included(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -328,7 +328,7 @@ mod tests {
let end = Bound::Excluded(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -354,7 +354,7 @@ mod tests {
let end = Bound::Included(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -373,7 +373,7 @@ mod tests {
let end = Bound::Excluded(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -401,7 +401,7 @@ mod tests {
let end = Bound::Included(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -420,7 +420,7 @@ mod tests {
let end = Bound::Excluded(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,

View File

@ -89,8 +89,8 @@ mod tests {
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::test::FacetIndex;
use crate::snapshot_tests::display_bitmap;
use crate::update::facet::tests::FacetIndex;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
@ -133,7 +133,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.db.content, 0, candidates).unwrap();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));

View File

@ -122,8 +122,8 @@ mod tests {
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::test::FacetIndex;
use crate::snapshot_tests::display_bitmap;
use crate::update::facet::tests::FacetIndex;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
@ -166,7 +166,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();

View File

@ -74,149 +74,3 @@ pub(crate) fn get_highest_level<'t>(
})
.unwrap_or(0))
}
#[cfg(test)]
pub mod test {
use std::fmt::Display;
use std::marker::PhantomData;
use std::rc::Rc;
use heed::{BytesDecode, BytesEncode, Env, RwTxn};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::snapshot_tests::display_bitmap;
use crate::update::FacetsUpdateIncrementalInner;
// A dummy index that only contains the facet database, used for testing
pub struct FacetIndex<BoundCodec>
where
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
pub env: Env,
pub db: Database,
_phantom: PhantomData<BoundCodec>,
}
// The faecet database and its settings
pub struct Database {
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub group_size: u8,
pub min_level_size: u8,
pub max_group_size: u8,
_tempdir: Rc<tempfile::TempDir>,
}
impl<BoundCodec> FacetIndex<BoundCodec>
where
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
#[cfg(all(test, fuzzing))]
pub fn open_from_tempdir(
tempdir: Rc<tempfile::TempDir>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
) -> FacetIndex<BoundCodec> {
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
let mut options = heed::EnvOpenOptions::new();
let options = options.map_size(4096 * 4 * 10 * 100);
unsafe {
options.flag(heed::flags::Flags::MdbAlwaysFreePages);
}
let env = options.open(tempdir.path()).unwrap();
let content = env.open_database(None).unwrap().unwrap();
FacetIndex {
db: Database {
content,
group_size,
max_group_size,
min_level_size,
_tempdir: tempdir,
},
env,
_phantom: PhantomData,
}
}
pub fn new(
group_size: u8,
max_group_size: u8,
min_level_size: u8,
) -> FacetIndex<BoundCodec> {
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
let mut options = heed::EnvOpenOptions::new();
let options = options.map_size(4096 * 4 * 100);
let tempdir = tempfile::TempDir::new().unwrap();
let env = options.open(tempdir.path()).unwrap();
let content = env.create_database(None).unwrap();
FacetIndex {
db: Database {
content,
group_size,
max_group_size,
min_level_size,
_tempdir: Rc::new(tempdir),
},
env,
_phantom: PhantomData,
}
}
pub fn insert<'a>(
&self,
rwtxn: &'a mut RwTxn,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docids: &RoaringBitmap,
) {
let update = FacetsUpdateIncrementalInner::new(self.db.content);
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
update.insert(rwtxn, field_id, &key_bytes, docids).unwrap();
}
pub fn delete<'a>(
&self,
rwtxn: &'a mut RwTxn,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
value: u32,
) {
let update = FacetsUpdateIncrementalInner::new(self.db.content);
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
update.delete(rwtxn, field_id, &key_bytes, value).unwrap();
}
}
impl<BoundCodec> Display for FacetIndex<BoundCodec>
where
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized + Display,
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let txn = self.env.read_txn().unwrap();
let mut iter = self.db.content.iter(&txn).unwrap();
while let Some(el) = iter.next() {
let (key, value) = el.unwrap();
let FacetGroupKey { field_id, level, left_bound: bound } = key;
let bound = BoundCodec::bytes_decode(bound).unwrap();
let FacetGroupValue { size, bitmap } = value;
writeln!(
f,
"{field_id:<2} {level:<2} k{bound:<8} {size:<4} {values:?}",
values = display_bitmap(&bitmap)
)?;
}
Ok(())
}
}
}