Move StrRefCodec and ByteSliceRefCodec to their own files

This commit is contained in:
Loïc Lecrenier
2022-10-12 09:42:55 +02:00
parent 1165ba2171
commit a034a1e628
18 changed files with 140 additions and 107 deletions

View File

@ -7,7 +7,8 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation;
@ -194,14 +195,14 @@ fn facet_ordered<'t>(
let number_iter = make_iter(
rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates.clone(),
)?;
let string_iter = make_iter(
rtxn,
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
)?;

View File

@ -9,9 +9,10 @@ use roaring::RoaringBitmap;
use crate::error::UserError;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec,
FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec,
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
OrderedF64Codec,
};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter;
use crate::{FieldId, Index, Result};
@ -137,7 +138,9 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn,
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
self.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, _| {
@ -160,7 +163,9 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn,
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
self.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, any_docid| {

View File

@ -4,9 +4,8 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::DocumentId;
/// Call the given closure on the facet distribution of the candidate documents.
@ -22,7 +21,7 @@ use crate::DocumentId;
/// keep iterating over the different facet values or stop.
pub fn iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
callback: CB,
@ -31,10 +30,13 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
let highest_level =
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
return Ok(());
} else {
@ -47,7 +49,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
callback: CB,
}
@ -72,11 +74,13 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
// TODO: use real intersection and then take min()?
let docids_in_common = value.bitmap.intersection_len(candidates);
if docids_in_common > 0 {
// TODO: use min()
let any_docid = value.bitmap.iter().next().unwrap();
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
ControlFlow::Continue(_) => {}
ControlFlow::Continue(_) => (), // TODO use unit instead of empty scope
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
}
}

View File

@ -4,9 +4,8 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::Result;
/// Find all the document ids for which the given field contains a value contained within
@ -47,13 +46,16 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let rightmost_bound =
Bound::Included(get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
if let Some(starting_left_bound) =
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
{
let rightmost_bound = Bound::Included(
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@ -65,7 +67,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,

View File

@ -3,8 +3,9 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
@ -30,12 +31,12 @@ use crate::heed_codec::facet::{
/// Note that once a document id is returned by the iterator, it is never returned again.
pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
@ -47,11 +48,13 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
stack: Vec<(
RoaringBitmap,
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
std::iter::Take<
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
)>,
}

View File

@ -5,22 +5,23 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
/// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(Box::new(DescendingFacetSort {
@ -36,12 +37,12 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
Bound<&'t [u8]>,
)>,
@ -97,7 +98,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*right_bound = Bound::Excluded(left_bound);
let iter = match self
.db
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
.rev_range(
&self.rtxn,
&(Bound::Included(starting_key_below), end_key_kelow),
@ -121,7 +122,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
@ -134,7 +136,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();

View File

@ -5,8 +5,8 @@ use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::Filter;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
mod facet_distribution;
mod facet_distribution_iter;
mod facet_range_search;
@ -17,7 +17,7 @@ mod filter;
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -42,7 +42,7 @@ where
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -67,7 +67,7 @@ where
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
@ -77,7 +77,7 @@ pub(crate) fn get_highest_level<'t>(
.next()
.map(|el| {
let (key, _) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
key.level
})
.unwrap_or(0))