Introduce the FacetStringIter iterator

This commit is contained in:
Kerollmops
2021-06-23 16:36:51 +02:00
parent a79661c6dc
commit adfd4da24c
2 changed files with 140 additions and 11 deletions

View File

@@ -147,7 +147,7 @@ impl<'t> FacetNumberIter<'t> {
field_id: FieldId, field_id: FieldId,
documents_ids: RoaringBitmap, documents_ids: RoaringBitmap,
) -> heed::Result<FacetNumberIter<'t>> { ) -> heed::Result<FacetNumberIter<'t>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>(); let db = index.facet_id_f64_docids;
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = let highest_iter =
FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;

View File

@@ -127,9 +127,10 @@
use std::num::NonZeroU8; use std::num::NonZeroU8;
use std::ops::Bound; use std::ops::Bound;
use std::ops::Bound::{Excluded, Included}; use std::ops::Bound::{Excluded, Included, Unbounded};
use heed::types::{ByteSlice, Str}; use either::{Either, Left, Right};
use heed::types::{ByteSlice, DecodeIgnore, Str};
use heed::{Database, LazyDecode, RoRange}; use heed::{Database, LazyDecode, RoRange};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@@ -137,7 +138,7 @@ use crate::heed_codec::facet::{
FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringZeroBoundsValueCodec, FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringZeroBoundsValueCodec,
}; };
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::FieldId; use crate::{FieldId, Index};
/// An iterator that is used to explore the facets level strings /// An iterator that is used to explore the facets level strings
/// from the level 1 to infinity. /// from the level 1 to infinity.
@@ -155,17 +156,18 @@ pub struct FacetStringGroupRange<'t> {
} }
impl<'t> FacetStringGroupRange<'t> { impl<'t> FacetStringGroupRange<'t> {
pub fn new( pub fn new<X, Y>(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: Database< db: Database<X, Y>,
FacetLevelValueU32Codec,
FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>,
>,
field_id: FieldId, field_id: FieldId,
level: NonZeroU8, level: NonZeroU8,
left: Bound<u32>, left: Bound<u32>,
right: Bound<u32>, right: Bound<u32>,
) -> heed::Result<FacetStringGroupRange<'t>> { ) -> heed::Result<FacetStringGroupRange<'t>> {
let db = db.remap_types::<
FacetLevelValueU32Codec,
FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>,
>();
let left_bound = match left { let left_bound = match left {
Included(left) => Included((field_id, level, left, u32::MIN)), Included(left) => Included((field_id, level, left, u32::MIN)),
Excluded(left) => Excluded((field_id, level, left, u32::MIN)), Excluded(left) => Excluded((field_id, level, left, u32::MIN)),
@@ -211,13 +213,14 @@ pub struct FacetStringLevelZeroRange<'t> {
} }
impl<'t> FacetStringLevelZeroRange<'t> { impl<'t> FacetStringLevelZeroRange<'t> {
pub fn new( pub fn new<X, Y>(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: Database<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>, db: Database<X, Y>,
field_id: FieldId, field_id: FieldId,
left: Bound<&str>, left: Bound<&str>,
right: Bound<&str>, right: Bound<&str>,
) -> heed::Result<FacetStringLevelZeroRange<'t>> { ) -> heed::Result<FacetStringLevelZeroRange<'t>> {
let db = db.remap_types::<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>();
let left_bound = match left { let left_bound = match left {
Included(left) => Included((field_id, left)), Included(left) => Included((field_id, left)),
Excluded(left) => Excluded((field_id, left)), Excluded(left) => Excluded((field_id, left)),
@@ -245,3 +248,129 @@ impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
} }
} }
} }
/// An iterator that is used to explore the facet strings level by level,
/// it will only return facets strings that are associated with the
/// candidates documents ids given.
pub struct FacetStringIter<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: Database<ByteSlice, ByteSlice>,
field_id: FieldId,
level_iters:
Vec<(RoaringBitmap, Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>)>,
}
impl<'t> FacetStringIter<'t> {
pub fn new_non_reducing(
rtxn: &'t heed::RoTxn,
index: &'t Index,
field_id: FieldId,
documents_ids: RoaringBitmap,
) -> heed::Result<FacetStringIter<'t>> {
// TODO make sure that we change the database before using it, or merging the PR.
let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = match NonZeroU8::new(highest_level) {
Some(highest_level) => Left(FacetStringGroupRange::new(
rtxn,
index.facet_id_string_docids,
field_id,
highest_level,
Unbounded,
Unbounded,
)?),
None => Right(FacetStringLevelZeroRange::new(
rtxn,
index.facet_id_string_docids,
field_id,
Unbounded,
Unbounded,
)?),
};
Ok(FacetStringIter { rtxn, db, field_id, level_iters: vec![(documents_ids, highest_iter)] })
}
fn highest_level<X, Y>(
rtxn: &'t heed::RoTxn,
db: Database<X, Y>,
fid: FieldId,
) -> heed::Result<Option<u8>> {
Ok(db
.remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter(rtxn, &[fid][..])? // the field id is the first bit
.last()
.transpose()?
.map(|(key_bytes, _)| key_bytes[1])) // the level is the second bit
}
}
impl<'t> Iterator for FacetStringIter<'t> {
type Item = heed::Result<(&'t str, RoaringBitmap)>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
let (documents_ids, last) = self.level_iters.last_mut()?;
match last {
Left(last) => {
for result in last {
match result {
Ok(((level, left, right), (string_bounds, mut docids))) => {
docids &= &*documents_ids;
if !docids.is_empty() {
*documents_ids -= &docids;
let result = match string_bounds {
Some((left, right)) => FacetStringLevelZeroRange::new(
self.rtxn,
self.db,
self.field_id,
Included(left),
Included(right),
)
.map(Right),
None => FacetStringGroupRange::new(
self.rtxn,
self.db,
self.field_id,
NonZeroU8::new(level.get() - 1).unwrap(),
Included(left),
Included(right),
)
.map(Left),
};
match result {
Ok(iter) => {
self.level_iters.push((docids, iter));
continue 'outer;
}
Err(e) => return Some(Err(e)),
}
}
}
Err(e) => return Some(Err(e)),
}
}
}
Right(last) => {
// level zero only
for result in last {
match result {
Ok((value, mut docids)) => {
docids &= &*documents_ids;
if !docids.is_empty() {
*documents_ids -= &docids;
return Some(Ok((value, docids)));
}
}
Err(e) => return Some(Err(e)),
}
}
}
}
self.level_iters.pop();
}
}
}