mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Add range search and incremental indexing algorithm
This commit is contained in:
		
				
					committed by
					
						
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							63ef0aba18
						
					
				
				
					commit
					b8a1caad5e
				
			@@ -54,7 +54,7 @@ big_s = "1.0.2"
 | 
			
		||||
insta = "1.21.0"
 | 
			
		||||
maplit = "1.0.2"
 | 
			
		||||
md5 = "0.7.0"
 | 
			
		||||
rand = "0.8.5"
 | 
			
		||||
rand = {version = "0.8.5", features = ["small_rng"] }
 | 
			
		||||
 | 
			
		||||
[features]
 | 
			
		||||
default = [ "charabia/default" ]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,8 @@
 | 
			
		||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
 | 
			
		||||
use crate::Result;
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
use std::ops::ControlFlow;
 | 
			
		||||
 | 
			
		||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
 | 
			
		||||
 | 
			
		||||
use super::{get_first_facet_value, get_highest_level};
 | 
			
		||||
 | 
			
		||||
pub fn iterate_over_facet_distribution<'t, CB>(
 | 
			
		||||
@@ -11,18 +11,19 @@ pub fn iterate_over_facet_distribution<'t, CB>(
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
    candidates: &RoaringBitmap,
 | 
			
		||||
    callback: CB,
 | 
			
		||||
) where
 | 
			
		||||
) -> Result<()>
 | 
			
		||||
where
 | 
			
		||||
    CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
 | 
			
		||||
{
 | 
			
		||||
    let mut fd = FacetDistribution { rtxn, db, field_id, callback };
 | 
			
		||||
    let highest_level =
 | 
			
		||||
        get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id);
 | 
			
		||||
        get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
 | 
			
		||||
 | 
			
		||||
    if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id) {
 | 
			
		||||
    if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
 | 
			
		||||
        fd.iterate(candidates, highest_level, first_bound, usize::MAX);
 | 
			
		||||
        return;
 | 
			
		||||
        return Ok(());
 | 
			
		||||
    } else {
 | 
			
		||||
        return;
 | 
			
		||||
        return Ok(());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -45,26 +46,26 @@ where
 | 
			
		||||
        candidates: &RoaringBitmap,
 | 
			
		||||
        starting_bound: &'t [u8],
 | 
			
		||||
        group_size: usize,
 | 
			
		||||
    ) -> ControlFlow<()> {
 | 
			
		||||
    ) -> Result<ControlFlow<()>> {
 | 
			
		||||
        let starting_key =
 | 
			
		||||
            FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
 | 
			
		||||
        let iter = self.db.range(self.rtxn, &(starting_key..)).unwrap().take(group_size);
 | 
			
		||||
        let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
 | 
			
		||||
        for el in iter {
 | 
			
		||||
            let (key, value) = el.unwrap();
 | 
			
		||||
            let (key, value) = el?;
 | 
			
		||||
            // The range is unbounded on the right and the group size for the highest level is MAX,
 | 
			
		||||
            // so we need to check that we are not iterating over the next field id
 | 
			
		||||
            if key.field_id != self.field_id {
 | 
			
		||||
                return ControlFlow::Break(());
 | 
			
		||||
                return Ok(ControlFlow::Break(()));
 | 
			
		||||
            }
 | 
			
		||||
            let docids_in_common = value.bitmap.intersection_len(candidates);
 | 
			
		||||
            if docids_in_common > 0 {
 | 
			
		||||
                match (self.callback)(key.left_bound, docids_in_common) {
 | 
			
		||||
                    ControlFlow::Continue(_) => {}
 | 
			
		||||
                    ControlFlow::Break(_) => return ControlFlow::Break(()),
 | 
			
		||||
                    ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return ControlFlow::Continue(());
 | 
			
		||||
        return Ok(ControlFlow::Continue(()));
 | 
			
		||||
    }
 | 
			
		||||
    fn iterate(
 | 
			
		||||
        &mut self,
 | 
			
		||||
@@ -72,7 +73,7 @@ where
 | 
			
		||||
        level: u8,
 | 
			
		||||
        starting_bound: &'t [u8],
 | 
			
		||||
        group_size: usize,
 | 
			
		||||
    ) -> ControlFlow<()> {
 | 
			
		||||
    ) -> Result<ControlFlow<()>> {
 | 
			
		||||
        if level == 0 {
 | 
			
		||||
            return self.iterate_level_0(candidates, starting_bound, group_size);
 | 
			
		||||
        }
 | 
			
		||||
@@ -84,34 +85,42 @@ where
 | 
			
		||||
            // The range is unbounded on the right and the group size for the highest level is MAX,
 | 
			
		||||
            // so we need to check that we are not iterating over the next field id
 | 
			
		||||
            if key.field_id != self.field_id {
 | 
			
		||||
                return ControlFlow::Break(());
 | 
			
		||||
                return Ok(ControlFlow::Break(()));
 | 
			
		||||
            }
 | 
			
		||||
            let docids_in_common = value.bitmap & candidates;
 | 
			
		||||
            if docids_in_common.len() > 0 {
 | 
			
		||||
                let cf =
 | 
			
		||||
                    self.iterate(&docids_in_common, level - 1, key.left_bound, value.size as usize);
 | 
			
		||||
                let cf = self.iterate(
 | 
			
		||||
                    &docids_in_common,
 | 
			
		||||
                    level - 1,
 | 
			
		||||
                    key.left_bound,
 | 
			
		||||
                    value.size as usize,
 | 
			
		||||
                )?;
 | 
			
		||||
                match cf {
 | 
			
		||||
                    ControlFlow::Continue(_) => {}
 | 
			
		||||
                    ControlFlow::Break(_) => return ControlFlow::Break(()),
 | 
			
		||||
                    ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return ControlFlow::Continue(());
 | 
			
		||||
        return Ok(ControlFlow::Continue(()));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use crate::{codec::U16Codec, Index};
 | 
			
		||||
    use heed::BytesDecode;
 | 
			
		||||
    use rand::{rngs::SmallRng, Rng, SeedableRng};
 | 
			
		||||
    use roaring::RoaringBitmap;
 | 
			
		||||
    use std::ops::ControlFlow;
 | 
			
		||||
 | 
			
		||||
    use crate::{
 | 
			
		||||
        heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec, search::facet::test::FacetIndex,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    use super::iterate_over_facet_distribution;
 | 
			
		||||
 | 
			
		||||
    fn get_simple_index() -> Index<U16Codec> {
 | 
			
		||||
        let index = Index::<U16Codec>::new(4, 8);
 | 
			
		||||
    fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
 | 
			
		||||
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
        for i in 0..256u16 {
 | 
			
		||||
            let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
@@ -121,18 +130,19 @@ mod tests {
 | 
			
		||||
        txn.commit().unwrap();
 | 
			
		||||
        index
 | 
			
		||||
    }
 | 
			
		||||
    fn get_random_looking_index() -> Index<U16Codec> {
 | 
			
		||||
        let index = Index::<U16Codec>::new(4, 8);
 | 
			
		||||
    fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
 | 
			
		||||
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let rng = fastrand::Rng::with_seed(0);
 | 
			
		||||
        let keys = std::iter::from_fn(|| Some(rng.u32(..256))).take(128).collect::<Vec<u32>>();
 | 
			
		||||
        let rng = rand::rngs::SmallRng::from_seed([0; 32]);
 | 
			
		||||
        let keys =
 | 
			
		||||
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
 | 
			
		||||
 | 
			
		||||
        for (_i, key) in keys.into_iter().enumerate() {
 | 
			
		||||
            let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
            bitmap.insert(key);
 | 
			
		||||
            bitmap.insert(key + 100);
 | 
			
		||||
            index.insert(&mut txn, 0, &(key as u16), &bitmap);
 | 
			
		||||
            bitmap.insert(key + 100.);
 | 
			
		||||
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
 | 
			
		||||
        }
 | 
			
		||||
        txn.commit().unwrap();
 | 
			
		||||
        index
 | 
			
		||||
@@ -156,7 +166,7 @@ mod tests {
 | 
			
		||||
                0,
 | 
			
		||||
                &candidates,
 | 
			
		||||
                |facet, count| {
 | 
			
		||||
                    let facet = U16Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                    let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                    results.push_str(&format!("{facet}: {count}\n"));
 | 
			
		||||
                    ControlFlow::Continue(())
 | 
			
		||||
                },
 | 
			
		||||
@@ -180,7 +190,7 @@ mod tests {
 | 
			
		||||
                0,
 | 
			
		||||
                &candidates,
 | 
			
		||||
                |facet, count| {
 | 
			
		||||
                    let facet = U16Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                    let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                    if nbr_facets == 100 {
 | 
			
		||||
                        return ControlFlow::Break(());
 | 
			
		||||
                    } else {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										451
									
								
								milli/src/search/facet/facet_range_search.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										451
									
								
								milli/src/search/facet/facet_range_search.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,451 @@
 | 
			
		||||
use heed::BytesEncode;
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
use std::ops::Bound;
 | 
			
		||||
use std::ops::RangeBounds;
 | 
			
		||||
 | 
			
		||||
use crate::heed_codec::facet::new::FacetGroupValueCodec;
 | 
			
		||||
use crate::heed_codec::facet::new::FacetKey;
 | 
			
		||||
use crate::heed_codec::facet::new::FacetKeyCodec;
 | 
			
		||||
use crate::heed_codec::facet::new::MyByteSlice;
 | 
			
		||||
use crate::Result;
 | 
			
		||||
 | 
			
		||||
use super::get_first_facet_value;
 | 
			
		||||
use super::get_highest_level;
 | 
			
		||||
use super::get_last_facet_value;
 | 
			
		||||
 | 
			
		||||
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
 | 
			
		||||
    rtxn: &'t heed::RoTxn<'t>,
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
    left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
 | 
			
		||||
    right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
 | 
			
		||||
) -> Result<RoaringBitmap>
 | 
			
		||||
where
 | 
			
		||||
    BoundCodec: for<'a> BytesEncode<'a>,
 | 
			
		||||
    for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized,
 | 
			
		||||
{
 | 
			
		||||
    let inner;
 | 
			
		||||
    let left = match left {
 | 
			
		||||
        Bound::Included(left) => {
 | 
			
		||||
            inner = BoundCodec::bytes_encode(left).unwrap();
 | 
			
		||||
            Bound::Included(inner.as_ref())
 | 
			
		||||
        }
 | 
			
		||||
        Bound::Excluded(left) => {
 | 
			
		||||
            inner = BoundCodec::bytes_encode(left).unwrap();
 | 
			
		||||
            Bound::Excluded(inner.as_ref())
 | 
			
		||||
        }
 | 
			
		||||
        Bound::Unbounded => Bound::Unbounded,
 | 
			
		||||
    };
 | 
			
		||||
    let inner;
 | 
			
		||||
    let right = match right {
 | 
			
		||||
        Bound::Included(right) => {
 | 
			
		||||
            inner = BoundCodec::bytes_encode(right).unwrap();
 | 
			
		||||
            Bound::Included(inner.as_ref())
 | 
			
		||||
        }
 | 
			
		||||
        Bound::Excluded(right) => {
 | 
			
		||||
            inner = BoundCodec::bytes_encode(right).unwrap();
 | 
			
		||||
            Bound::Excluded(inner.as_ref())
 | 
			
		||||
        }
 | 
			
		||||
        Bound::Unbounded => Bound::Unbounded,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    let mut docids = RoaringBitmap::new();
 | 
			
		||||
    let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
 | 
			
		||||
    let highest_level = get_highest_level(rtxn, db, field_id)?;
 | 
			
		||||
 | 
			
		||||
    if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
 | 
			
		||||
        let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
 | 
			
		||||
        f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
 | 
			
		||||
        Ok(docids)
 | 
			
		||||
    } else {
 | 
			
		||||
        return Ok(RoaringBitmap::new());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Fetch the document ids that have a facet with a value between the two given bounds
 | 
			
		||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
 | 
			
		||||
    rtxn: &'t heed::RoTxn<'t>,
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
    left: Bound<&'b [u8]>,
 | 
			
		||||
    right: Bound<&'b [u8]>,
 | 
			
		||||
    docids: &'bitmap mut RoaringBitmap,
 | 
			
		||||
}
 | 
			
		||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
 | 
			
		||||
    fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
 | 
			
		||||
        let left_key =
 | 
			
		||||
            FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
 | 
			
		||||
        let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
 | 
			
		||||
        for el in iter {
 | 
			
		||||
            let (key, value) = el?;
 | 
			
		||||
            // the right side of the iter range is unbounded, so we need to make sure that we are not iterating
 | 
			
		||||
            // on the next field id
 | 
			
		||||
            if key.field_id != self.field_id {
 | 
			
		||||
                return Ok(());
 | 
			
		||||
            }
 | 
			
		||||
            let should_skip = {
 | 
			
		||||
                match self.left {
 | 
			
		||||
                    Bound::Included(left) => left > key.left_bound,
 | 
			
		||||
                    Bound::Excluded(left) => left >= key.left_bound,
 | 
			
		||||
                    Bound::Unbounded => false,
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
            if should_skip {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            let should_stop = {
 | 
			
		||||
                match self.right {
 | 
			
		||||
                    Bound::Included(right) => right < key.left_bound,
 | 
			
		||||
                    Bound::Excluded(right) => right <= key.left_bound,
 | 
			
		||||
                    Bound::Unbounded => false,
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
            if should_stop {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
 | 
			
		||||
                *self.docids |= value.bitmap;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Recursive part of the algorithm for level > 0
 | 
			
		||||
    fn run(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        level: u8,
 | 
			
		||||
        starting_left_bound: &'t [u8],
 | 
			
		||||
        rightmost_bound: Bound<&'t [u8]>,
 | 
			
		||||
        group_size: usize,
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        if level == 0 {
 | 
			
		||||
            return self.run_level_0(starting_left_bound, group_size);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound };
 | 
			
		||||
        let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
 | 
			
		||||
 | 
			
		||||
        let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
 | 
			
		||||
        for el in iter {
 | 
			
		||||
            let (next_key, next_value) = el?;
 | 
			
		||||
            // the right of the iter range is unbounded, so we need to make sure that we are not iterating
 | 
			
		||||
            // on the next field id
 | 
			
		||||
            if next_key.field_id != self.field_id {
 | 
			
		||||
                return Ok(());
 | 
			
		||||
            }
 | 
			
		||||
            // now, do we skip, stop, or visit?
 | 
			
		||||
            let should_skip = {
 | 
			
		||||
                match self.left {
 | 
			
		||||
                    Bound::Included(left) => left >= next_key.left_bound,
 | 
			
		||||
                    Bound::Excluded(left) => left >= next_key.left_bound, // TODO: use > instead?
 | 
			
		||||
                    Bound::Unbounded => false,
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
            if should_skip {
 | 
			
		||||
                previous_key = next_key;
 | 
			
		||||
                previous_value = next_value;
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // should we stop?
 | 
			
		||||
            let should_stop = {
 | 
			
		||||
                match self.right {
 | 
			
		||||
                    Bound::Included(right) => right < previous_key.left_bound,
 | 
			
		||||
                    Bound::Excluded(right) => right <= previous_key.left_bound,
 | 
			
		||||
                    Bound::Unbounded => false,
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
            if should_stop {
 | 
			
		||||
                return Ok(());
 | 
			
		||||
            }
 | 
			
		||||
            // should we take the whole thing, without recursing down?
 | 
			
		||||
            let should_take_whole_group = {
 | 
			
		||||
                let left_condition = match self.left {
 | 
			
		||||
                    Bound::Included(left) => previous_key.left_bound >= left,
 | 
			
		||||
                    Bound::Excluded(left) => previous_key.left_bound > left,
 | 
			
		||||
                    Bound::Unbounded => true,
 | 
			
		||||
                };
 | 
			
		||||
                let right_condition = match self.right {
 | 
			
		||||
                    Bound::Included(right) => next_key.left_bound <= right,
 | 
			
		||||
                    Bound::Excluded(right) => next_key.left_bound <= right,
 | 
			
		||||
                    Bound::Unbounded => true,
 | 
			
		||||
                };
 | 
			
		||||
                left_condition && right_condition
 | 
			
		||||
            };
 | 
			
		||||
            if should_take_whole_group {
 | 
			
		||||
                *self.docids |= &previous_value.bitmap;
 | 
			
		||||
                previous_key = next_key;
 | 
			
		||||
                previous_value = next_value;
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            let level = level - 1;
 | 
			
		||||
            let starting_left_bound = previous_key.left_bound;
 | 
			
		||||
            let rightmost_bound = Bound::Excluded(next_key.left_bound);
 | 
			
		||||
            let group_size = previous_value.size as usize;
 | 
			
		||||
 | 
			
		||||
            self.run(level, starting_left_bound, rightmost_bound, group_size)?;
 | 
			
		||||
 | 
			
		||||
            previous_key = next_key;
 | 
			
		||||
            previous_value = next_value;
 | 
			
		||||
        }
 | 
			
		||||
        // previous_key/previous_value are the last element
 | 
			
		||||
 | 
			
		||||
        // now, do we skip, stop, or visit?
 | 
			
		||||
        let should_skip = {
 | 
			
		||||
            match (self.left, rightmost_bound) {
 | 
			
		||||
                (Bound::Included(left), Bound::Included(right)) => left > right,
 | 
			
		||||
                (Bound::Included(left), Bound::Excluded(right)) => left >= right,
 | 
			
		||||
                (Bound::Excluded(left), Bound::Included(right) | Bound::Excluded(right)) => {
 | 
			
		||||
                    left >= right
 | 
			
		||||
                }
 | 
			
		||||
                (Bound::Unbounded, _) => false,
 | 
			
		||||
                (_, Bound::Unbounded) => false, // should never run?
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
        if should_skip {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // should we stop?
 | 
			
		||||
        let should_stop = {
 | 
			
		||||
            match self.right {
 | 
			
		||||
                Bound::Included(right) => right <= previous_key.left_bound,
 | 
			
		||||
                Bound::Excluded(right) => right < previous_key.left_bound,
 | 
			
		||||
                Bound::Unbounded => false,
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
        if should_stop {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
        // should we take the whole thing, without recursing down?
 | 
			
		||||
        let should_take_whole_group = {
 | 
			
		||||
            let left_condition = match self.left {
 | 
			
		||||
                Bound::Included(left) => previous_key.left_bound >= left,
 | 
			
		||||
                Bound::Excluded(left) => previous_key.left_bound > left,
 | 
			
		||||
                Bound::Unbounded => true,
 | 
			
		||||
            };
 | 
			
		||||
            let right_condition = match (self.right, rightmost_bound) {
 | 
			
		||||
                (Bound::Included(right), Bound::Included(rightmost)) => rightmost <= right,
 | 
			
		||||
                (Bound::Included(right), Bound::Excluded(rightmost)) => rightmost < right,
 | 
			
		||||
                // e.g. x < 8 and rightmost is <= y
 | 
			
		||||
                // condition met if rightmost < 8
 | 
			
		||||
                (Bound::Excluded(right), Bound::Included(rightmost)) => rightmost < right,
 | 
			
		||||
                // e.g. x < 8 and rightmost is < y
 | 
			
		||||
                // condition met only if y <= 8?
 | 
			
		||||
                (Bound::Excluded(right), Bound::Excluded(rightmost)) => rightmost <= right,
 | 
			
		||||
                // e.g. x < inf. , so yes we take the whole thing
 | 
			
		||||
                (Bound::Unbounded, _) => true,
 | 
			
		||||
                // e.g. x < 7 , righmost is inf
 | 
			
		||||
                (_, Bound::Unbounded) => false, // panic?
 | 
			
		||||
            };
 | 
			
		||||
            left_condition && right_condition
 | 
			
		||||
        };
 | 
			
		||||
        if should_take_whole_group {
 | 
			
		||||
            *self.docids |= &previous_value.bitmap;
 | 
			
		||||
        } else {
 | 
			
		||||
            let level = level - 1;
 | 
			
		||||
            let starting_left_bound = previous_key.left_bound;
 | 
			
		||||
            let group_size = previous_value.size as usize;
 | 
			
		||||
 | 
			
		||||
            self.run(level, starting_left_bound, rightmost_bound, group_size)?;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use crate::{
        heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec,
        search::facet::test::FacetIndex, snapshot_tests::display_bitmap,
    };
    use rand::{Rng, SeedableRng};
    use roaring::RoaringBitmap;
    use std::ops::Bound;

    use super::find_docids_of_facet_within_bounds;

    /// Builds an index where each facet value `i` in `0..256` is associated with document `i`.
    fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
        let mut txn = index.env.write_txn().unwrap();
        for i in 0..256u16 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
        }
        txn.commit().unwrap();
        index
    }

    /// Builds an index from 128 pseudo-random facet values in `0..256`, each
    /// associated with the documents `key` and `key + 100`. The generator is
    /// seeded with a constant so that the content is reproducible.
    fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
        let mut txn = index.env.write_txn().unwrap();

        // `gen_range` takes `&mut self`, so the generator has to be mutable.
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        let keys =
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();

        for key in keys {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(key + 100);
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
        }
        txn.commit().unwrap();
        index
    }

    /// Runs one range search on field 0 per `(start, end)` pair and returns the
    /// displayed bitmaps, one per line, in the order of the queries.
    fn search_ranges(
        index: &FacetIndex<OrderedF64Codec>,
        txn: &heed::RoTxn<'_>,
        ranges: impl Iterator<Item = (Bound<f64>, Bound<f64>)>,
    ) -> String {
        let mut results = String::new();
        for (start, end) in ranges {
            let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
                txn,
                &index.db.content,
                0,
                &start,
                &end,
            )
            .unwrap();
            results.push_str(&format!("{}\n", display_bitmap(&docids)));
        }
        results
    }

    #[test]
    fn random_looking_index_snap() {
        let index = get_random_looking_index();
        insta::assert_display_snapshot!(index)
    }

    /// The left bound is fixed at 0 while the right bound grows from 0 to 255.
    #[test]
    fn filter_range_increasing() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.into_iter().enumerate() {
            let txn = index.env.read_txn().unwrap();

            let results = search_ranges(
                &index,
                &txn,
                (0..=255).map(|end| (Bound::Included(0.), Bound::Included(end as f64))),
            );
            insta::assert_snapshot!(
                format!("filter_range_{i}_increasing_included_bounds"),
                results
            );

            let results = search_ranges(
                &index,
                &txn,
                (0..=255).map(|end| (Bound::Excluded(0.), Bound::Excluded(end as f64))),
            );
            insta::assert_snapshot!(
                format!("filter_range_{i}_increasing_excluded_bounds"),
                results
            );

            txn.commit().unwrap();
        }
    }

    /// The right bound is fixed at 255 while the left bound shrinks from 255 to 0.
    #[test]
    fn filter_range_decreasing() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.into_iter().enumerate() {
            let txn = index.env.read_txn().unwrap();

            let results = search_ranges(
                &index,
                &txn,
                (0..=255).rev().map(|start| (Bound::Included(start as f64), Bound::Included(255.))),
            );
            insta::assert_snapshot!(
                format!("filter_range_{i}_decreasing_included_bounds"),
                results
            );

            let results = search_ranges(
                &index,
                &txn,
                (0..=255).rev().map(|start| (Bound::Excluded(start as f64), Bound::Excluded(255.))),
            );
            insta::assert_snapshot!(
                format!("filter_range_{i}_decreasing_excluded_bounds"),
                results
            );

            txn.commit().unwrap();
        }
    }

    /// Both bounds move at once: the searched range is `i ..= 255 - i` (or the
    /// excluded equivalent) for `i` going from 128 down to 0.
    #[test]
    fn filter_range_pinch() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.into_iter().enumerate() {
            let txn = index.env.read_txn().unwrap();

            let results = search_ranges(
                &index,
                &txn,
                (0..=128).rev().map(|offset| {
                    let offset = offset as f64;
                    (Bound::Included(offset), Bound::Included(255. - offset))
                }),
            );
            insta::assert_snapshot!(format!("filter_range_{i}_pinch_included_bounds"), results);

            let results = search_ranges(
                &index,
                &txn,
                (0..=128).rev().map(|offset| {
                    let offset = offset as f64;
                    (Bound::Excluded(offset), Bound::Excluded(255. - offset))
                }),
            );
            insta::assert_snapshot!(format!("filter_range_{i}_pinch_excluded_bounds"), results);

            txn.commit().unwrap();
        }
    }
}
 | 
			
		||||
@@ -1,8 +1,8 @@
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use crate::heed_codec::facet::new::{
 | 
			
		||||
    FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
 | 
			
		||||
};
 | 
			
		||||
use crate::Result;
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use super::{get_first_facet_value, get_highest_level};
 | 
			
		||||
 | 
			
		||||
@@ -11,20 +11,20 @@ pub fn ascending_facet_sort<'t>(
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
    candidates: RoaringBitmap,
 | 
			
		||||
) -> Box<dyn Iterator<Item = (&'t [u8], RoaringBitmap)> + 't> {
 | 
			
		||||
) -> Result<Box<dyn Iterator<Item = Result<(&'t [u8], RoaringBitmap)>> + 't>> {
 | 
			
		||||
    let highest_level =
 | 
			
		||||
        get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id);
 | 
			
		||||
        get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
 | 
			
		||||
    if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(
 | 
			
		||||
        rtxn,
 | 
			
		||||
        &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
 | 
			
		||||
        field_id,
 | 
			
		||||
    ) {
 | 
			
		||||
    )? {
 | 
			
		||||
        let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
 | 
			
		||||
        let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
 | 
			
		||||
 | 
			
		||||
        Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })
 | 
			
		||||
        Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
 | 
			
		||||
    } else {
 | 
			
		||||
        return Box::new(std::iter::empty());
 | 
			
		||||
        Ok(Box::new(std::iter::empty()))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -39,7 +39,7 @@ struct AscendingFacetSort<'t, 'e> {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
 | 
			
		||||
    type Item = (&'t [u8], RoaringBitmap);
 | 
			
		||||
    type Item = Result<(&'t [u8], RoaringBitmap)>;
 | 
			
		||||
 | 
			
		||||
    fn next(&mut self) -> Option<Self::Item> {
 | 
			
		||||
        'outer: loop {
 | 
			
		||||
@@ -67,15 +67,15 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
 | 
			
		||||
                    *documents_ids -= &bitmap;
 | 
			
		||||
 | 
			
		||||
                    if level == 0 {
 | 
			
		||||
                        return Some((left_bound, bitmap));
 | 
			
		||||
                        return Some(Ok((left_bound, bitmap)));
 | 
			
		||||
                    }
 | 
			
		||||
                    let starting_key_below =
 | 
			
		||||
                        FacetKey { field_id: self.field_id, level: level - 1, left_bound };
 | 
			
		||||
                    let iter = self
 | 
			
		||||
                        .db
 | 
			
		||||
                        .range(&self.rtxn, &(starting_key_below..))
 | 
			
		||||
                        .unwrap()
 | 
			
		||||
                        .take(group_size as usize);
 | 
			
		||||
                    let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) {
 | 
			
		||||
                        Ok(iter) => iter,
 | 
			
		||||
                        Err(e) => return Some(Err(e.into())),
 | 
			
		||||
                    }
 | 
			
		||||
                    .take(group_size as usize);
 | 
			
		||||
 | 
			
		||||
                    self.stack.push((bitmap, iter));
 | 
			
		||||
                    continue 'outer;
 | 
			
		||||
@@ -88,14 +88,19 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use crate::{
 | 
			
		||||
        ascending_facet_sort::ascending_facet_sort, codec::U16Codec, display_bitmap, Index,
 | 
			
		||||
    };
 | 
			
		||||
    use heed::BytesDecode;
 | 
			
		||||
    use rand::Rng;
 | 
			
		||||
    use rand::SeedableRng;
 | 
			
		||||
    use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
    fn get_simple_index() -> Index<U16Codec> {
 | 
			
		||||
        let index = Index::<U16Codec>::new(4, 8);
 | 
			
		||||
    use crate::{
 | 
			
		||||
        heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec,
 | 
			
		||||
        search::facet::{facet_sort_ascending::ascending_facet_sort, test::FacetIndex},
 | 
			
		||||
        snapshot_tests::display_bitmap,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
 | 
			
		||||
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
        for i in 0..256u16 {
 | 
			
		||||
            let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
@@ -105,18 +110,19 @@ mod tests {
 | 
			
		||||
        txn.commit().unwrap();
 | 
			
		||||
        index
 | 
			
		||||
    }
 | 
			
		||||
    fn get_random_looking_index() -> Index<U16Codec> {
 | 
			
		||||
        let index = Index::<U16Codec>::new(4, 8);
 | 
			
		||||
    fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
 | 
			
		||||
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let rng = fastrand::Rng::with_seed(0);
 | 
			
		||||
        let keys = std::iter::from_fn(|| Some(rng.u32(..256))).take(128).collect::<Vec<u32>>();
 | 
			
		||||
        let rng = rand::rngs::SmallRng::from_seed([0; 32]);
 | 
			
		||||
        let keys =
 | 
			
		||||
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
 | 
			
		||||
 | 
			
		||||
        for (_i, key) in keys.into_iter().enumerate() {
 | 
			
		||||
            let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
            bitmap.insert(key);
 | 
			
		||||
            bitmap.insert(key + 100);
 | 
			
		||||
            index.insert(&mut txn, 0, &(key as u16), &bitmap);
 | 
			
		||||
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
 | 
			
		||||
        }
 | 
			
		||||
        txn.commit().unwrap();
 | 
			
		||||
        index
 | 
			
		||||
@@ -136,7 +142,7 @@ mod tests {
 | 
			
		||||
            let mut results = String::new();
 | 
			
		||||
            let iter = ascending_facet_sort(&txn, &index.db.content, 0, candidates);
 | 
			
		||||
            for (facet, docids) in iter {
 | 
			
		||||
                let facet = U16Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                results.push_str(&format!("{facet}: {}\n", display_bitmap(&docids)));
 | 
			
		||||
            }
 | 
			
		||||
            insta::assert_snapshot!(format!("filter_sort_{i}_ascending"), results);
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,10 @@
 | 
			
		||||
use std::ops::Bound;
 | 
			
		||||
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use crate::heed_codec::facet::new::{
 | 
			
		||||
    FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
 | 
			
		||||
};
 | 
			
		||||
use crate::Result;
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
 | 
			
		||||
 | 
			
		||||
@@ -13,21 +13,21 @@ fn descending_facet_sort<'t>(
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
    candidates: RoaringBitmap,
 | 
			
		||||
) -> Box<dyn Iterator<Item = (&'t [u8], RoaringBitmap)> + 't> {
 | 
			
		||||
    let highest_level = get_highest_level(rtxn, db, field_id);
 | 
			
		||||
    if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id) {
 | 
			
		||||
) -> Result<Box<dyn Iterator<Item = Result<(&'t [u8], RoaringBitmap)>> + 't>> {
 | 
			
		||||
    let highest_level = get_highest_level(rtxn, db, field_id)?;
 | 
			
		||||
    if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
 | 
			
		||||
        let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
 | 
			
		||||
        let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id).unwrap();
 | 
			
		||||
        let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
 | 
			
		||||
        let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound };
 | 
			
		||||
        let iter = db.rev_range(rtxn, &(first_key..=last_key)).unwrap().take(usize::MAX);
 | 
			
		||||
        Box::new(DescendingFacetSort {
 | 
			
		||||
        let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
 | 
			
		||||
        Ok(Box::new(DescendingFacetSort {
 | 
			
		||||
            rtxn,
 | 
			
		||||
            db,
 | 
			
		||||
            field_id,
 | 
			
		||||
            stack: vec![(candidates, iter, Bound::Included(last_bound))],
 | 
			
		||||
        })
 | 
			
		||||
        }))
 | 
			
		||||
    } else {
 | 
			
		||||
        return Box::new(std::iter::empty());
 | 
			
		||||
        Ok(Box::new(std::iter::empty()))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -43,7 +43,7 @@ struct DescendingFacetSort<'t> {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'t> Iterator for DescendingFacetSort<'t> {
 | 
			
		||||
    type Item = (&'t [u8], RoaringBitmap);
 | 
			
		||||
    type Item = Result<(&'t [u8], RoaringBitmap)>;
 | 
			
		||||
 | 
			
		||||
    fn next(&mut self) -> Option<Self::Item> {
 | 
			
		||||
        'outer: loop {
 | 
			
		||||
@@ -70,7 +70,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
 | 
			
		||||
                    *documents_ids -= &bitmap;
 | 
			
		||||
 | 
			
		||||
                    if level == 0 {
 | 
			
		||||
                        return Some((left_bound, bitmap));
 | 
			
		||||
                        return Some(Ok((left_bound, bitmap)));
 | 
			
		||||
                    }
 | 
			
		||||
                    let starting_key_below = FacetKey { field_id, level: level - 1, left_bound };
 | 
			
		||||
 | 
			
		||||
@@ -89,14 +89,14 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
 | 
			
		||||
                    };
 | 
			
		||||
                    let prev_right_bound = *right_bound;
 | 
			
		||||
                    *right_bound = Bound::Excluded(left_bound);
 | 
			
		||||
                    let iter = self
 | 
			
		||||
                        .db
 | 
			
		||||
                        .rev_range(
 | 
			
		||||
                            &self.rtxn,
 | 
			
		||||
                            &(Bound::Included(starting_key_below), end_key_kelow),
 | 
			
		||||
                        )
 | 
			
		||||
                        .unwrap()
 | 
			
		||||
                        .take(group_size as usize);
 | 
			
		||||
                    let iter = match self.db.rev_range(
 | 
			
		||||
                        &self.rtxn,
 | 
			
		||||
                        &(Bound::Included(starting_key_below), end_key_kelow),
 | 
			
		||||
                    ) {
 | 
			
		||||
                        Ok(iter) => iter,
 | 
			
		||||
                        Err(e) => return Some(Err(e.into())),
 | 
			
		||||
                    }
 | 
			
		||||
                    .take(group_size as usize);
 | 
			
		||||
 | 
			
		||||
                    self.stack.push((bitmap, iter, prev_right_bound));
 | 
			
		||||
                    continue 'outer;
 | 
			
		||||
@@ -110,16 +110,20 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use crate::{
 | 
			
		||||
        codec::{MyByteSlice, U16Codec},
 | 
			
		||||
        descending_facet_sort::descending_facet_sort,
 | 
			
		||||
        display_bitmap, FacetKeyCodec, Index,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    use heed::BytesDecode;
 | 
			
		||||
    use rand::Rng;
 | 
			
		||||
    use rand::SeedableRng;
 | 
			
		||||
    use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
    fn get_simple_index() -> Index<U16Codec> {
 | 
			
		||||
        let index = Index::<U16Codec>::new(4, 8);
 | 
			
		||||
    use crate::{
 | 
			
		||||
        heed_codec::facet::new::{ordered_f64_codec::OrderedF64Codec, FacetKeyCodec, MyByteSlice},
 | 
			
		||||
        search::facet::{facet_sort_descending::descending_facet_sort, test::FacetIndex},
 | 
			
		||||
        snapshot_tests::display_bitmap,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
 | 
			
		||||
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
        for i in 0..256u16 {
 | 
			
		||||
            let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
@@ -129,18 +133,19 @@ mod tests {
 | 
			
		||||
        txn.commit().unwrap();
 | 
			
		||||
        index
 | 
			
		||||
    }
 | 
			
		||||
    fn get_random_looking_index() -> Index<U16Codec> {
 | 
			
		||||
        let index = Index::<U16Codec>::new(4, 8);
 | 
			
		||||
    fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
 | 
			
		||||
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let rng = fastrand::Rng::with_seed(0);
 | 
			
		||||
        let keys = std::iter::from_fn(|| Some(rng.u32(..256))).take(128).collect::<Vec<u32>>();
 | 
			
		||||
        let rng = rand::rngs::SmallRng::from_seed([0; 32]);
 | 
			
		||||
        let keys =
 | 
			
		||||
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
 | 
			
		||||
 | 
			
		||||
        for (_i, key) in keys.into_iter().enumerate() {
 | 
			
		||||
            let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
            bitmap.insert(key);
 | 
			
		||||
            bitmap.insert(key + 100);
 | 
			
		||||
            index.insert(&mut txn, 0, &(key as u16), &bitmap);
 | 
			
		||||
            bitmap.insert(key + 100.);
 | 
			
		||||
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
 | 
			
		||||
        }
 | 
			
		||||
        txn.commit().unwrap();
 | 
			
		||||
        index
 | 
			
		||||
@@ -161,7 +166,7 @@ mod tests {
 | 
			
		||||
            let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
 | 
			
		||||
            let iter = descending_facet_sort(&txn, &db, 0, candidates);
 | 
			
		||||
            for (facet, docids) in iter {
 | 
			
		||||
                let facet = U16Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
 | 
			
		||||
                results.push_str(&format!("{facet}: {}\n", display_bitmap(&docids)));
 | 
			
		||||
            }
 | 
			
		||||
            insta::assert_snapshot!(format!("filter_sort_{i}_descending"), results);
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,6 @@ use either::Either;
 | 
			
		||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
 | 
			
		||||
use heed::types::DecodeIgnore;
 | 
			
		||||
use heed::LazyDecode;
 | 
			
		||||
use log::debug;
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
// use super::FacetNumberRange;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										459
									
								
								milli/src/search/facet/incremental_update.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										459
									
								
								milli/src/search/facet/incremental_update.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,459 @@
 | 
			
		||||
use crate::heed_codec::facet::new::{
 | 
			
		||||
    FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
 | 
			
		||||
};
 | 
			
		||||
use crate::Result;
 | 
			
		||||
use heed::Error;
 | 
			
		||||
use heed::{types::ByteSlice, BytesDecode, RoTxn, RwTxn};
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use super::get_highest_level;
 | 
			
		||||
 | 
			
		||||
/// Outcome of inserting a facet value into one level of the facet tree.
///
/// The caller (the level above) uses it to decide whether the size of the
/// group covering the inserted key must grow.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InsertionResult {
    /// Only existing entries of the level were modified; the number of
    /// entries in the level is unchanged.
    InPlace,
    /// A new entry was added to the level.
    Insert,
}
 | 
			
		||||
/// Outcome of deleting a document id from one level of the facet tree.
///
/// NOTE(review): the code producing `prev` / `next` is not visible from this
/// definition; they are presumably the left bounds of the entries surrounding
/// the affected key in the level below — confirm against `delete_in_level_0`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeletionResult {
    /// The deletion only touched the bitmap of an existing entry.
    InPlace,
    /// Reported to the level above without asking it to decrease its group size.
    Reduce { prev: Option<Vec<u8>>, next: Option<Vec<u8>> },
    /// Reported to the level above, which must decrease the size of the
    /// enclosing group.
    Remove { prev: Option<Vec<u8>>, next: Option<Vec<u8>> },
}
 | 
			
		||||
 | 
			
		||||
struct IncrementalFacetUpdate<'i> {
 | 
			
		||||
    db: &'i heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    group_size: usize,
 | 
			
		||||
    min_level_size: usize,
 | 
			
		||||
    max_group_size: usize,
 | 
			
		||||
}
 | 
			
		||||
impl<'i> IncrementalFacetUpdate<'i> {
 | 
			
		||||
    fn find_insertion_key_value<'a>(
 | 
			
		||||
        &self,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        level: u8,
 | 
			
		||||
        search_key: &[u8],
 | 
			
		||||
        txn: &RoTxn,
 | 
			
		||||
    ) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> {
 | 
			
		||||
        let mut prefix = vec![];
 | 
			
		||||
        prefix.extend_from_slice(&field_id.to_be_bytes());
 | 
			
		||||
        prefix.push(level);
 | 
			
		||||
        prefix.extend_from_slice(search_key);
 | 
			
		||||
 | 
			
		||||
        let mut prefix_iter = self
 | 
			
		||||
            .db
 | 
			
		||||
            .as_polymorph()
 | 
			
		||||
            .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
 | 
			
		||||
        if let Some(e) = prefix_iter.next() {
 | 
			
		||||
            let (key_bytes, value) = e?;
 | 
			
		||||
            let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
 | 
			
		||||
                .ok_or(heed::Error::Encoding)?;
 | 
			
		||||
            Ok((
 | 
			
		||||
                FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
 | 
			
		||||
                    .ok_or(Error::Encoding)?
 | 
			
		||||
                    .into_owned(),
 | 
			
		||||
                value,
 | 
			
		||||
            ))
 | 
			
		||||
        } else {
 | 
			
		||||
            let key = FacetKey { field_id, level, left_bound: search_key };
 | 
			
		||||
            match self.db.get_lower_than(txn, &key)? {
 | 
			
		||||
                Some((key, value)) => {
 | 
			
		||||
                    if key.level != level || key.field_id != field_id {
 | 
			
		||||
                        let mut prefix = vec![];
 | 
			
		||||
                        prefix.extend_from_slice(&field_id.to_be_bytes());
 | 
			
		||||
                        prefix.push(level);
 | 
			
		||||
 | 
			
		||||
                        let mut iter = self
 | 
			
		||||
                            .db
 | 
			
		||||
                            .as_polymorph()
 | 
			
		||||
                            .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(
 | 
			
		||||
                                txn,
 | 
			
		||||
                                &prefix.as_slice(),
 | 
			
		||||
                            )?;
 | 
			
		||||
                        let (key_bytes, value) = iter.next().unwrap()?;
 | 
			
		||||
                        Ok((
 | 
			
		||||
                            FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
 | 
			
		||||
                                .ok_or(Error::Encoding)?
 | 
			
		||||
                                .into_owned(),
 | 
			
		||||
                            value,
 | 
			
		||||
                        ))
 | 
			
		||||
                    } else {
 | 
			
		||||
                        Ok((key.into_owned(), value))
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                None => panic!(),
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn insert_in_level_0<'t>(
 | 
			
		||||
        &self,
 | 
			
		||||
        txn: &'t mut RwTxn,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        new_key: &[u8],
 | 
			
		||||
        new_values: &RoaringBitmap,
 | 
			
		||||
    ) -> Result<InsertionResult> {
 | 
			
		||||
        let key = FacetKey { field_id, level: 0, left_bound: new_key };
 | 
			
		||||
        let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
 | 
			
		||||
 | 
			
		||||
        let mut level0_prefix = vec![];
 | 
			
		||||
        level0_prefix.extend_from_slice(&field_id.to_be_bytes());
 | 
			
		||||
        level0_prefix.push(0);
 | 
			
		||||
 | 
			
		||||
        let mut iter = self
 | 
			
		||||
            .db
 | 
			
		||||
            .as_polymorph()
 | 
			
		||||
            .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &level0_prefix)?;
 | 
			
		||||
 | 
			
		||||
        if iter.next().is_none() {
 | 
			
		||||
            drop(iter);
 | 
			
		||||
            self.db.put(txn, &key, &value)?;
 | 
			
		||||
            return Ok(InsertionResult::Insert);
 | 
			
		||||
        } else {
 | 
			
		||||
            drop(iter);
 | 
			
		||||
            let old_value = self.db.get(&txn, &key)?;
 | 
			
		||||
            match old_value {
 | 
			
		||||
                Some(mut updated_value) => {
 | 
			
		||||
                    // now merge the two
 | 
			
		||||
                    updated_value.bitmap |= value.bitmap;
 | 
			
		||||
                    self.db.put(txn, &key, &updated_value)?;
 | 
			
		||||
                    Ok(InsertionResult::InPlace)
 | 
			
		||||
                }
 | 
			
		||||
                None => {
 | 
			
		||||
                    self.db.put(txn, &key, &value)?;
 | 
			
		||||
                    Ok(InsertionResult::Insert)
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    fn insert_in_level<'t>(
 | 
			
		||||
        &self,
 | 
			
		||||
        txn: &'t mut RwTxn,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        level: u8,
 | 
			
		||||
        new_key: &[u8],
 | 
			
		||||
        new_values: &RoaringBitmap,
 | 
			
		||||
    ) -> Result<InsertionResult> {
 | 
			
		||||
        if level == 0 {
 | 
			
		||||
            return self.insert_in_level_0(txn, field_id, new_key, new_values);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let max_group_size = self.max_group_size;
 | 
			
		||||
 | 
			
		||||
        let (insertion_key, insertion_value) =
 | 
			
		||||
            self.find_insertion_key_value(field_id, level, new_key, txn)?;
 | 
			
		||||
 | 
			
		||||
        let result = self.insert_in_level(txn, field_id, level - 1, new_key.clone(), new_values)?;
 | 
			
		||||
        // level below inserted an element
 | 
			
		||||
 | 
			
		||||
        let insertion_key = {
 | 
			
		||||
            let mut new_insertion_key = insertion_key.clone();
 | 
			
		||||
            let mut modified = false;
 | 
			
		||||
 | 
			
		||||
            if new_key < insertion_key.left_bound.as_slice() {
 | 
			
		||||
                new_insertion_key.left_bound = new_key.to_vec();
 | 
			
		||||
                modified = true;
 | 
			
		||||
            }
 | 
			
		||||
            if modified {
 | 
			
		||||
                let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
 | 
			
		||||
                assert!(is_deleted);
 | 
			
		||||
                self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?;
 | 
			
		||||
            }
 | 
			
		||||
            new_insertion_key
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        match result {
 | 
			
		||||
            // TODO: this could go above the block recomputing insertion key
 | 
			
		||||
            // because we know that if we inserted in place, the key is not a new one
 | 
			
		||||
            // thus it doesn't extend a group
 | 
			
		||||
            InsertionResult::InPlace => {
 | 
			
		||||
                let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap();
 | 
			
		||||
                updated_value.bitmap |= new_values;
 | 
			
		||||
                self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
 | 
			
		||||
 | 
			
		||||
                return Ok(InsertionResult::InPlace);
 | 
			
		||||
            }
 | 
			
		||||
            InsertionResult::Insert => {}
 | 
			
		||||
        }
 | 
			
		||||
        let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap();
 | 
			
		||||
 | 
			
		||||
        updated_value.size += 1;
 | 
			
		||||
        if updated_value.size as usize == max_group_size {
 | 
			
		||||
            // need to split it
 | 
			
		||||
            // recompute left element and right element
 | 
			
		||||
            // replace current group by left element
 | 
			
		||||
            // add one more group to the right
 | 
			
		||||
 | 
			
		||||
            let size_left = max_group_size / 2;
 | 
			
		||||
            let size_right = max_group_size - size_left;
 | 
			
		||||
 | 
			
		||||
            let level_below = level - 1;
 | 
			
		||||
 | 
			
		||||
            let (start_key, _) = self
 | 
			
		||||
                .db
 | 
			
		||||
                .get_greater_than_or_equal_to(
 | 
			
		||||
                    &txn,
 | 
			
		||||
                    &FacetKey {
 | 
			
		||||
                        field_id,
 | 
			
		||||
                        level: level_below,
 | 
			
		||||
                        left_bound: insertion_key.left_bound.as_slice(),
 | 
			
		||||
                    },
 | 
			
		||||
                )?
 | 
			
		||||
                .unwrap();
 | 
			
		||||
 | 
			
		||||
            let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size);
 | 
			
		||||
 | 
			
		||||
            let group_left = {
 | 
			
		||||
                let mut values_left = RoaringBitmap::new();
 | 
			
		||||
 | 
			
		||||
                let mut i = 0;
 | 
			
		||||
                while let Some(next) = iter.next() {
 | 
			
		||||
                    let (_key, value) = next?;
 | 
			
		||||
                    i += 1;
 | 
			
		||||
                    values_left |= &value.bitmap;
 | 
			
		||||
                    if i == size_left {
 | 
			
		||||
                        break;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                let key =
 | 
			
		||||
                    FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
 | 
			
		||||
                let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
 | 
			
		||||
                (key, value)
 | 
			
		||||
            };
 | 
			
		||||
 | 
			
		||||
            let group_right = {
 | 
			
		||||
                let mut values_right = RoaringBitmap::new();
 | 
			
		||||
                let mut right_start_key = None;
 | 
			
		||||
 | 
			
		||||
                while let Some(next) = iter.next() {
 | 
			
		||||
                    let (key, value) = next?;
 | 
			
		||||
                    if right_start_key.is_none() {
 | 
			
		||||
                        right_start_key = Some(key.left_bound);
 | 
			
		||||
                    }
 | 
			
		||||
                    values_right |= &value.bitmap;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                let key =
 | 
			
		||||
                    FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
 | 
			
		||||
                let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
 | 
			
		||||
                (key, value)
 | 
			
		||||
            };
 | 
			
		||||
            drop(iter);
 | 
			
		||||
 | 
			
		||||
            let _ = self.db.delete(txn, &insertion_key.as_ref())?;
 | 
			
		||||
 | 
			
		||||
            self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?;
 | 
			
		||||
            self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?;
 | 
			
		||||
 | 
			
		||||
            Ok(InsertionResult::Insert)
 | 
			
		||||
        } else {
 | 
			
		||||
            let mut value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap();
 | 
			
		||||
            value.bitmap |= new_values;
 | 
			
		||||
            value.size += 1;
 | 
			
		||||
            self.db.put(txn, &insertion_key.as_ref(), &value).unwrap();
 | 
			
		||||
 | 
			
		||||
            Ok(InsertionResult::InPlace)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn insert<'a, 't>(
 | 
			
		||||
        &self,
 | 
			
		||||
        txn: &'t mut RwTxn,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        new_key: &[u8],
 | 
			
		||||
        new_values: &RoaringBitmap,
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        if new_values.is_empty() {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
        let group_size = self.group_size;
 | 
			
		||||
 | 
			
		||||
        let highest_level = get_highest_level(&txn, &self.db, field_id)?;
 | 
			
		||||
 | 
			
		||||
        let result =
 | 
			
		||||
            self.insert_in_level(txn, field_id, highest_level as u8, new_key, new_values)?;
 | 
			
		||||
        match result {
 | 
			
		||||
            InsertionResult::InPlace => return Ok(()),
 | 
			
		||||
            InsertionResult::Insert => {}
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let mut highest_level_prefix = vec![];
 | 
			
		||||
        highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
 | 
			
		||||
        highest_level_prefix.push(highest_level);
 | 
			
		||||
 | 
			
		||||
        let size_highest_level = self
 | 
			
		||||
            .db
 | 
			
		||||
            .as_polymorph()
 | 
			
		||||
            .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
 | 
			
		||||
            .count();
 | 
			
		||||
 | 
			
		||||
        if size_highest_level < self.min_level_size {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let mut groups_iter = self
 | 
			
		||||
            .db
 | 
			
		||||
            .as_polymorph()
 | 
			
		||||
            .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &highest_level_prefix)?;
 | 
			
		||||
 | 
			
		||||
        let mut to_add = vec![];
 | 
			
		||||
        for _ in 0..group_size {
 | 
			
		||||
            let mut first_key = None;
 | 
			
		||||
            let mut values = RoaringBitmap::new();
 | 
			
		||||
            for _ in 0..group_size {
 | 
			
		||||
                let (key_bytes, value_i) = groups_iter.next().unwrap()?;
 | 
			
		||||
                let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
 | 
			
		||||
                    .ok_or(Error::Encoding)?;
 | 
			
		||||
 | 
			
		||||
                if first_key.is_none() {
 | 
			
		||||
                    first_key = Some(key_i);
 | 
			
		||||
                }
 | 
			
		||||
                values |= value_i.bitmap;
 | 
			
		||||
            }
 | 
			
		||||
            let key = FacetKey {
 | 
			
		||||
                field_id,
 | 
			
		||||
                level: highest_level + 1,
 | 
			
		||||
                left_bound: first_key.unwrap().left_bound,
 | 
			
		||||
            };
 | 
			
		||||
            let value = FacetGroupValue { size: group_size as u8, bitmap: values };
 | 
			
		||||
            to_add.push((key.into_owned(), value));
 | 
			
		||||
        }
 | 
			
		||||
        drop(groups_iter);
 | 
			
		||||
        for (key, value) in to_add {
 | 
			
		||||
            self.db.put(txn, &key.as_ref(), &value)?;
 | 
			
		||||
        }
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn delete_in_level<'t>(
 | 
			
		||||
        &self,
 | 
			
		||||
        txn: &'t mut RwTxn,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        level: u8,
 | 
			
		||||
        key: &[u8],
 | 
			
		||||
        value: u32,
 | 
			
		||||
    ) -> Result<DeletionResult> {
 | 
			
		||||
        if level == 0 {
 | 
			
		||||
            return self.delete_in_level_0(txn, field_id, key, value);
 | 
			
		||||
        }
 | 
			
		||||
        let (deletion_key, mut bitmap) =
 | 
			
		||||
            self.find_insertion_key_value(field_id, level, key, txn)?;
 | 
			
		||||
 | 
			
		||||
        let result = self.delete_in_level(txn, field_id, level - 1, key.clone(), value)?;
 | 
			
		||||
 | 
			
		||||
        let mut decrease_size = false;
 | 
			
		||||
        let (prev_key, next_key) = match result {
 | 
			
		||||
            DeletionResult::InPlace => {
 | 
			
		||||
                bitmap.bitmap.remove(value);
 | 
			
		||||
                self.db.put(txn, &deletion_key.as_ref(), &bitmap)?;
 | 
			
		||||
                return Ok(DeletionResult::InPlace);
 | 
			
		||||
            }
 | 
			
		||||
            DeletionResult::Reduce { prev, next } => (prev, next),
 | 
			
		||||
            DeletionResult::Remove { prev, next } => {
 | 
			
		||||
                decrease_size = true;
 | 
			
		||||
                (prev, next)
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let mut updated_value = bitmap;
 | 
			
		||||
        if decrease_size {
 | 
			
		||||
            updated_value.size -= 1;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if updated_value.size == 0 {
 | 
			
		||||
            self.db.delete(txn, &deletion_key.as_ref())?;
 | 
			
		||||
            Ok(DeletionResult::Remove { prev: prev_key, next: next_key })
 | 
			
		||||
        } else {
 | 
			
		||||
            let mut updated_deletion_key = deletion_key.clone();
 | 
			
		||||
            if key == deletion_key.left_bound {
 | 
			
		||||
                updated_deletion_key.left_bound = next_key.clone().unwrap();
 | 
			
		||||
            }
 | 
			
		||||
            updated_value.bitmap.remove(value);
 | 
			
		||||
            let _ = self.db.delete(txn, &deletion_key.as_ref())?;
 | 
			
		||||
            self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
 | 
			
		||||
 | 
			
		||||
            Ok(DeletionResult::Reduce { prev: prev_key, next: next_key })
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn delete_in_level_0<'t>(
 | 
			
		||||
        &self,
 | 
			
		||||
        txn: &'t mut RwTxn,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        key: &[u8],
 | 
			
		||||
        value: u32,
 | 
			
		||||
    ) -> Result<DeletionResult> {
 | 
			
		||||
        let key = FacetKey { field_id, level: 0, left_bound: key };
 | 
			
		||||
        let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
 | 
			
		||||
        bitmap.remove(value);
 | 
			
		||||
 | 
			
		||||
        if bitmap.is_empty() {
 | 
			
		||||
            let mut prev_key = None;
 | 
			
		||||
            let mut next_key = None;
 | 
			
		||||
 | 
			
		||||
            if let Some(prev) = self.db.get_lower_than(&txn, &key)? {
 | 
			
		||||
                prev_key = Some(prev.0.left_bound.to_vec());
 | 
			
		||||
            }
 | 
			
		||||
            if let Some(next) = self.db.get_greater_than(&txn, &key)? {
 | 
			
		||||
                if next.0.level == 0 {
 | 
			
		||||
                    next_key = Some(next.0.left_bound.to_vec());
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            self.db.delete(txn, &key)?;
 | 
			
		||||
            Ok(DeletionResult::Remove { prev: prev_key, next: next_key })
 | 
			
		||||
        } else {
 | 
			
		||||
            self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?;
 | 
			
		||||
            Ok(DeletionResult::InPlace)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn delete<'a, 't>(
 | 
			
		||||
        &self,
 | 
			
		||||
        txn: &'t mut RwTxn,
 | 
			
		||||
        field_id: u16,
 | 
			
		||||
        key: &[u8],
 | 
			
		||||
        value: u32,
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
        let highest_level = get_highest_level(&txn, &self.db, field_id)?;
 | 
			
		||||
 | 
			
		||||
        // let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
 | 
			
		||||
 | 
			
		||||
        let result = self.delete_in_level(txn, field_id, highest_level as u8, key, value)?;
 | 
			
		||||
        match result {
 | 
			
		||||
            DeletionResult::InPlace => return Ok(()),
 | 
			
		||||
            DeletionResult::Reduce { .. } => {}
 | 
			
		||||
            DeletionResult::Remove { .. } => {}
 | 
			
		||||
        }
 | 
			
		||||
        let mut highest_level_prefix = vec![];
 | 
			
		||||
        highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
 | 
			
		||||
        highest_level_prefix.push(highest_level);
 | 
			
		||||
 | 
			
		||||
        if highest_level == 0
 | 
			
		||||
            || self
 | 
			
		||||
                .db
 | 
			
		||||
                .as_polymorph()
 | 
			
		||||
                .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
 | 
			
		||||
                .count()
 | 
			
		||||
                >= self.group_size
 | 
			
		||||
        {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
        let mut to_delete = vec![];
 | 
			
		||||
        let mut iter = self
 | 
			
		||||
            .db
 | 
			
		||||
            .as_polymorph()
 | 
			
		||||
            .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
 | 
			
		||||
        while let Some(el) = iter.next() {
 | 
			
		||||
            let (k, _) = el?;
 | 
			
		||||
            to_delete.push(
 | 
			
		||||
                FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
 | 
			
		||||
            );
 | 
			
		||||
        }
 | 
			
		||||
        drop(iter);
 | 
			
		||||
        for k in to_delete {
 | 
			
		||||
            self.db.delete(txn, &k.as_ref())?;
 | 
			
		||||
        }
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -10,38 +10,39 @@ pub use self::filter::Filter;
 | 
			
		||||
 | 
			
		||||
mod facet_distribution;
 | 
			
		||||
mod facet_distribution_iter;
 | 
			
		||||
mod facet_range_search;
 | 
			
		||||
mod facet_sort_ascending;
 | 
			
		||||
mod facet_sort_descending;
 | 
			
		||||
mod filter;
 | 
			
		||||
mod incremental_update;
 | 
			
		||||
 | 
			
		||||
fn get_first_facet_value<'t, BoundCodec>(
 | 
			
		||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
 | 
			
		||||
    txn: &'t RoTxn,
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
) -> Option<BoundCodec::DItem>
 | 
			
		||||
) -> crate::Result<Option<BoundCodec::DItem>>
 | 
			
		||||
where
 | 
			
		||||
    BoundCodec: BytesDecode<'t>,
 | 
			
		||||
{
 | 
			
		||||
    let mut level0prefix = vec![];
 | 
			
		||||
    level0prefix.extend_from_slice(&field_id.to_be_bytes());
 | 
			
		||||
    level0prefix.push(0);
 | 
			
		||||
    let mut level0_iter_forward = db
 | 
			
		||||
        .as_polymorph()
 | 
			
		||||
        .prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())
 | 
			
		||||
        .unwrap();
 | 
			
		||||
    let mut level0_iter_forward =
 | 
			
		||||
        db.as_polymorph().prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?;
 | 
			
		||||
    if let Some(first) = level0_iter_forward.next() {
 | 
			
		||||
        let (first_key, _) = first.unwrap();
 | 
			
		||||
        let first_key = FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).unwrap();
 | 
			
		||||
        Some(first_key.left_bound)
 | 
			
		||||
        let (first_key, _) = first?;
 | 
			
		||||
        let first_key =
 | 
			
		||||
            FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
 | 
			
		||||
        Ok(Some(first_key.left_bound))
 | 
			
		||||
    } else {
 | 
			
		||||
        None
 | 
			
		||||
        Ok(None)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
fn get_last_facet_value<'t, BoundCodec>(
 | 
			
		||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
 | 
			
		||||
    txn: &'t RoTxn,
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
) -> Option<BoundCodec::DItem>
 | 
			
		||||
) -> crate::Result<Option<BoundCodec::DItem>>
 | 
			
		||||
where
 | 
			
		||||
    BoundCodec: BytesDecode<'t>,
 | 
			
		||||
{
 | 
			
		||||
@@ -50,30 +51,129 @@ where
 | 
			
		||||
    level0prefix.push(0);
 | 
			
		||||
    let mut level0_iter_backward = db
 | 
			
		||||
        .as_polymorph()
 | 
			
		||||
        .rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())
 | 
			
		||||
        .unwrap();
 | 
			
		||||
        .rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?;
 | 
			
		||||
    if let Some(last) = level0_iter_backward.next() {
 | 
			
		||||
        let (last_key, _) = last.unwrap();
 | 
			
		||||
        let last_key = FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).unwrap();
 | 
			
		||||
        Some(last_key.left_bound)
 | 
			
		||||
        let (last_key, _) = last?;
 | 
			
		||||
        let last_key =
 | 
			
		||||
            FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
 | 
			
		||||
        Ok(Some(last_key.left_bound))
 | 
			
		||||
    } else {
 | 
			
		||||
        None
 | 
			
		||||
        Ok(None)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
fn get_highest_level<'t>(
 | 
			
		||||
pub(crate) fn get_highest_level<'t>(
 | 
			
		||||
    txn: &'t RoTxn<'t>,
 | 
			
		||||
    db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
 | 
			
		||||
    field_id: u16,
 | 
			
		||||
) -> u8 {
 | 
			
		||||
) -> crate::Result<u8> {
 | 
			
		||||
    let field_id_prefix = &field_id.to_be_bytes();
 | 
			
		||||
    db.as_polymorph()
 | 
			
		||||
        .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&txn, field_id_prefix)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
    Ok(db
 | 
			
		||||
        .as_polymorph()
 | 
			
		||||
        .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&txn, field_id_prefix)?
 | 
			
		||||
        .next()
 | 
			
		||||
        .map(|el| {
 | 
			
		||||
            let (key, _) = el.unwrap();
 | 
			
		||||
            let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap();
 | 
			
		||||
            key.level
 | 
			
		||||
        })
 | 
			
		||||
        .unwrap_or(0)
 | 
			
		||||
        .unwrap_or(0))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod test {
    use std::{fmt::Display, marker::PhantomData, rc::Rc};

    use heed::{BytesDecode, BytesEncode, Env};
    use tempfile::TempDir;

    use crate::{
        heed_codec::facet::new::{
            FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
        },
        snapshot_tests::display_bitmap,
    };

    /// Test helper bundling a heed environment with a facet database whose left
    /// bounds are encoded and decoded with `BoundCodec`.
    pub struct FacetIndex<BoundCodec>
    where
        for<'a> BoundCodec:
            BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
    {
        pub env: Env,
        pub db: Database,
        _phantom: PhantomData<BoundCodec>,
    }

    /// The facet database together with its grouping parameters.
    pub struct Database {
        pub content: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
        pub group_size: usize,
        pub max_group_size: usize,
        // Keeps the temporary directory alive for as long as the database is used.
        _tempdir: Rc<tempfile::TempDir>,
    }

    /// Clamps `group_size` to `2..=127` and makes `max_group_size` at least twice
    /// the clamped group size. Returns `(group_size, max_group_size)`.
    fn normalize_group_sizes(group_size: u8, max_group_size: u8) -> (usize, usize) {
        let group_size = usize::from(group_size.clamp(2, 127));
        let max_group_size = std::cmp::max(group_size * 2, usize::from(max_group_size));
        (group_size, max_group_size)
    }

    impl<BoundCodec> FacetIndex<BoundCodec>
    where
        for<'a> BoundCodec:
            BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
    {
        /// Opens the existing unnamed database stored in `tempdir`.
        ///
        /// # Panics
        ///
        /// Panics if the environment cannot be opened or the database does not exist.
        pub fn open_from_tempdir(
            tempdir: Rc<TempDir>,
            group_size: u8,
            max_group_size: u8,
        ) -> FacetIndex<BoundCodec> {
            let (group_size, max_group_size) = normalize_group_sizes(group_size, max_group_size);
            let mut options = heed::EnvOpenOptions::new();
            let options = options.map_size(4096 * 4 * 10 * 100);
            // SAFETY: NOTE(review): heed requires `unsafe` to set raw environment flags;
            // this presumably relies on the environment being used by this test helper
            // only -- confirm against the heed/LMDB documentation of this flag.
            unsafe {
                options.flag(heed::flags::Flags::MdbAlwaysFreePages);
            }
            let env = options.open(tempdir.path()).unwrap();
            let content = env.open_database(None).unwrap().unwrap();

            FacetIndex {
                db: Database { content, group_size, max_group_size, _tempdir: tempdir },
                env,
                _phantom: PhantomData,
            }
        }

        /// Creates a fresh environment and unnamed database in a new temporary
        /// directory.
        ///
        /// # Panics
        ///
        /// Panics if the temporary directory, the environment or the database cannot
        /// be created.
        pub fn new(group_size: u8, max_group_size: u8) -> FacetIndex<BoundCodec> {
            let (group_size, max_group_size) = normalize_group_sizes(group_size, max_group_size);
            let mut options = heed::EnvOpenOptions::new();
            let options = options.map_size(4096 * 4 * 100);
            // Use the system temporary directory so that the helper does not depend
            // on a `databases/` directory existing in the current working directory.
            let tempdir = tempfile::TempDir::new().unwrap();
            let env = options.open(tempdir.path()).unwrap();
            let content = env.create_database(None).unwrap();

            FacetIndex {
                db: Database { content, group_size, max_group_size, _tempdir: Rc::new(tempdir) },
                env,
                _phantom: PhantomData,
            }
        }
    }

    /// Prints one line per database entry: field id, level, decoded left bound,
    /// group size and the bitmap as rendered by `display_bitmap`.
    impl<BoundCodec> Display for FacetIndex<BoundCodec>
    where
        for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized + Display,
        for<'a> BoundCodec:
            BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
    {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let txn = self.env.read_txn().unwrap();
            for el in self.db.content.iter(&txn).unwrap() {
                let (key, value) = el.unwrap();
                let FacetKey { field_id, level, left_bound: bound } = key;
                let bound = BoundCodec::bytes_decode(bound).unwrap();
                let FacetGroupValue { size, bitmap } = value;
                writeln!(
                    f,
                    "{field_id:<2} {level:<2} k{bound:<8} {size:<4} {values:?}",
                    values = display_bitmap(&bitmap)
                )?;
            }
            Ok(())
        }
    }
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user