Introduce the AvailableDocumentsIds iterator

This commit is contained in:
Clément Renault
2020-10-22 17:41:22 +02:00
parent 2a4cd81c86
commit 8d82e37ec0
2 changed files with 68 additions and 1 deletions

View File

@@ -0,0 +1,67 @@
use std::iter::{Chain, FromIterator};
use std::ops::RangeInclusive;
use roaring::bitmap::{RoaringBitmap, IntoIter};
/// Iterator over the `u32` document ids that are absent from a given set of
/// ids.
///
/// Instances are created with [`AvailableDocumentsIds::from_documents_ids`].
pub struct AvailableDocumentsIds {
// Two sources chained together: first the ids of a bitmap holding the unused
// ids found below the highest used one, then an inclusive range covering the
// ids that come after it.
iter: Chain<IntoIter, RangeInclusive<u32>>,
}
impl AvailableDocumentsIds {
    /// Creates an iterator over every `u32` id that is absent from `docids`.
    ///
    /// The ids are produced from two chained sources:
    ///
    /// 1. the "holes": ids smaller than the highest id of `docids` that are
    ///    not part of `docids`, materialised in a temporary bitmap;
    /// 2. the "tail": every id strictly greater than the highest id of
    ///    `docids`, up to and including `u32::max_value()`, produced lazily
    ///    by a range.
    ///
    /// When `docids` is empty the whole `0..=u32::max_value()` range is
    /// yielded.
    ///
    /// Note that building the holes bitmap walks over `0..last_id`, so the
    /// construction cost grows with the highest id stored in `docids`.
    pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
        match docids.max() {
            Some(last_id) => {
                // `last_id` is used by definition, hence the exclusive upper
                // bound: only ids strictly below it can be holes.
                let mut available = RoaringBitmap::from_iter(0..last_id);
                available.difference_with(docids);

                let iter = match last_id.checked_add(1) {
                    Some(id) => id..=u32::max_value(),
                    // `last_id` is already the greatest `u32`, nothing can
                    // follow it. The range is reversed on purpose: it is the
                    // simplest way to get an empty `RangeInclusive<u32>`.
                    #[allow(clippy::reversed_empty_ranges)]
                    None => 1..=0,
                };

                AvailableDocumentsIds {
                    iter: available.into_iter().chain(iter),
                }
            },
            None => {
                // No id is used at all: there are no holes and the tail
                // covers the complete id space.
                let empty = RoaringBitmap::new().into_iter();
                AvailableDocumentsIds {
                    iter: empty.chain(0..=u32::max_value()),
                }
            },
        }
    }
}
impl Iterator for AvailableDocumentsIds {
    type Item = u32;

    /// Returns the next available document id, or `None` once the inner
    /// chained iterator is exhausted.
    fn next(&mut self) -> Option<u32> {
        // Pure delegation: all the logic lives in the chained iterator that
        // was assembled at construction time.
        Iterator::next(&mut self.iter)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of leading ids compared against the expected sequence; the full
    /// id space is far too large to walk entirely in a unit test.
    const CHECKED: usize = 500;

    /// With no used id, the iterator must start at zero and count upwards.
    #[test]
    fn empty() {
        let used = RoaringBitmap::new();
        let available = AvailableDocumentsIds::from_documents_ids(&used);
        let expected = 0..=u32::max_value();

        for (got, want) in available.zip(expected).take(CHECKED) {
            assert_eq!(got, want);
        }
    }

    /// With a few scattered used ids, the iterator must skip exactly those.
    #[test]
    fn scattered() {
        let taken = [0u32, 10, 100, 405];

        let mut used = RoaringBitmap::new();
        for &id in taken.iter() {
            used.insert(id);
        }

        let available = AvailableDocumentsIds::from_documents_ids(&used);
        let expected = (0..=u32::max_value()).filter(|id| !taken.contains(id));

        for (got, want) in available.zip(expected).take(CHECKED) {
            assert_eq!(got, want);
        }
    }
}

View File

@@ -1,3 +1,4 @@
mod available_documents_ids;
mod criterion;
mod fields_ids_map;
mod index;
@@ -34,4 +35,3 @@ pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub type DocumentId = u32;
pub type Attribute = u32;
pub type Position = u32;