add documentation

This commit is contained in:
Marin Postma
2021-04-14 12:00:45 +02:00
parent 45c45e11dd
commit 2f73fa55ae
5 changed files with 41 additions and 16 deletions

View File

@@ -343,6 +343,20 @@ impl Index {
} }
} }
/* Distinct attribute */
/// Stores `distinct_attribute` in the `main` database under `DISTINCT_ATTRIBUTE_KEY`,
/// using the given write transaction. Both the key and the value are encoded with the
/// `Str` codec.
///
/// Any error reported by the underlying `put` call is returned as-is.
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
}
/// Reads the value stored in the `main` database under `DISTINCT_ATTRIBUTE_KEY`, using
/// the given read transaction.
///
/// The returned string slice borrows from `rtxn` (lifetime `'a`). The `Option` is whatever
/// the underlying `get` call yields; presumably `None` when no distinct attribute has been
/// stored yet (confirm against the heed documentation).
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
}
/// Deletes the entry stored in the `main` database under `DISTINCT_ATTRIBUTE_KEY`, using
/// the given write transaction.
///
/// The returned `bool` is forwarded from the underlying `delete` call; presumably it tells
/// whether an entry was actually removed (confirm against the heed documentation).
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
}
/* criteria */ /* criteria */
pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> { pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
@@ -462,17 +476,6 @@ impl Index {
self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, UPDATED_AT_KEY, &time) self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, UPDATED_AT_KEY, &time)
} }
/// Stores `distinct_attribute` in the `main` database under `DISTINCT_ATTRIBUTE_KEY`,
/// using the given write transaction. Both the key and the value are encoded with the
/// `Str` codec.
///
/// Any error reported by the underlying `put` call is returned as-is.
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
}
/// Reads the value stored in the `main` database under `DISTINCT_ATTRIBUTE_KEY`, using
/// the given read transaction.
///
/// The returned string slice borrows from `rtxn` (lifetime `'a`). The `Option` is whatever
/// the underlying `get` call yields; presumably `None` when no distinct attribute has been
/// stored yet (confirm against the heed documentation).
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
}
/// Deletes the entry stored in the `main` database under `DISTINCT_ATTRIBUTE_KEY`, using
/// the given write transaction.
///
/// The returned `bool` is forwarded from the underlying `delete` call; presumably it tells
/// whether an entry was actually removed (confirm against the heed documentation).
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
}
} }
#[cfg(test)] #[cfg(test)]

View File

@@ -6,6 +6,12 @@ use crate::heed_codec::facet::*;
use crate::{facet::FacetType, DocumentId, FieldId, Index}; use crate::{facet::FacetType, DocumentId, FieldId, Index};
use super::{Distinct, DocIter}; use super::{Distinct, DocIter};
/// A distinct implementer that is backed by facets. On each iteration, the facet values for the
/// distinct attribute of the first document are retrieved. The document ids for these facet values
/// are then retrieved and taken out of the candidates and added to the excluded set. We take
/// care to keep the document we are currently on, and remove it from the excluded list. The next
/// iterations will never contain any occurrence of a document with the same distinct value as a
/// document from previous iterations.
pub struct FacetDistinct<'a> { pub struct FacetDistinct<'a> {
distinct: FieldId, distinct: FieldId,
index: &'a Index, index: &'a Index,
@@ -114,6 +120,9 @@ impl<'a> FacetDistinctIter<'a> {
Ok(()) Ok(())
} }
/// Performs the next iteration of the facet distinct. This is a convenience method that is
/// called by the `Iterator::next` implementation that transposes the result. It makes error
/// handling easier.
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> { fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
// The first step is to remove all the excluded documents from our candidates // The first step is to remove all the excluded documents from our candidates
self.candidates.difference_with(&self.excluded); self.candidates.difference_with(&self.excluded);
@@ -127,8 +136,10 @@ impl<'a> FacetDistinctIter<'a> {
FacetType::Float => self.distinct_float(id)?, FacetType::Float => self.distinct_float(id)?,
}; };
// On every iteration, the first document is always a distinct one, since it // The first document of each iteration is kept, since the next call to
// hasn't been discarded by the previous difference. // `difference_with` will filter out all the documents for that facet value. By
// increasing the offset we make sure to get the first valid value for the next
// distinct document to keep.
self.iter_offset += 1; self.iter_offset += 1;
Ok(Some(id)) Ok(Some(id))
} }

View File

@@ -6,6 +6,9 @@ use serde_json::Value;
use super::{Distinct, DocIter}; use super::{Distinct, DocIter};
use crate::{DocumentId, FieldId, Index}; use crate::{DocumentId, FieldId, Index};
/// A distinct implementer that is backed by a `HashMap`. Each time a document is seen, the value
/// for its distinct field is added to the map. If the map already contains an entry for this
/// value, then the document is filtered out, and is added to the excluded set.
pub struct MapDistinct<'a> { pub struct MapDistinct<'a> {
distinct: FieldId, distinct: FieldId,
map: HashMap<String, usize>, map: HashMap<String, usize>,

View File

@@ -9,11 +9,17 @@ pub use map_distinct::MapDistinct;
pub use noop_distinct::NoopDistinct; pub use noop_distinct::NoopDistinct;
use crate::DocumentId; use crate::DocumentId;
/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
/// It provides a way to take back ownership of the excluded set.
pub trait DocIter: Iterator<Item=anyhow::Result<DocumentId>> { pub trait DocIter: Iterator<Item=anyhow::Result<DocumentId>> {
/// Returns ownership on the internal RoaringBitmaps: (candidates, excluded) /// Returns ownership on the internal excluded set.
fn into_excluded(self) -> RoaringBitmap; fn into_excluded(self) -> RoaringBitmap;
} }
/// A trait that is implemented by structs that perform a distinct on `candidates`. Calling distinct
/// must return an iterator containing only distinct documents, and add the discarded documents to
/// the excluded set. The excluded set can later be retrieved by calling `DocIter::into_excluded` on
/// the returned iterator.
pub trait Distinct<'a> { pub trait Distinct<'a> {
type Iter: DocIter; type Iter: DocIter;

View File

@@ -1,12 +1,14 @@
use roaring::RoaringBitmap; use roaring::{RoaringBitmap, bitmap::IntoIter};
use crate::DocumentId; use crate::DocumentId;
use super::{DocIter, Distinct}; use super::{DocIter, Distinct};
/// A distinct implementer that does not perform any distinct, and simply returns an iterator to
/// the candidates.
pub struct NoopDistinct; pub struct NoopDistinct;
pub struct NoopDistinctIter { pub struct NoopDistinctIter {
candidates: roaring::bitmap::IntoIter, candidates: IntoIter,
excluded: RoaringBitmap, excluded: RoaringBitmap,
} }