Use the De Morgan law to simplify the NOT operation

This commit is contained in:
Clément Renault
2020-11-23 13:08:57 +01:00
parent 7370ef8c5e
commit fc686aaca7
9 changed files with 182 additions and 58 deletions

View File

@ -24,12 +24,18 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
// We retrieve the number of documents ids that we are deleting.
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
// We clean some of the main engine datastructures.
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
// We clean all the faceted documents ids.
for (field_id, _) in faceted_fields {
self.index.put_faceted_documents_ids(self.wtxn, field_id, &RoaringBitmap::default())?;
}
// Clear the other databases.
word_docids.clear(self.wtxn)?;
docid_word_positions.clear(self.wtxn)?;

View File

@ -184,6 +184,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
drop(iter);
// Remove the documents ids from the faceted documents ids.
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
for (field_id, _) in faceted_fields {
let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
docids.difference_with(&self.documents_ids);
self.index.put_faceted_documents_ids(self.wtxn, field_id, &docids)?;
}
// We delete the documents ids that are under the facet field id values.
let mut iter = facet_field_id_value_docids.iter_mut(self.wtxn)?;
while let Some(result) = iter.next() {

View File

@ -24,7 +24,7 @@ pub enum EasingName {
Linear,
}
pub struct FacetLevels<'t, 'u, 'i> {
pub struct Facets<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
pub(crate) chunk_compression_type: CompressionType,
@ -35,9 +35,9 @@ pub struct FacetLevels<'t, 'u, 'i> {
easing_function: EasingName,
}
impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> FacetLevels<'t, 'u, 'i> {
FacetLevels {
impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Facets<'t, 'u, 'i> {
Facets {
wtxn,
index,
chunk_compression_type: CompressionType::None,
@ -70,7 +70,7 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
for (field_id, facet_type) in faceted_fields {
let content = match facet_type {
let (content, documents_ids) = match facet_type {
FacetType::Integer => {
clear_field_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn,
@ -78,7 +78,13 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
field_id,
)?;
compute_facet_levels::<i64, FacetLevelValueI64Codec>(
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
@ -88,7 +94,9 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
self.number_of_levels,
self.easing_function,
field_id,
)?
)?;
(Some(content), documents_ids)
},
FacetType::Float => {
clear_field_levels::<f64, FacetLevelValueF64Codec>(
@ -97,7 +105,13 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
field_id,
)?;
compute_facet_levels::<f64, FacetLevelValueF64Codec>(
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_levels::<f64, FacetLevelValueF64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
@ -107,18 +121,32 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
self.number_of_levels,
self.easing_function,
field_id,
)?
)?;
(Some(content), documents_ids)
},
FacetType::String => {
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
(None, documents_ids)
},
FacetType::String => continue,
};
write_into_lmdb_database(
self.wtxn,
*self.index.facet_field_id_value_docids.as_polymorph(),
content,
|_, _| anyhow::bail!("invalid facet level merging"),
WriteMethod::GetMergePut,
)?;
if let Some(content) = content {
write_into_lmdb_database(
self.wtxn,
*self.index.facet_field_id_value_docids.as_polymorph(),
content,
|_, _| anyhow::bail!("invalid facet level merging"),
WriteMethod::GetMergePut,
)?;
}
self.index.put_faceted_documents_ids(self.wtxn, field_id, &documents_ids)?;
}
Ok(())
@ -138,9 +166,7 @@ where
let left = (field_id, 1, T::min_value(), T::min_value());
let right = (field_id, u8::MAX, T::max_value(), T::max_value());
let range = left..=right;
db.remap_key_type::<KC>()
.delete_range(wtxn, &range)
.map(drop)
db.remap_key_type::<KC>().delete_range(wtxn, &range).map(drop)
}
fn compute_facet_levels<'t, T: 't, KC>(
@ -217,6 +243,20 @@ where
writer_into_reader(writer, shrink_size)
}
/// Collects the ids of every document stored under the given facet field.
///
/// Walks all the entries of `db` whose key starts with the `field_id` byte and
/// merges their documents ids bitmaps into a single one.
///
/// # Errors
///
/// Returns an error if the prefix iterator cannot be created or if reading
/// one of the entries fails.
fn compute_faceted_documents_ids(
    rtxn: &heed::RoTxn,
    db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
    field_id: u8,
) -> anyhow::Result<RoaringBitmap>
{
    // Keys of this database begin with the field id, so a one byte prefix
    // selects all the entries belonging to this field.
    let prefix = [field_id];
    let mut entries = db.prefix_iter(rtxn, &prefix)?;

    // Stop at the first failing entry, otherwise accumulate the union.
    entries.try_fold(RoaringBitmap::new(), |mut merged, entry| -> anyhow::Result<RoaringBitmap> {
        let (_key, bitmap) = entry?;
        merged.union_with(&bitmap);
        Ok(merged)
    })
}
fn write_entry<T, KC>(
writer: &mut Writer<File>,
field_id: u8,

View File

@ -16,7 +16,7 @@ use rayon::prelude::*;
use rayon::ThreadPool;
use crate::index::Index;
use crate::update::{FacetLevels, UpdateIndexingStep};
use crate::update::{Facets, UpdateIndexingStep};
use self::store::{Store, Readers};
use self::merge_function::{
main_merge, word_docids_merge, words_pairs_proximities_docids_merge,
@ -584,7 +584,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
});
}
let mut builder = FacetLevels::new(self.wtxn, self.index);
let mut builder = Facets::new(self.wtxn, self.index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;
builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;

View File

@ -1,7 +1,7 @@
mod available_documents_ids;
mod clear_documents;
mod delete_documents;
mod facet_levels;
mod facets;
mod index_documents;
mod settings;
mod update_builder;
@ -12,7 +12,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::DeleteDocuments;
pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
pub use self::facet_levels::{FacetLevels, EasingName};
pub use self::facets::{Facets, EasingName};
pub use self::settings::Settings;
pub use self::update_builder::UpdateBuilder;
pub use self::update_step::UpdateIndexingStep;

View File

@ -2,7 +2,7 @@ use grenad::CompressionType;
use rayon::ThreadPool;
use crate::Index;
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, FacetLevels};
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets};
pub struct UpdateBuilder<'a> {
pub(crate) log_every_n: Option<usize>,
@ -119,13 +119,13 @@ impl<'a> UpdateBuilder<'a> {
builder
}
pub fn facet_levels<'t, 'u, 'i>(
pub fn facets<'t, 'u, 'i>(
self,
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> FacetLevels<'t, 'u, 'i>
) -> Facets<'t, 'u, 'i>
{
let mut builder = FacetLevels::new(wtxn, index);
let mut builder = Facets::new(wtxn, index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;