269: Fix bug when inserting previously deleted documents r=Kerollmops a=Kerollmops

This PR fixes #268.

The issue was in the `ExternalDocumentsIds` implementation in the specific case that an external document id was in the soft map marked as deleted.

The bug was due to a wrong assumption on my side about how the FST unions were returning the `IndexedValue`s, I thought the values returned in an array were in the same order as the FSTs given to the `OpBuilder` but in fact, [the `IndexedValue`'s `index` field was here to indicate from which FST the values were coming from](https://docs.rs/fst/0.4.7/fst/map/struct.IndexedValue.html).

271: Remove the roaring operation functions warnings r=Kerollmops a=Kerollmops

In this PR we are just replacing the usages of the roaring operations function by the new operators. This removes a lot of warnings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot]
2021-06-30 12:34:55 +00:00
committed by GitHub
16 changed files with 164 additions and 65 deletions

View File

@ -12,7 +12,7 @@ impl AvailableDocumentsIds {
match docids.max() {
Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id);
available.difference_with(&docids);
available -= docids;
let iter = match last_id.checked_add(1) {
Some(id) => id..=u32::max_value(),

View File

@ -43,7 +43,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
}
pub fn delete_documents(&mut self, docids: &RoaringBitmap) {
self.documents_ids.union_with(docids);
self.documents_ids |= docids;
}
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
@ -65,7 +65,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// We remove the documents ids that we want to delete
// from the documents in the database and write them back.
let current_documents_ids_len = documents_ids.len();
documents_ids.difference_with(&self.documents_ids);
documents_ids -= &self.documents_ids;
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
// We can execute a ClearDocuments operation when the number of documents
@ -194,7 +194,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
if let Some((key, mut docids)) = iter.next().transpose()? {
if key == word.as_ref() {
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -245,7 +245,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let (prefix, mut docids) = result?;
let prefix = prefix.to_owned();
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -285,7 +285,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() {
let (key, mut docids) = result?;
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -306,7 +306,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -325,7 +325,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -344,7 +344,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -361,7 +361,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?;
while let Some((key, mut docids)) = iter.next().transpose()? {
let previous_len = docids.len();
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
@ -390,7 +390,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
// Remove docids from the number faceted documents ids
let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?;
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?;
remove_docids_from_field_id_docid_facet_value(
@ -403,7 +403,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// Remove docids from the string faceted documents ids
let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?;
docids.difference_with(&self.documents_ids);
docids -= &self.documents_ids;
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?;
remove_docids_from_field_id_docid_facet_value(
@ -456,7 +456,7 @@ where
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
docids.difference_with(to_remove);
docids -= to_remove;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };

View File

@ -181,7 +181,7 @@ fn compute_facet_number_levels<'t>(
}
// The right bound is always the bound we run through.
group_docids.union_with(&docids);
group_docids |= docids;
right = value;
}

View File

@ -61,8 +61,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
for value in tail {
let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
head.union_with(&bitmap);
head |= RoaringBitmap::deserialize_from(&value[..])?;
}
let mut vec = Vec::with_capacity(head.serialized_size());
@ -75,8 +74,7 @@ pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
for value in tail {
let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
head.union_with(&bitmap);
head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
}
let mut vec = Vec::new();

View File

@ -608,8 +608,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
// We merge the new documents ids with the existing ones.
documents_ids.union_with(&new_documents_ids);
documents_ids.union_with(&replaced_documents_ids);
documents_ids |= new_documents_ids;
documents_ids |= replaced_documents_ids;
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
let mut database_count = 0;
@ -845,6 +845,7 @@ mod tests {
use heed::EnvOpenOptions;
use super::*;
use crate::update::DeleteDocuments;
#[test]
fn simple_document_replacement() {
@ -1303,4 +1304,52 @@ mod tests {
builder.execute(Cursor::new(documents), |_, _| ()).unwrap();
wtxn.commit().unwrap();
}
#[test]
fn delete_documents_then_insert() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
let mut wtxn = index.write_txn().unwrap();
let content = &br#"[
{ "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
{ "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
{ "objectId": 30, "title": "Hamlet" }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
// Delete not all of the documents but some of them.
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
builder.delete_external_id("30");
builder.execute().unwrap();
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_none());
let content = &br#"[
{ "objectId": 30, "title": "Hamlet" }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_some());
let content = &br#"[
{ "objectId": 30, "title": "Hamlet" }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
wtxn.commit().unwrap();
}
}

View File

@ -236,7 +236,7 @@ fn compute_positions_levels(
}
// The right bound is always the bound we run through.
group_docids.union_with(&docids);
group_docids |= docids;
}
if !group_docids.is_empty() {