mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 09:11:00 +00:00
269: Fix bug when inserting previously deleted documents r=Kerollmops a=Kerollmops This PR fixes #268. The issue was in the `ExternalDocumentsIds` implementation, in the specific case where an external document id was present in the soft map but marked as deleted. The bug was due to a wrong assumption on my side about how the FST unions return the `IndexedValue`s: I thought the values returned in an array were in the same order as the FSTs given to the `OpBuilder`, but in fact [the `IndexedValue`'s `index` field is there to indicate which FST each value comes from](https://docs.rs/fst/0.4.7/fst/map/struct.IndexedValue.html). 271: Remove the roaring operation functions warnings r=Kerollmops a=Kerollmops In this PR we are just replacing the usages of the roaring operation functions with the new operators. This removes a lot of warnings. Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
@ -12,7 +12,7 @@ impl AvailableDocumentsIds {
|
||||
match docids.max() {
|
||||
Some(last_id) => {
|
||||
let mut available = RoaringBitmap::from_iter(0..last_id);
|
||||
available.difference_with(&docids);
|
||||
available -= docids;
|
||||
|
||||
let iter = match last_id.checked_add(1) {
|
||||
Some(id) => id..=u32::max_value(),
|
||||
|
@ -43,7 +43,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
}
|
||||
|
||||
// Merges every id in `docids` into `self.documents_ids`, the bitmap that is
// later subtracted from the index's stored documents ids.
pub fn delete_documents(&mut self, docids: &RoaringBitmap) {
|
||||
// NOTE(review): this is a diff view, so both versions of the statement are
// shown. The `union_with` call is presumably the line removed by the commit
// and the `|=` operator below its replacement; confirm against the repository.
self.documents_ids.union_with(docids);
|
||||
self.documents_ids |= docids;
|
||||
}
|
||||
|
||||
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
|
||||
@ -65,7 +65,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
// We remove the documents ids that we want to delete
|
||||
// from the documents in the database and write them back.
|
||||
let current_documents_ids_len = documents_ids.len();
|
||||
documents_ids.difference_with(&self.documents_ids);
|
||||
documents_ids -= &self.documents_ids;
|
||||
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
||||
|
||||
// We can execute a ClearDocuments operation when the number of documents
|
||||
@ -194,7 +194,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
if let Some((key, mut docids)) = iter.next().transpose()? {
|
||||
if key == word.as_ref() {
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -245,7 +245,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
let (prefix, mut docids) = result?;
|
||||
let prefix = prefix.to_owned();
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -285,7 +285,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
while let Some(result) = iter.next() {
|
||||
let (key, mut docids) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -306,7 +306,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, mut docids) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -325,7 +325,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, mut docids) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -344,7 +344,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, mut docids) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -361,7 +361,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?;
|
||||
while let Some((key, mut docids)) = iter.next().transpose()? {
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
@ -390,7 +390,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||
// Remove docids from the number faceted documents ids
|
||||
let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?;
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||
|
||||
remove_docids_from_field_id_docid_facet_value(
|
||||
@ -403,7 +403,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
|
||||
// Remove docids from the string faceted documents ids
|
||||
let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?;
|
||||
docids.difference_with(&self.documents_ids);
|
||||
docids -= &self.documents_ids;
|
||||
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||
|
||||
remove_docids_from_field_id_docid_facet_value(
|
||||
@ -456,7 +456,7 @@ where
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, mut docids) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids.difference_with(to_remove);
|
||||
docids -= to_remove;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
|
@ -181,7 +181,7 @@ fn compute_facet_number_levels<'t>(
|
||||
}
|
||||
|
||||
// The right bound is always the bound we run through.
|
||||
group_docids.union_with(&docids);
|
||||
group_docids |= docids;
|
||||
right = value;
|
||||
}
|
||||
|
||||
|
@ -61,8 +61,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>
|
||||
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
|
||||
|
||||
for value in tail {
|
||||
let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
|
||||
head.union_with(&bitmap);
|
||||
head |= RoaringBitmap::deserialize_from(&value[..])?;
|
||||
}
|
||||
|
||||
let mut vec = Vec::with_capacity(head.serialized_size());
|
||||
@ -75,8 +74,7 @@ pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec
|
||||
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
||||
|
||||
for value in tail {
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
||||
head.union_with(&bitmap);
|
||||
head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
||||
}
|
||||
|
||||
let mut vec = Vec::new();
|
||||
|
@ -608,8 +608,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
||||
|
||||
// We merge the new documents ids with the existing ones.
|
||||
documents_ids.union_with(&new_documents_ids);
|
||||
documents_ids.union_with(&replaced_documents_ids);
|
||||
documents_ids |= new_documents_ids;
|
||||
documents_ids |= replaced_documents_ids;
|
||||
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
||||
|
||||
let mut database_count = 0;
|
||||
@ -845,6 +845,7 @@ mod tests {
|
||||
use heed::EnvOpenOptions;
|
||||
|
||||
use super::*;
|
||||
use crate::update::DeleteDocuments;
|
||||
|
||||
#[test]
|
||||
fn simple_document_replacement() {
|
||||
@ -1303,4 +1304,52 @@ mod tests {
|
||||
builder.execute(Cursor::new(documents), |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
}
|
||||
|
||||
// Regression test: a document deleted by its external id can be inserted
// again, and its external id resolves once more afterwards.
#[test]
|
||||
fn delete_documents_then_insert() {
|
||||
// Open an index in a temporary directory.
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// A single write transaction is used for every step below and is only
// committed at the very end of the test.
let mut wtxn = index.write_txn().unwrap();
|
||||
let content = &br#"[
|
||||
{ "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
|
||||
{ "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
|
||||
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
|
||||
{ "objectId": 30, "title": "Hamlet" }
|
||||
]"#[..];
|
||||
// Step 1: index the four JSON documents above.
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
|
||||
// After the first import the index must report `objectId` as its primary key.
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
|
||||
|
||||
// Step 2: delete only one of the documents (external id "30"), not all of them.
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
|
||||
builder.delete_external_id("30");
|
||||
builder.execute().unwrap();
|
||||
|
||||
// Once deleted, the external id must no longer resolve.
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||
assert!(external_documents_ids.get("30").is_none());
|
||||
|
||||
let content = &br#"[
|
||||
{ "objectId": 30, "title": "Hamlet" }
|
||||
]"#[..];
|
||||
// Step 3: insert the previously deleted document again.
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
|
||||
// The external id must resolve again after the re-insertion.
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||
assert!(external_documents_ids.get("30").is_some());
|
||||
|
||||
let content = &br#"[
|
||||
{ "objectId": 30, "title": "Hamlet" }
|
||||
]"#[..];
|
||||
// Step 4: index the same document once more. There is no assertion here;
// the step only checks that `execute` succeeds (via `unwrap`).
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
@ -236,7 +236,7 @@ fn compute_positions_levels(
|
||||
}
|
||||
|
||||
// The right bound is always the bound we run through.
|
||||
group_docids.union_with(&docids);
|
||||
group_docids |= docids;
|
||||
}
|
||||
|
||||
if !group_docids.is_empty() {
|
||||
|
Reference in New Issue
Block a user