mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	Merge #4433
4433: Enhance facet incremental r=Kerollmops a=ManyTheFish # Pull Request ## Related issue Fixes #4367 Fixes #4409 ## What does this PR do? - Add a test reproducing #4409 - Fix #4409 by removing a document from a level only if it is no more present in all the linked sub-level nodes - Optimize facet Incremental indexing by creating or deleting a complete level once per field id instead of for each facet value - Optimize facet Incremental indexing by doing the additions and the deletions in the same process instead of doing them separately Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -18,15 +18,39 @@ use crate::update::index_documents::valid_lmdb_key; | ||||
| use crate::update::MergeFn; | ||||
| use crate::{CboRoaringBitmapCodec, Index, Result}; | ||||
|  | ||||
| enum InsertionResult { | ||||
| /// Enum used as a return value for the facet incremental indexing. | ||||
| /// | ||||
| /// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have | ||||
| /// an effect on the number of keys in that level. Therefore, it did not increase the number of children | ||||
| /// of the parent node. | ||||
| /// | ||||
| /// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted | ||||
| /// in the addition of a new key in that level, and that therefore the number of children | ||||
| /// of the parent node should be incremented. | ||||
| /// | ||||
| /// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the | ||||
| /// number of keys in the level. For example, removing a document id from the facet value `3` could | ||||
| /// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted | ||||
| /// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must | ||||
| /// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well. | ||||
| /// | ||||
| /// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the | ||||
| /// bounds of the keys of the level. For example, removing a document id from the facet value | ||||
| /// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore, | ||||
| /// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). | ||||
| /// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust | ||||
| /// its left bound as well. | ||||
| /// | ||||
| /// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`. | ||||
| /// This case is reachable when a document id is removed from a sub-level node but is still present in another one. | ||||
| /// For example, removing `2` from a document containing `2` and `3`, the document id will removed form the `level 0` but should remain in the group node [1..4] in `level 1`. | ||||
| enum ModificationResult { | ||||
|     InPlace, | ||||
|     Expand, | ||||
|     Insert, | ||||
| } | ||||
| enum DeletionResult { | ||||
|     InPlace, | ||||
|     Reduce { next: Option<Vec<u8>> }, | ||||
|     Remove { next: Option<Vec<u8>> }, | ||||
|     Nothing, | ||||
| } | ||||
|  | ||||
| /// Algorithm to incrementally insert and delete elememts into the | ||||
| @@ -65,8 +89,9 @@ impl FacetsUpdateIncremental { | ||||
|  | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")] | ||||
|     pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> { | ||||
|         let mut current_field_id = None; | ||||
|         let mut facet_level_may_be_updated = false; | ||||
|         let mut iter = self.delta_data.into_stream_merger_iter()?; | ||||
|  | ||||
|         while let Some((key, value)) = iter.next()? { | ||||
|             if !valid_lmdb_key(key) { | ||||
|                 continue; | ||||
| @@ -74,25 +99,47 @@ impl FacetsUpdateIncremental { | ||||
|  | ||||
|             let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key) | ||||
|                 .map_err(heed::Error::Encoding)?; | ||||
|  | ||||
|             if facet_level_may_be_updated | ||||
|                 && current_field_id.map_or(false, |fid| fid != key.field_id) | ||||
|             { | ||||
|                 // Only add or remove a level after making all the field modifications. | ||||
|                 self.inner.add_or_delete_level(wtxn, current_field_id.unwrap())?; | ||||
|                 facet_level_may_be_updated = false; | ||||
|             } | ||||
|             current_field_id = Some(key.field_id); | ||||
|  | ||||
|             let value = KvReader::new(value); | ||||
|             let docids_to_delete = value | ||||
|                 .get(DelAdd::Deletion) | ||||
|                 .map(CboRoaringBitmapCodec::bytes_decode) | ||||
|                 .map(|o| o.map_err(heed::Error::Encoding)); | ||||
|                 .map(|o| o.map_err(heed::Error::Encoding)) | ||||
|                 .transpose()?; | ||||
|  | ||||
|             let docids_to_add = value | ||||
|                 .get(DelAdd::Addition) | ||||
|                 .map(CboRoaringBitmapCodec::bytes_decode) | ||||
|                 .map(|o| o.map_err(heed::Error::Encoding)); | ||||
|                 .map(|o| o.map_err(heed::Error::Encoding)) | ||||
|                 .transpose()?; | ||||
|  | ||||
|             if let Some(docids_to_delete) = docids_to_delete { | ||||
|                 let docids_to_delete = docids_to_delete?; | ||||
|                 self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?; | ||||
|             let level_size_changed = self.inner.modify( | ||||
|                 wtxn, | ||||
|                 key.field_id, | ||||
|                 key.left_bound, | ||||
|                 docids_to_add.as_ref(), | ||||
|                 docids_to_delete.as_ref(), | ||||
|             )?; | ||||
|  | ||||
|             if level_size_changed { | ||||
|                 // if a node has been added or removed from the highest level, | ||||
|                 // we may have to update the facet level. | ||||
|                 facet_level_may_be_updated = true; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|             if let Some(docids_to_add) = docids_to_add { | ||||
|                 let docids_to_add = docids_to_add?; | ||||
|                 self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?; | ||||
|         if let Some(field_id) = current_field_id { | ||||
|             if facet_level_may_be_updated { | ||||
|                 self.inner.add_or_delete_level(wtxn, field_id)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
| @@ -166,138 +213,78 @@ impl FacetsUpdateIncrementalInner { | ||||
|     /// | ||||
|     /// ## Return | ||||
|     /// See documentation of `insert_in_level` | ||||
|     fn insert_in_level_0( | ||||
|     fn modify_in_level_0( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> Result<InsertionResult> { | ||||
|         add_docids: Option<&RoaringBitmap>, | ||||
|         del_docids: Option<&RoaringBitmap>, | ||||
|     ) -> Result<ModificationResult> { | ||||
|         let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value }; | ||||
|         let value = FacetGroupValue { bitmap: docids.clone(), size: 1 }; | ||||
|  | ||||
|         let mut level0_prefix = vec![]; | ||||
|         level0_prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|         level0_prefix.push(0); | ||||
|  | ||||
|         let mut iter = | ||||
|             self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?; | ||||
|  | ||||
|         if iter.next().is_none() { | ||||
|             drop(iter); | ||||
|             self.db.put(txn, &key, &value)?; | ||||
|             Ok(InsertionResult::Insert) | ||||
|         } else { | ||||
|             drop(iter); | ||||
|             let old_value = self.db.get(txn, &key)?; | ||||
|             match old_value { | ||||
|                 Some(mut updated_value) => { | ||||
|                     // now merge the two | ||||
|                     updated_value.bitmap |= value.bitmap; | ||||
|                     self.db.put(txn, &key, &updated_value)?; | ||||
|                     Ok(InsertionResult::InPlace) | ||||
|                 } | ||||
|                 None => { | ||||
|         let old_value = self.db.get(txn, &key)?; | ||||
|         match (old_value, add_docids, del_docids) { | ||||
|             // Addition + deletion on an existing value | ||||
|             (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), Some(del_docids)) => { | ||||
|                 let value = FacetGroupValue { bitmap: (bitmap - del_docids) | add_docids, size: 1 }; | ||||
|                 self.db.put(txn, &key, &value)?; | ||||
|                 Ok(ModificationResult::InPlace) | ||||
|             } | ||||
|             // Addition on an existing value | ||||
|             (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), None) => { | ||||
|                 let value = FacetGroupValue { bitmap: bitmap | add_docids, size: 1 }; | ||||
|                 self.db.put(txn, &key, &value)?; | ||||
|                 Ok(ModificationResult::InPlace) | ||||
|             } | ||||
|             // Addition of a new value (ignore deletion) | ||||
|             (None, Some(add_docids), _) => { | ||||
|                 let value = FacetGroupValue { bitmap: add_docids.clone(), size: 1 }; | ||||
|                 self.db.put(txn, &key, &value)?; | ||||
|                 Ok(ModificationResult::Insert) | ||||
|             } | ||||
|             // Deletion on an existing value, fully delete the key if the resulted value is empty. | ||||
|             (Some(FacetGroupValue { mut bitmap, .. }), None, Some(del_docids)) => { | ||||
|                 bitmap -= del_docids; | ||||
|                 if bitmap.is_empty() { | ||||
|                     // Full deletion | ||||
|                     let mut next_key = None; | ||||
|                     if let Some((next, _)) = | ||||
|                         self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)? | ||||
|                     { | ||||
|                         if next.field_id == field_id && next.level == 0 { | ||||
|                             next_key = Some(next.left_bound.to_vec()); | ||||
|                         } | ||||
|                     } | ||||
|                     self.db.delete(txn, &key)?; | ||||
|                     Ok(ModificationResult::Remove { next: next_key }) | ||||
|                 } else { | ||||
|                     // Partial deletion | ||||
|                     let value = FacetGroupValue { bitmap, size: 1 }; | ||||
|                     self.db.put(txn, &key, &value)?; | ||||
|                     Ok(InsertionResult::Insert) | ||||
|                     Ok(ModificationResult::InPlace) | ||||
|                 } | ||||
|             } | ||||
|             // Otherwise do nothing (None + no addition + deletion == Some + no addition + no deletion == Nothing), | ||||
|             // may be unreachable at some point. | ||||
|             (None, None, _) | (Some(_), None, None) => Ok(ModificationResult::Nothing), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Insert the given facet value  and corresponding document ids in all the levels of the database up to the given `level`. | ||||
|     /// This function works recursively. | ||||
|     /// Split a level node into two balanced nodes. | ||||
|     /// | ||||
|     /// ## Return | ||||
|     /// Returns the effect of adding the facet value to the database on the given `level`. | ||||
|     /// | ||||
|     /// - `InsertionResult::InPlace` means that inserting the `facet_value` into the `level` did not have | ||||
|     /// an effect on the number of keys in that level. Therefore, it did not increase the number of children | ||||
|     /// of the parent node. | ||||
|     /// | ||||
|     /// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted | ||||
|     /// in the addition of a new key in that level, and that therefore the number of children | ||||
|     /// of the parent node should be incremented. | ||||
|     fn insert_in_level( | ||||
|     /// # Return | ||||
|     /// Returns `ModificationResult::Insert` if the split is successful. | ||||
|     fn split_group( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         level: u8, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> Result<InsertionResult> { | ||||
|         if level == 0 { | ||||
|             return self.insert_in_level_0(txn, field_id, facet_value, docids); | ||||
|         } | ||||
|  | ||||
|         let max_group_size = self.max_group_size; | ||||
|  | ||||
|         let result = self.insert_in_level(txn, field_id, level - 1, facet_value, docids)?; | ||||
|         // level below inserted an element | ||||
|  | ||||
|         let (insertion_key, insertion_value) = | ||||
|             self.find_insertion_key_value(field_id, level, facet_value, txn)?; | ||||
|  | ||||
|         match result { | ||||
|             // because we know that we inserted in place, the facet_value is not a new one | ||||
|             // thus it doesn't extend a group, and thus the insertion key computed above is | ||||
|             // still correct | ||||
|             InsertionResult::InPlace => { | ||||
|                 let mut updated_value = insertion_value; | ||||
|                 updated_value.bitmap |= docids; | ||||
|                 self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; | ||||
|  | ||||
|                 return Ok(InsertionResult::InPlace); | ||||
|             } | ||||
|             InsertionResult::Expand => {} | ||||
|             InsertionResult::Insert => {} | ||||
|         } | ||||
|  | ||||
|         // Here we know that inserting the facet value in the level below resulted in the creation | ||||
|         // of a new key. Therefore, it may be the case that we need to modify the left bound of the | ||||
|         // insertion key (see documentation of `find_insertion_key_value` for an example of when that | ||||
|         // could happen). | ||||
|         let (insertion_key, insertion_key_was_modified) = { | ||||
|             let mut new_insertion_key = insertion_key.clone(); | ||||
|             let mut key_should_be_modified = false; | ||||
|  | ||||
|             if facet_value < insertion_key.left_bound.as_slice() { | ||||
|                 new_insertion_key.left_bound = facet_value.to_vec(); | ||||
|                 key_should_be_modified = true; | ||||
|             } | ||||
|             if key_should_be_modified { | ||||
|                 let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?; | ||||
|                 assert!(is_deleted); | ||||
|                 self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?; | ||||
|             } | ||||
|             (new_insertion_key, key_should_be_modified) | ||||
|         }; | ||||
|         // Now we know that the insertion key contains the `facet_value`. | ||||
|  | ||||
|         // We still need to update the insertion value by: | ||||
|         // 1. Incrementing the number of children (since the recursive call returned `InsertionResult::Insert`) | ||||
|         // 2. Merge the previous docids with the new one | ||||
|         let mut updated_value = insertion_value; | ||||
|  | ||||
|         if matches!(result, InsertionResult::Insert) { | ||||
|             updated_value.size += 1; | ||||
|         } | ||||
|  | ||||
|         if updated_value.size < max_group_size { | ||||
|             updated_value.bitmap |= docids; | ||||
|             self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; | ||||
|             if insertion_key_was_modified { | ||||
|                 return Ok(InsertionResult::Expand); | ||||
|             } else { | ||||
|                 return Ok(InsertionResult::InPlace); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size` | ||||
|         // Therefore it must be split into two nodes. | ||||
|  | ||||
|         let size_left = updated_value.size / 2; | ||||
|         let size_right = updated_value.size - size_left; | ||||
|         insertion_key: FacetGroupKey<Vec<u8>>, | ||||
|         insertion_value: FacetGroupValue, | ||||
|     ) -> Result<ModificationResult> { | ||||
|         let size_left = insertion_value.size / 2; | ||||
|         let size_right = insertion_value.size - size_left; | ||||
|  | ||||
|         let level_below = level - 1; | ||||
|  | ||||
| @@ -351,34 +338,228 @@ impl FacetsUpdateIncrementalInner { | ||||
|         self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?; | ||||
|         self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?; | ||||
|  | ||||
|         Ok(InsertionResult::Insert) | ||||
|         Ok(ModificationResult::Insert) | ||||
|     } | ||||
|  | ||||
|     /// Insert the given facet value and corresponding document ids in the database. | ||||
|     pub fn insert( | ||||
|     /// Remove the docids still present in the related sub-level nodes from the del_docids. | ||||
|     /// | ||||
|     /// This process is needed to avoid removing docids from a group node where the docid is present in several sub-nodes. | ||||
|     fn trim_del_docids<'a>( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         level: u8, | ||||
|         insertion_key: &FacetGroupKey<Vec<u8>>, | ||||
|         insertion_value_size: usize, | ||||
|         del_docids: &'a RoaringBitmap, | ||||
|     ) -> Result<std::borrow::Cow<'a, RoaringBitmap>> { | ||||
|         let level_below = level - 1; | ||||
|  | ||||
|         let start_key = FacetGroupKey { | ||||
|             field_id, | ||||
|             level: level_below, | ||||
|             left_bound: insertion_key.left_bound.as_slice(), | ||||
|         }; | ||||
|  | ||||
|         let mut del_docids = std::borrow::Cow::Borrowed(del_docids); | ||||
|         let iter = self.db.range(txn, &(start_key..))?.take(insertion_value_size); | ||||
|         for next in iter { | ||||
|             let (_, value) = next?; | ||||
|             // if a sublevel bitmap as common docids with del_docids, | ||||
|             // then these docids shouldn't be removed and so, remove them from the deletion list. | ||||
|             if !value.bitmap.is_disjoint(&del_docids) { | ||||
|                 *del_docids.to_mut() -= value.bitmap; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(del_docids) | ||||
|     } | ||||
|  | ||||
|     /// Modify the given facet value and corresponding document ids in all the levels of the database up to the given `level`. | ||||
|     /// This function works recursively. | ||||
|     /// | ||||
|     /// ## Return | ||||
|     /// Returns the effect of modifying the facet value to the database on the given `level`. | ||||
|     /// | ||||
|     fn modify_in_level( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         level: u8, | ||||
|         facet_value: &[u8], | ||||
|         add_docids: Option<&RoaringBitmap>, | ||||
|         del_docids: Option<&RoaringBitmap>, | ||||
|     ) -> Result<ModificationResult> { | ||||
|         if level == 0 { | ||||
|             return self.modify_in_level_0(txn, field_id, facet_value, add_docids, del_docids); | ||||
|         } | ||||
|  | ||||
|         let result = | ||||
|             self.modify_in_level(txn, field_id, level - 1, facet_value, add_docids, del_docids)?; | ||||
|         // level below inserted an element | ||||
|  | ||||
|         if let ModificationResult::Nothing = result { | ||||
|             // if the previous level has not been modified, | ||||
|             // early return ModificationResult::Nothing. | ||||
|             return Ok(ModificationResult::Nothing); | ||||
|         } | ||||
|  | ||||
|         let (insertion_key, insertion_value) = | ||||
|             self.find_insertion_key_value(field_id, level, facet_value, txn)?; | ||||
|         let insertion_value_size = insertion_value.size as usize; | ||||
|  | ||||
|         let mut insertion_value_was_modified = false; | ||||
|         let mut updated_value = insertion_value; | ||||
|  | ||||
|         if let ModificationResult::Insert = result { | ||||
|             // if a key has been inserted in the sub-level raise the value size. | ||||
|             updated_value.size += 1; | ||||
|             insertion_value_was_modified = true; | ||||
|         } else if let ModificationResult::Remove { .. } = result { | ||||
|             if updated_value.size <= 1 { | ||||
|                 // if the only remaining node is the one to delete, | ||||
|                 // delete the key instead and early return. | ||||
|                 let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?; | ||||
|                 assert!(is_deleted); | ||||
|                 return Ok(result); | ||||
|             } else { | ||||
|                 // Reduce the value size | ||||
|                 updated_value.size -= 1; | ||||
|                 insertion_value_was_modified = true; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let (insertion_key, insertion_key_modification) = | ||||
|             if let ModificationResult::InPlace = result { | ||||
|                 (insertion_key, ModificationResult::InPlace) | ||||
|             } else { | ||||
|                 // Inserting or deleting the facet value in the level below resulted in the creation | ||||
|                 // of a new key. Therefore, it may be the case that we need to modify the left bound of the | ||||
|                 // insertion key (see documentation of `find_insertion_key_value` for an example of when that | ||||
|                 // could happen). | ||||
|                 let mut new_insertion_key = insertion_key.clone(); | ||||
|                 let mut key_modification = ModificationResult::InPlace; | ||||
|  | ||||
|                 if let ModificationResult::Remove { next } | ModificationResult::Reduce { next } = | ||||
|                     result | ||||
|                 { | ||||
|                     // if the deleted facet_value is the left_bound of the current node, | ||||
|                     // the left_bound should be updated reducing the current node. | ||||
|                     let reduced_range = facet_value == insertion_key.left_bound; | ||||
|                     if reduced_range { | ||||
|                         new_insertion_key.left_bound = next.clone().unwrap(); | ||||
|                         key_modification = ModificationResult::Reduce { next }; | ||||
|                     } | ||||
|                 } else if facet_value < insertion_key.left_bound.as_slice() { | ||||
|                     // if the added facet_value is the under the left_bound of the current node, | ||||
|                     // the left_bound should be updated expanding the current node. | ||||
|                     new_insertion_key.left_bound = facet_value.to_vec(); | ||||
|                     key_modification = ModificationResult::Expand; | ||||
|                 } | ||||
|  | ||||
|                 if matches!( | ||||
|                     key_modification, | ||||
|                     ModificationResult::Expand | ModificationResult::Reduce { .. } | ||||
|                 ) { | ||||
|                     // if the node should be updated, delete it, it will be recreated using a new key later. | ||||
|                     let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?; | ||||
|                     assert!(is_deleted); | ||||
|                 } | ||||
|                 (new_insertion_key, key_modification) | ||||
|             }; | ||||
|  | ||||
|         if updated_value.size < self.max_group_size { | ||||
|             // If there are docids to delete, trim them avoiding unexpected removal. | ||||
|             if let Some(del_docids) = del_docids | ||||
|                 .map(|ids| { | ||||
|                     self.trim_del_docids( | ||||
|                         txn, | ||||
|                         field_id, | ||||
|                         level, | ||||
|                         &insertion_key, | ||||
|                         insertion_value_size, | ||||
|                         ids, | ||||
|                     ) | ||||
|                 }) | ||||
|                 .transpose()? | ||||
|                 .filter(|ids| !ids.is_empty()) | ||||
|             { | ||||
|                 updated_value.bitmap -= &*del_docids; | ||||
|                 insertion_value_was_modified = true; | ||||
|             } | ||||
|  | ||||
|             if let Some(add_docids) = add_docids { | ||||
|                 updated_value.bitmap |= add_docids; | ||||
|                 insertion_value_was_modified = true; | ||||
|             } | ||||
|  | ||||
|             if insertion_value_was_modified | ||||
|                 || matches!( | ||||
|                     insertion_key_modification, | ||||
|                     ModificationResult::Expand | ModificationResult::Reduce { .. } | ||||
|                 ) | ||||
|             { | ||||
|                 // if any modification occured, insert it in the database. | ||||
|                 self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; | ||||
|                 Ok(insertion_key_modification) | ||||
|             } else { | ||||
|                 // this case is reachable when a docid is removed from a sub-level node but is still present in another one. | ||||
|                 // For instance, a document containing 2 and 3, if 2 is removed, the docid should remain in the group node [1..4]. | ||||
|                 Ok(ModificationResult::Nothing) | ||||
|             } | ||||
|         } else { | ||||
|             // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size` | ||||
|             // Therefore it must be split into two nodes. | ||||
|             self.split_group(txn, field_id, level, insertion_key, updated_value) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Modify the given facet value and corresponding document ids in the database. | ||||
|     /// If no more document ids correspond to the facet value, delete it completely. | ||||
|     /// | ||||
|     /// ## Return | ||||
|     /// Returns `true` if some tree-nodes of the highest level have been removed or added implying a potential | ||||
|     /// addition or deletion of a facet level. | ||||
|     /// Otherwise returns `false` if the tree-nodes have been modified in place. | ||||
|     pub fn modify( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> Result<()> { | ||||
|         if docids.is_empty() { | ||||
|             return Ok(()); | ||||
|         add_docids: Option<&RoaringBitmap>, | ||||
|         del_docids: Option<&RoaringBitmap>, | ||||
|     ) -> Result<bool> { | ||||
|         if add_docids.map_or(true, RoaringBitmap::is_empty) | ||||
|             && del_docids.map_or(true, RoaringBitmap::is_empty) | ||||
|         { | ||||
|             return Ok(false); | ||||
|         } | ||||
|         let group_size = self.group_size; | ||||
|  | ||||
|         let highest_level = get_highest_level(txn, self.db, field_id)?; | ||||
|  | ||||
|         let result = self.insert_in_level(txn, field_id, highest_level, facet_value, docids)?; | ||||
|         let result = self.modify_in_level( | ||||
|             txn, | ||||
|             field_id, | ||||
|             highest_level, | ||||
|             facet_value, | ||||
|             add_docids, | ||||
|             del_docids, | ||||
|         )?; | ||||
|         match result { | ||||
|             InsertionResult::InPlace => return Ok(()), | ||||
|             InsertionResult::Expand => return Ok(()), | ||||
|             InsertionResult::Insert => {} | ||||
|             ModificationResult::InPlace | ||||
|             | ModificationResult::Expand | ||||
|             | ModificationResult::Nothing | ||||
|             | ModificationResult::Reduce { .. } => Ok(false), | ||||
|             ModificationResult::Insert | ModificationResult::Remove { .. } => Ok(true), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|         // Here we check whether the highest level has exceeded `min_level_size` * `self.group_size`. | ||||
|         // If it has, we must build an addition level above it. | ||||
|  | ||||
|     /// Check whether the highest level has exceeded `min_level_size` * `self.group_size`. | ||||
|     /// If it has, we must build an addition level above it. | ||||
|     /// Then check whether the highest level is under `min_level_size`. | ||||
|     /// If it has, we must remove the complete level. | ||||
|     pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn, field_id: u16) -> Result<()> { | ||||
|         let highest_level = get_highest_level(txn, self.db, field_id)?; | ||||
|         let mut highest_level_prefix = vec![]; | ||||
|         highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|         highest_level_prefix.push(highest_level); | ||||
| @@ -386,14 +567,48 @@ impl FacetsUpdateIncrementalInner { | ||||
|         let size_highest_level = | ||||
|             self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count(); | ||||
|  | ||||
|         if size_highest_level < self.group_size as usize * self.min_level_size as usize { | ||||
|             return Ok(()); | ||||
|         if size_highest_level >= self.group_size as usize * self.min_level_size as usize { | ||||
|             self.add_level(txn, field_id, highest_level, &highest_level_prefix, size_highest_level) | ||||
|         } else if size_highest_level < self.min_level_size as usize && highest_level != 0 { | ||||
|             self.delete_level(txn, &highest_level_prefix) | ||||
|         } else { | ||||
|             Ok(()) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Delete a level. | ||||
|     fn delete_level(&self, txn: &mut RwTxn, highest_level_prefix: &[u8]) -> Result<()> { | ||||
|         let mut to_delete = vec![]; | ||||
|         let mut iter = | ||||
|             self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?; | ||||
|         for el in iter.by_ref() { | ||||
|             let (k, _) = el?; | ||||
|             to_delete.push( | ||||
|                 FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k) | ||||
|                     .map_err(Error::Encoding)? | ||||
|                     .into_owned(), | ||||
|             ); | ||||
|         } | ||||
|         drop(iter); | ||||
|         for k in to_delete { | ||||
|             self.db.delete(txn, &k.as_ref())?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Build an additional level for the field id. | ||||
|     fn add_level( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         highest_level: u8, | ||||
|         highest_level_prefix: &[u8], | ||||
|         size_highest_level: usize, | ||||
|     ) -> Result<()> { | ||||
|         let mut groups_iter = self | ||||
|             .db | ||||
|             .remap_types::<Bytes, FacetGroupValueCodec>() | ||||
|             .prefix_iter(txn, &highest_level_prefix)?; | ||||
|             .prefix_iter(txn, highest_level_prefix)?; | ||||
|  | ||||
|         let nbr_new_groups = size_highest_level / self.group_size as usize; | ||||
|         let nbr_leftover_elements = size_highest_level % self.group_size as usize; | ||||
| @@ -402,7 +617,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|         for _ in 0..nbr_new_groups { | ||||
|             let mut first_key = None; | ||||
|             let mut values = RoaringBitmap::new(); | ||||
|             for _ in 0..group_size { | ||||
|             for _ in 0..self.group_size { | ||||
|                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; | ||||
|                 let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes) | ||||
|                     .map_err(Error::Encoding)?; | ||||
| @@ -417,7 +632,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|                 level: highest_level + 1, | ||||
|                 left_bound: first_key.unwrap().left_bound, | ||||
|             }; | ||||
|             let value = FacetGroupValue { size: group_size, bitmap: values }; | ||||
|             let value = FacetGroupValue { size: self.group_size, bitmap: values }; | ||||
|             to_add.push((key.into_owned(), value)); | ||||
|         } | ||||
|         // now we add the rest of the level, in case its size is > group_size * min_level_size | ||||
| @@ -452,173 +667,6 @@ impl FacetsUpdateIncrementalInner { | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Delete the given document id from the given facet value in the database, from level 0 to the | ||||
|     /// the given level. | ||||
|     /// | ||||
|     /// ## Return | ||||
|     /// Returns the effect of removing the document id from the database on the given `level`. | ||||
|     /// | ||||
|     /// - `DeletionResult::InPlace` means that deleting the document id did not have | ||||
|     /// an effect on the keys in that level. | ||||
|     /// | ||||
|     /// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the | ||||
|     /// number of keys in the level. For example, removing a document id from the facet value `3` could | ||||
|     /// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted | ||||
|     /// entirely. In that case, `DeletionResult::Remove` is returned. The parent of the deleted key must | ||||
|     /// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well. | ||||
|     /// | ||||
|     /// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the | ||||
|     /// bounds of the keys of the level. For example, removing a document id from the facet value | ||||
|     /// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore, | ||||
|     /// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). | ||||
|     /// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust | ||||
|     /// its left bound as well. | ||||
|     fn delete_in_level( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         level: u8, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> Result<DeletionResult> { | ||||
|         if level == 0 { | ||||
|             return self.delete_in_level_0(txn, field_id, facet_value, docids); | ||||
|         } | ||||
|         let (deletion_key, mut bitmap) = | ||||
|             self.find_insertion_key_value(field_id, level, facet_value, txn)?; | ||||
|  | ||||
|         let result = self.delete_in_level(txn, field_id, level - 1, facet_value, docids)?; | ||||
|  | ||||
|         let mut decrease_size = false; | ||||
|         let next_key = match result { | ||||
|             DeletionResult::InPlace => { | ||||
|                 bitmap.bitmap -= docids; | ||||
|                 self.db.put(txn, &deletion_key.as_ref(), &bitmap)?; | ||||
|                 return Ok(DeletionResult::InPlace); | ||||
|             } | ||||
|             DeletionResult::Reduce { next } => next, | ||||
|             DeletionResult::Remove { next } => { | ||||
|                 decrease_size = true; | ||||
|                 next | ||||
|             } | ||||
|         }; | ||||
|         // If either DeletionResult::Reduce or DeletionResult::Remove was returned, | ||||
|         // then we may need to adjust the left_bound of the deletion key. | ||||
|  | ||||
|         // If DeletionResult::Remove was returned, then we need to decrease the group | ||||
|         // size of the deletion key. | ||||
|         let mut updated_value = bitmap; | ||||
|         if decrease_size { | ||||
|             updated_value.size -= 1; | ||||
|         } | ||||
|  | ||||
|         if updated_value.size == 0 { | ||||
|             self.db.delete(txn, &deletion_key.as_ref())?; | ||||
|             Ok(DeletionResult::Remove { next: next_key }) | ||||
|         } else { | ||||
|             let mut updated_deletion_key = deletion_key.clone(); | ||||
|             let reduced_range = facet_value == deletion_key.left_bound; | ||||
|             if reduced_range { | ||||
|                 updated_deletion_key.left_bound = next_key.clone().unwrap(); | ||||
|             } | ||||
|             updated_value.bitmap -= docids; | ||||
|             let _ = self.db.delete(txn, &deletion_key.as_ref())?; | ||||
|             self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?; | ||||
|             if reduced_range { | ||||
|                 Ok(DeletionResult::Reduce { next: next_key }) | ||||
|             } else { | ||||
|                 Ok(DeletionResult::InPlace) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn delete_in_level_0( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> Result<DeletionResult> { | ||||
|         let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value }; | ||||
|         let mut bitmap = self.db.get(txn, &key)?.unwrap().bitmap; | ||||
|         bitmap -= docids; | ||||
|  | ||||
|         if bitmap.is_empty() { | ||||
|             let mut next_key = None; | ||||
|             if let Some((next, _)) = | ||||
|                 self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)? | ||||
|             { | ||||
|                 if next.field_id == field_id && next.level == 0 { | ||||
|                     next_key = Some(next.left_bound.to_vec()); | ||||
|                 } | ||||
|             } | ||||
|             self.db.delete(txn, &key)?; | ||||
|             Ok(DeletionResult::Remove { next: next_key }) | ||||
|         } else { | ||||
|             self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?; | ||||
|             Ok(DeletionResult::InPlace) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn delete( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> Result<()> { | ||||
|         if self | ||||
|             .db | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })? | ||||
|             .is_none() | ||||
|         { | ||||
|             return Ok(()); | ||||
|         } | ||||
|         let highest_level = get_highest_level(txn, self.db, field_id)?; | ||||
|  | ||||
|         let result = self.delete_in_level(txn, field_id, highest_level, facet_value, docids)?; | ||||
|         match result { | ||||
|             DeletionResult::InPlace => return Ok(()), | ||||
|             DeletionResult::Reduce { .. } => return Ok(()), | ||||
|             DeletionResult::Remove { .. } => {} | ||||
|         } | ||||
|  | ||||
|         // if we either removed a key from the highest level, its size may have fallen | ||||
|         // below `min_level_size`, in which case we need to remove the entire level | ||||
|  | ||||
|         let mut highest_level_prefix = vec![]; | ||||
|         highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|         highest_level_prefix.push(highest_level); | ||||
|  | ||||
|         if highest_level == 0 | ||||
|             || self | ||||
|                 .db | ||||
|                 .remap_types::<Bytes, Bytes>() | ||||
|                 .prefix_iter(txn, &highest_level_prefix)? | ||||
|                 .count() | ||||
|                 >= self.min_level_size as usize | ||||
|         { | ||||
|             return Ok(()); | ||||
|         } | ||||
|         let mut to_delete = vec![]; | ||||
|         let mut iter = | ||||
|             self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?; | ||||
|         for el in iter.by_ref() { | ||||
|             let (k, _) = el?; | ||||
|             to_delete.push( | ||||
|                 FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k) | ||||
|                     .map_err(Error::Encoding)? | ||||
|                     .into_owned(), | ||||
|             ); | ||||
|         } | ||||
|         drop(iter); | ||||
|         for k in to_delete { | ||||
|             self.db.delete(txn, &k.as_ref())?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> FacetGroupKey<&'a [u8]> { | ||||
|   | ||||
| @@ -429,7 +429,8 @@ pub(crate) mod test_helpers { | ||||
|                 max_group_size: self.max_group_size.get(), | ||||
|             }; | ||||
|             let key_bytes = BoundCodec::bytes_encode(key).unwrap(); | ||||
|             update.insert(wtxn, field_id, &key_bytes, docids).unwrap(); | ||||
|             update.modify(wtxn, field_id, &key_bytes, Some(docids), None).unwrap(); | ||||
|             update.add_or_delete_level(wtxn, field_id).unwrap(); | ||||
|         } | ||||
|         pub fn delete_single_docid<'a>( | ||||
|             &self, | ||||
| @@ -455,7 +456,8 @@ pub(crate) mod test_helpers { | ||||
|                 max_group_size: self.max_group_size.get(), | ||||
|             }; | ||||
|             let key_bytes = BoundCodec::bytes_encode(key).unwrap(); | ||||
|             update.delete(wtxn, field_id, &key_bytes, docids).unwrap(); | ||||
|             update.modify(wtxn, field_id, &key_bytes, None, Some(docids)).unwrap(); | ||||
|             update.add_or_delete_level(wtxn, field_id).unwrap(); | ||||
|         } | ||||
|  | ||||
|         pub fn bulk_insert<'a, 'b>( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user