mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 15:36:28 +00:00 
			
		
		
		
	Remove Index::faceted_documents_ids
This commit is contained in:
		| @@ -55,7 +55,6 @@ pub mod main_key { | ||||
|     /// e.g. vector-hnsw0x0032. | ||||
|     pub const VECTOR_HNSW_KEY_PREFIX: &str = "vector-hnsw"; | ||||
|     pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; | ||||
|     pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; | ||||
|     pub const PRIMARY_KEY_KEY: &str = "primary-key"; | ||||
|     pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; | ||||
|     pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields"; | ||||
| @@ -64,7 +63,6 @@ pub mod main_key { | ||||
|     pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens"; | ||||
|     pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens"; | ||||
|     pub const DICTIONARY_KEY: &str = "dictionary"; | ||||
|     pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; | ||||
|     pub const SYNONYMS_KEY: &str = "synonyms"; | ||||
|     pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms"; | ||||
|     pub const WORDS_FST_KEY: &str = "words-fst"; | ||||
| @@ -926,44 +924,6 @@ impl Index { | ||||
|  | ||||
|     /* faceted documents ids */ | ||||
|  | ||||
|     /// Writes the documents ids that are faceted under this field id for the given facet type. | ||||
|     pub fn put_faceted_documents_ids( | ||||
|         &self, | ||||
|         wtxn: &mut RwTxn, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> heed::Result<()> { | ||||
|         let key = match facet_type { | ||||
|             FacetType::String => main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX, | ||||
|             FacetType::Number => main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX, | ||||
|         }; | ||||
|         let mut buffer = vec![0u8; key.len() + size_of::<FieldId>()]; | ||||
|         buffer[..key.len()].copy_from_slice(key.as_bytes()); | ||||
|         buffer[key.len()..].copy_from_slice(&field_id.to_be_bytes()); | ||||
|         self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) | ||||
|     } | ||||
|  | ||||
|     /// Retrieve all the documents ids that are faceted under this field id for the given facet type. | ||||
|     pub fn faceted_documents_ids( | ||||
|         &self, | ||||
|         rtxn: &RoTxn, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|     ) -> heed::Result<RoaringBitmap> { | ||||
|         let key = match facet_type { | ||||
|             FacetType::String => main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX, | ||||
|             FacetType::Number => main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX, | ||||
|         }; | ||||
|         let mut buffer = vec![0u8; key.len() + size_of::<FieldId>()]; | ||||
|         buffer[..key.len()].copy_from_slice(key.as_bytes()); | ||||
|         buffer[key.len()..].copy_from_slice(&field_id.to_be_bytes()); | ||||
|         match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { | ||||
|             Some(docids) => Ok(docids), | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Retrieve all the documents which contain this field id set as null | ||||
|     pub fn null_faceted_documents_ids( | ||||
|         &self, | ||||
|   | ||||
| @@ -359,31 +359,7 @@ pub fn snap_external_documents_ids(index: &Index) -> String { | ||||
|  | ||||
|     snap | ||||
| } | ||||
| pub fn snap_number_faceted_documents_ids(index: &Index) -> String { | ||||
|     let rtxn = index.read_txn().unwrap(); | ||||
|     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|     let mut snap = String::new(); | ||||
|     for field_id in fields_ids_map.ids() { | ||||
|         let number_faceted_documents_ids = | ||||
|             index.faceted_documents_ids(&rtxn, field_id, FacetType::Number).unwrap(); | ||||
|         writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&number_faceted_documents_ids)) | ||||
|             .unwrap(); | ||||
|     } | ||||
|     snap | ||||
| } | ||||
| pub fn snap_string_faceted_documents_ids(index: &Index) -> String { | ||||
|     let rtxn = index.read_txn().unwrap(); | ||||
|     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|  | ||||
|     let mut snap = String::new(); | ||||
|     for field_id in fields_ids_map.ids() { | ||||
|         let string_faceted_documents_ids = | ||||
|             index.faceted_documents_ids(&rtxn, field_id, FacetType::String).unwrap(); | ||||
|         writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&string_faceted_documents_ids)) | ||||
|             .unwrap(); | ||||
|     } | ||||
|     snap | ||||
| } | ||||
| pub fn snap_words_fst(index: &Index) -> String { | ||||
|     let rtxn = index.read_txn().unwrap(); | ||||
|     let words_fst = index.words_fst(&rtxn).unwrap(); | ||||
| @@ -531,12 +507,6 @@ macro_rules! full_snap_of_db { | ||||
|     ($index:ident, external_documents_ids) => {{ | ||||
|         $crate::snapshot_tests::snap_external_documents_ids(&$index) | ||||
|     }}; | ||||
|     ($index:ident, number_faceted_documents_ids) => {{ | ||||
|         $crate::snapshot_tests::snap_number_faceted_documents_ids(&$index) | ||||
|     }}; | ||||
|     ($index:ident, string_faceted_documents_ids) => {{ | ||||
|         $crate::snapshot_tests::snap_string_faceted_documents_ids(&$index) | ||||
|     }}; | ||||
|     ($index:ident, words_fst) => {{ | ||||
|         $crate::snapshot_tests::snap_words_fst(&$index) | ||||
|     }}; | ||||
|   | ||||
| @@ -64,22 +64,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | ||||
|         self.index.delete_geo_faceted_documents_ids(self.wtxn)?; | ||||
|         self.index.delete_vector_hnsw(self.wtxn)?; | ||||
|  | ||||
|         // We clean all the faceted documents ids. | ||||
|         for field_id in faceted_fields { | ||||
|             self.index.put_faceted_documents_ids( | ||||
|                 self.wtxn, | ||||
|                 field_id, | ||||
|                 FacetType::Number, | ||||
|                 &empty_roaring, | ||||
|             )?; | ||||
|             self.index.put_faceted_documents_ids( | ||||
|                 self.wtxn, | ||||
|                 field_id, | ||||
|                 FacetType::String, | ||||
|                 &empty_roaring, | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         // Clear the other databases. | ||||
|         word_docids.clear(self.wtxn)?; | ||||
|         exact_word_docids.clear(self.wtxn)?; | ||||
|   | ||||
| @@ -384,12 +384,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | ||||
|         for facet_type in [FacetType::Number, FacetType::String] { | ||||
|             let mut affected_facet_values = HashMap::new(); | ||||
|             for field_id in self.index.faceted_fields_ids(self.wtxn)? { | ||||
|                 // Remove docids from the faceted documents ids of the current facet type | ||||
|                 let mut docids = | ||||
|                     self.index.faceted_documents_ids(self.wtxn, field_id, facet_type)?; | ||||
|                 docids -= &self.to_delete_docids; | ||||
|                 self.index.put_faceted_documents_ids(self.wtxn, field_id, facet_type, &docids)?; | ||||
|  | ||||
|                 let facet_values = remove_docids_from_field_id_docid_facet_value( | ||||
|                     self.index, | ||||
|                     self.wtxn, | ||||
|   | ||||
| @@ -23,9 +23,6 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | ||||
| /// | ||||
| /// First, the new elements are inserted into the level 0 of the database. Then, the | ||||
| /// higher levels are cleared and recomputed from the content of level 0. | ||||
| /// | ||||
| /// Finally, the `faceted_documents_ids` value in the main database of `Index` | ||||
| /// is updated to contain the new set of faceted documents. | ||||
| pub struct FacetsUpdateBulk<'i> { | ||||
|     index: &'i Index, | ||||
|     group_size: u8, | ||||
| @@ -86,7 +83,7 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|         let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size }; | ||||
|  | ||||
|         inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| { | ||||
|             index.put_faceted_documents_ids(wtxn, field_id, facet_type, &all_docids)?; | ||||
|             // TODO: remove the lambda altogether | ||||
|             Ok(()) | ||||
|         })?; | ||||
|  | ||||
| @@ -507,7 +504,6 @@ mod tests { | ||||
|         index.add_documents(documents).unwrap(); | ||||
|  | ||||
|         db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a"); | ||||
|         db_snap!(index, number_faceted_documents_ids, "initial", @"01594fecbb316798ce3651d6730a4521"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|   | ||||
| @@ -160,7 +160,6 @@ mod tests { | ||||
|         index.add_documents(documents).unwrap(); | ||||
|  | ||||
|         db_snap!(index, facet_id_f64_docids, 1, @"550cd138d6fe31ccdd42cd5392fbd576"); | ||||
|         db_snap!(index, number_faceted_documents_ids, 1, @"9a0ea88e7c9dcf6dc0ef0b601736ffcf"); | ||||
|  | ||||
|         let mut wtxn = index.env.write_txn().unwrap(); | ||||
|  | ||||
| @@ -178,7 +177,6 @@ mod tests { | ||||
|  | ||||
|         db_snap!(index, soft_deleted_documents_ids, @"[]"); | ||||
|         db_snap!(index, facet_id_f64_docids, 2, @"d4d5f14e7f1e1f09b86821a0b6defcc6"); | ||||
|         db_snap!(index, number_faceted_documents_ids, 2, @"3570e0ac0fdb21be9ebe433f59264b56"); | ||||
|     } | ||||
|  | ||||
|     // Same test as above but working with string values for the facets | ||||
| @@ -219,7 +217,6 @@ mod tests { | ||||
|  | ||||
|         // Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022) | ||||
|         db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503"); | ||||
|         db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5"); | ||||
|  | ||||
|         let mut wtxn = index.env.write_txn().unwrap(); | ||||
|  | ||||
| @@ -237,7 +234,6 @@ mod tests { | ||||
|  | ||||
|         db_snap!(index, soft_deleted_documents_ids, @"[]"); | ||||
|         db_snap!(index, facet_id_string_docids, 2, @"7f9c00b29e04d58c1821202a5dda0ebc"); | ||||
|         db_snap!(index, string_faceted_documents_ids, 2, @"504152afa5c94fd4e515dcdfa4c7161f"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -274,7 +270,6 @@ mod tests { | ||||
|  | ||||
|         // Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022) | ||||
|         db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503"); | ||||
|         db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5"); | ||||
|  | ||||
|         let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); | ||||
|  | ||||
| @@ -291,12 +286,6 @@ mod tests { | ||||
|  | ||||
|         db_snap!(index, soft_deleted_documents_ids, @"[]"); | ||||
|         db_snap!(index, facet_id_string_docids, 2, @"ece56086e76d50e661fb2b58475b9f7d"); | ||||
|         db_snap!(index, string_faceted_documents_ids, 2, @r###" | ||||
|         0   [] | ||||
|         1   [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ] | ||||
|         2   [292, 324, 358, 381, 493, 839, 852, ] | ||||
|         3   [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ] | ||||
|         "###); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -30,9 +30,6 @@ enum DeletionResult { | ||||
|  | ||||
| /// Algorithm to incrementally insert and delete elements into the | ||||
| /// `facet_id_(string/f64)_docids` databases. | ||||
| /// | ||||
| /// The `faceted_documents_ids` value in the main database of `Index` | ||||
| /// is also updated to contain the new set of faceted documents. | ||||
| pub struct FacetsUpdateIncremental<'i> { | ||||
|     index: &'i Index, | ||||
|     inner: FacetsUpdateIncrementalInner, | ||||
| @@ -70,29 +67,6 @@ impl<'i> FacetsUpdateIncremental<'i> { | ||||
|     } | ||||
|  | ||||
|     pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> { | ||||
|         #[derive(Default)] | ||||
|         struct DeltaDocids { | ||||
|             deleted: RoaringBitmap, | ||||
|             added: RoaringBitmap, | ||||
|         } | ||||
|         impl DeltaDocids { | ||||
|             fn add(&mut self, added: &RoaringBitmap) { | ||||
|                 self.deleted -= added; | ||||
|                 self.added |= added; | ||||
|             } | ||||
|             fn delete(&mut self, deleted: &RoaringBitmap) { | ||||
|                 self.deleted |= deleted; | ||||
|                 self.added -= deleted; | ||||
|             } | ||||
|             fn applied(self, mut docids: RoaringBitmap) -> RoaringBitmap { | ||||
|                 docids -= self.deleted; | ||||
|                 docids |= self.added; | ||||
|                 docids | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let mut new_faceted_docids = HashMap::<FieldId, DeltaDocids>::default(); | ||||
|  | ||||
|         let mut cursor = self.delta_data.into_cursor()?; | ||||
|         while let Some((key, value)) = cursor.move_on_next()? { | ||||
|             if !valid_lmdb_key(key) { | ||||
| @@ -102,8 +76,6 @@ impl<'i> FacetsUpdateIncremental<'i> { | ||||
|                 .ok_or(heed::Error::Encoding)?; | ||||
|             let value = KvReader::new(value); | ||||
|  | ||||
|             let entry = new_faceted_docids.entry(key.field_id).or_default(); | ||||
|  | ||||
|             let docids_to_delete = value | ||||
|                 .get(DelAdd::Deletion) | ||||
|                 .map(CboRoaringBitmapCodec::bytes_decode) | ||||
| @@ -117,31 +89,14 @@ impl<'i> FacetsUpdateIncremental<'i> { | ||||
|             if let Some(docids_to_delete) = docids_to_delete { | ||||
|                 let docids_to_delete = docids_to_delete?; | ||||
|                 self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?; | ||||
|                 entry.delete(&docids_to_delete); | ||||
|             } | ||||
|  | ||||
|             if let Some(docids_to_add) = docids_to_add { | ||||
|                 let docids_to_add = docids_to_add?; | ||||
|                 self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?; | ||||
|                 entry.add(&docids_to_add); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // FIXME: broken for multi-value facets? | ||||
|         // | ||||
|         // Consider an incremental update: `facet="tags", facet_value="Action", {Del: Some([0, 1]), Add: None }` | ||||
|         // The current code will unconditionally remove docs 0 and 1 from faceted docs for "tags". | ||||
|         // Now for doc 0: `"tags": "Action"`, it's correct behavior | ||||
|         // for doc 1: `"tags": "Action, Adventure"`, it's incorrect behavior | ||||
|         for (field_id, new_docids) in new_faceted_docids { | ||||
|             let old_docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?; | ||||
|             self.index.put_faceted_documents_ids( | ||||
|                 wtxn, | ||||
|                 field_id, | ||||
|                 self.facet_type, | ||||
|                 &new_docids.applied(old_docids), | ||||
|             )?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -599,7 +599,6 @@ mod tests { | ||||
|         index.add_documents(documents).unwrap(); | ||||
|  | ||||
|         db_snap!(index, facet_id_f64_docids, "initial", @"777e0e221d778764b472c512617eeb3b"); | ||||
|         db_snap!(index, number_faceted_documents_ids, "initial", @"bd916ef32b05fd5c3c4c518708f431a9"); | ||||
|         db_snap!(index, soft_deleted_documents_ids, "initial", @"[]"); | ||||
|  | ||||
|         let mut documents = vec![]; | ||||
| @@ -622,7 +621,6 @@ mod tests { | ||||
|         index.add_documents(documents).unwrap(); | ||||
|  | ||||
|         db_snap!(index, facet_id_f64_docids, "replaced_1_soft", @"abba175d7bed727d0efadaef85a4388f"); | ||||
|         db_snap!(index, number_faceted_documents_ids, "replaced_1_soft", @"de76488bd05ad94c6452d725acf1bd06"); | ||||
|         db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123"); | ||||
|  | ||||
|         // Then replace the last document while disabling soft_deletion | ||||
| @@ -647,7 +645,6 @@ mod tests { | ||||
|         index.add_documents(documents).unwrap(); | ||||
|  | ||||
|         db_snap!(index, facet_id_f64_docids, "replaced_2_hard", @"029e27a46d09c574ae949aa4289b45e6"); | ||||
|         db_snap!(index, number_faceted_documents_ids, "replaced_2_hard", @"60b19824f136affe6b240a7200779028"); | ||||
|         db_snap!(index, soft_deleted_documents_ids, "replaced_2_hard", @"[]"); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1499,12 +1499,6 @@ mod tests { | ||||
|         3   2    second       second | ||||
|         3   3    third        third | ||||
|         "###); | ||||
|         db_snap!(index, string_faceted_documents_ids, @r###" | ||||
|         0   [] | ||||
|         1   [] | ||||
|         2   [] | ||||
|         3   [0, 1, 2, 3, ] | ||||
|         "###); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
| @@ -1528,12 +1522,6 @@ mod tests { | ||||
|  | ||||
|         db_snap!(index, facet_id_string_docids, @""); | ||||
|         db_snap!(index, field_id_docid_facet_strings, @""); | ||||
|         db_snap!(index, string_faceted_documents_ids, @r###" | ||||
|         0   [] | ||||
|         1   [] | ||||
|         2   [] | ||||
|         3   [0, 1, 2, 3, ] | ||||
|         "###); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
| @@ -1560,12 +1548,6 @@ mod tests { | ||||
|         3   2    second       second | ||||
|         3   3    third        third | ||||
|         "###); | ||||
|         db_snap!(index, string_faceted_documents_ids, @r###" | ||||
|         0   [] | ||||
|         1   [] | ||||
|         2   [] | ||||
|         3   [0, 1, 2, 3, ] | ||||
|         "###); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user