mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Use the new ExternalDocumentsIds struct in the engine
This commit is contained in:
		
							
								
								
									
										18
									
								
								src/index.rs
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								src/index.rs
									
									
									
									
									
								
							| @@ -7,11 +7,10 @@ use heed::types::*; | |||||||
| use heed::{PolyDatabase, Database, RwTxn, RoTxn}; | use heed::{PolyDatabase, Database, RwTxn, RoTxn}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::external_documents_ids::ExternalDocumentsIds; |  | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::fields_ids_map::FieldsIdsMap; | use crate::fields_ids_map::FieldsIdsMap; | ||||||
| use crate::Search; | use crate::Search; | ||||||
| use crate::{BEU32, DocumentId}; | use crate::{BEU32, DocumentId, ExternalDocumentsIds}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec, |     RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec, | ||||||
|     BoRoaringBitmapCodec, CboRoaringBitmapCodec, |     BoRoaringBitmapCodec, CboRoaringBitmapCodec, | ||||||
| @@ -143,14 +142,15 @@ impl Index { | |||||||
|     pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<ExternalDocumentsIds<'t>> { |     pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<ExternalDocumentsIds<'t>> { | ||||||
|         let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; |         let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; | ||||||
|         let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; |         let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; | ||||||
|         match hard.zip(soft) { |         let hard = match hard { | ||||||
|             Some((hard, soft)) => { |             Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?, | ||||||
|                 let hard = fst::Map::new(hard)?.map_data(Cow::Borrowed)?; |             None => fst::Map::default().map_data(Cow::Owned)?, | ||||||
|                 let soft = fst::Map::new(soft)?.map_data(Cow::Borrowed)?; |         }; | ||||||
|  |         let soft = match soft { | ||||||
|  |             Some(soft) => fst::Map::new(soft)?.map_data(Cow::Borrowed)?, | ||||||
|  |             None => fst::Map::default().map_data(Cow::Owned)?, | ||||||
|  |         }; | ||||||
|         Ok(ExternalDocumentsIds::new(hard, soft)) |         Ok(ExternalDocumentsIds::new(hard, soft)) | ||||||
|             }, |  | ||||||
|             None => Ok(ExternalDocumentsIds::default()), |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* fields ids map */ |     /* fields ids map */ | ||||||
|   | |||||||
| @@ -21,6 +21,7 @@ use fxhash::{FxHasher32, FxHasher64}; | |||||||
| use serde_json::{Map, Value}; | use serde_json::{Map, Value}; | ||||||
|  |  | ||||||
| pub use self::criterion::{Criterion, default_criteria}; | pub use self::criterion::{Criterion, default_criteria}; | ||||||
|  | pub use self::external_documents_ids::ExternalDocumentsIds; | ||||||
| pub use self::fields_ids_map::FieldsIdsMap; | pub use self::fields_ids_map::FieldsIdsMap; | ||||||
| pub use self::index::Index; | pub use self::index::Index; | ||||||
| pub use self::search::{Search, SearchResult}; | pub use self::search::{Search, SearchResult}; | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use crate::Index; | use crate::{ExternalDocumentsIds, Index}; | ||||||
|  |  | ||||||
| pub struct ClearDocuments<'t, 'u, 'i> { | pub struct ClearDocuments<'t, 'u, 'i> { | ||||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, |     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||||
| @@ -27,7 +27,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | |||||||
|  |  | ||||||
|         // We clean some of the main engine datastructures. |         // We clean some of the main engine datastructures. | ||||||
|         self.index.put_words_fst(self.wtxn, &fst::Set::default())?; |         self.index.put_words_fst(self.wtxn, &fst::Set::default())?; | ||||||
|         self.index.put_external_documents_ids(self.wtxn, &fst::Map::default())?; |         self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?; | ||||||
|         self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?; |         self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?; | ||||||
|  |  | ||||||
|         // Clear the other databases. |         // Clear the other databases. | ||||||
|   | |||||||
| @@ -1,16 +1,13 @@ | |||||||
| use std::borrow::Cow; | use fst::IntoStreamer; | ||||||
| use std::convert::TryFrom; |  | ||||||
|  |  | ||||||
| use fst::{IntoStreamer, Streamer}; |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::{Index, BEU32, SmallString32}; | use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds}; | ||||||
| use super::ClearDocuments; | use super::ClearDocuments; | ||||||
|  |  | ||||||
| pub struct DeleteDocuments<'t, 'u, 'i> { | pub struct DeleteDocuments<'t, 'u, 'i> { | ||||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, |     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||||
|     index: &'i Index, |     index: &'i Index, | ||||||
|     external_documents_ids: fst::Map<Vec<u8>>, |     external_documents_ids: ExternalDocumentsIds<'static>, | ||||||
|     documents_ids: RoaringBitmap, |     documents_ids: RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -22,7 +19,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|     { |     { | ||||||
|         let external_documents_ids = index |         let external_documents_ids = index | ||||||
|             .external_documents_ids(wtxn)? |             .external_documents_ids(wtxn)? | ||||||
|             .map_data(Cow::into_owned)?; |             .into_static(); | ||||||
|  |  | ||||||
|         Ok(DeleteDocuments { |         Ok(DeleteDocuments { | ||||||
|             wtxn, |             wtxn, | ||||||
| @@ -41,7 +38,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> { |     pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> { | ||||||
|         let docid = self.external_documents_ids.get(external_id).map(|id| u32::try_from(id).unwrap())?; |         let docid = self.external_documents_ids.get(external_id)?; | ||||||
|         self.delete_document(docid); |         self.delete_document(docid); | ||||||
|         Some(docid) |         Some(docid) | ||||||
|     } |     } | ||||||
| @@ -112,26 +109,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|         // We create the FST map of the external ids that we must delete. |         // We create the FST map of the external ids that we must delete. | ||||||
|         external_ids.sort_unstable(); |         external_ids.sort_unstable(); | ||||||
|         let external_ids_to_delete = fst::Set::from_iter(external_ids.iter().map(AsRef::as_ref))?; |         let external_ids_to_delete = fst::Set::from_iter(external_ids.iter().map(AsRef::as_ref))?; | ||||||
|         let external_ids_to_delete = fst::Map::from(external_ids_to_delete.into_fst()); |  | ||||||
|  |  | ||||||
|         let new_external_documents_ids = { |         // We acquire the current external documents ids map... | ||||||
|             // We acquire the current external documents ids map and create |         let mut new_external_documents_ids = self.index.external_documents_ids(self.wtxn)?; | ||||||
|             // a difference operation between the current and to-delete external ids. |         // ...and remove the to-delete external ids. | ||||||
|             let external_documents_ids = self.index.external_documents_ids(self.wtxn)?; |         new_external_documents_ids.delete_ids(external_ids_to_delete)?; | ||||||
|             let difference = external_documents_ids.op().add(&external_ids_to_delete).difference(); |  | ||||||
|  |  | ||||||
|             // We stream the new external ids that does no more contains the to-delete external ids. |  | ||||||
|             let mut iter = difference.into_stream(); |  | ||||||
|             let mut new_external_documents_ids_builder = fst::MapBuilder::memory(); |  | ||||||
|             while let Some((external_id, docids)) = iter.next() { |  | ||||||
|                 new_external_documents_ids_builder.insert(external_id, docids[0].value)?; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // We create an FST map from the above builder. |  | ||||||
|             new_external_documents_ids_builder.into_map() |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         // We write the new external ids into the main database. |         // We write the new external ids into the main database. | ||||||
|  |         let new_external_documents_ids = new_external_documents_ids.into_static(); | ||||||
|         self.index.put_external_documents_ids(self.wtxn, &new_external_documents_ids)?; |         self.index.put_external_documents_ids(self.wtxn, &new_external_documents_ids)?; | ||||||
|  |  | ||||||
|         // Maybe we can improve the get performance of the words |         // Maybe we can improve the get performance of the words | ||||||
|   | |||||||
| @@ -6,13 +6,12 @@ use std::iter::Peekable; | |||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| use anyhow::{anyhow, Context}; | use anyhow::{anyhow, Context}; | ||||||
| use fst::{IntoStreamer, Streamer}; |  | ||||||
| use grenad::CompressionType; | use grenad::CompressionType; | ||||||
| use log::info; | use log::info; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde_json::{Map, Value}; | use serde_json::{Map, Value}; | ||||||
|  |  | ||||||
| use crate::{BEU32, MergeFn, Index, FieldsIdsMap}; | use crate::{BEU32, MergeFn, Index, FieldsIdsMap, ExternalDocumentsIds}; | ||||||
| use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | ||||||
| use super::merge_function::merge_two_obkvs; | use super::merge_function::merge_two_obkvs; | ||||||
| use super::{create_writer, create_sorter, IndexDocumentsMethod}; | use super::{create_writer, create_sorter, IndexDocumentsMethod}; | ||||||
| @@ -20,7 +19,7 @@ use super::{create_writer, create_sorter, IndexDocumentsMethod}; | |||||||
| pub struct TransformOutput { | pub struct TransformOutput { | ||||||
|     pub primary_key: u8, |     pub primary_key: u8, | ||||||
|     pub fields_ids_map: FieldsIdsMap, |     pub fields_ids_map: FieldsIdsMap, | ||||||
|     pub external_documents_ids: fst::Map<Vec<u8>>, |     pub external_documents_ids: ExternalDocumentsIds<'static>, | ||||||
|     pub new_documents_ids: RoaringBitmap, |     pub new_documents_ids: RoaringBitmap, | ||||||
|     pub replaced_documents_ids: RoaringBitmap, |     pub replaced_documents_ids: RoaringBitmap, | ||||||
|     pub documents_count: usize, |     pub documents_count: usize, | ||||||
| @@ -116,7 +115,7 @@ impl Transform<'_, '_> { | |||||||
|             return Ok(TransformOutput { |             return Ok(TransformOutput { | ||||||
|                 primary_key, |                 primary_key, | ||||||
|                 fields_ids_map, |                 fields_ids_map, | ||||||
|                 external_documents_ids: fst::Map::default(), |                 external_documents_ids: ExternalDocumentsIds::default(), | ||||||
|                 new_documents_ids: RoaringBitmap::new(), |                 new_documents_ids: RoaringBitmap::new(), | ||||||
|                 replaced_documents_ids: RoaringBitmap::new(), |                 replaced_documents_ids: RoaringBitmap::new(), | ||||||
|                 documents_count: 0, |                 documents_count: 0, | ||||||
| @@ -370,7 +369,7 @@ impl Transform<'_, '_> { | |||||||
|         primary_key: u8, |         primary_key: u8, | ||||||
|         fields_ids_map: FieldsIdsMap, |         fields_ids_map: FieldsIdsMap, | ||||||
|         approximate_number_of_documents: usize, |         approximate_number_of_documents: usize, | ||||||
|         external_documents_ids: fst::Map<Cow<'_, [u8]>>, |         mut external_documents_ids: ExternalDocumentsIds<'_>, | ||||||
|         progress_callback: F, |         progress_callback: F, | ||||||
|     ) -> anyhow::Result<TransformOutput> |     ) -> anyhow::Result<TransformOutput> | ||||||
|     where |     where | ||||||
| @@ -457,28 +456,17 @@ impl Transform<'_, '_> { | |||||||
|         let mut documents_file = writer.into_inner()?; |         let mut documents_file = writer.into_inner()?; | ||||||
|         documents_file.seek(SeekFrom::Start(0))?; |         documents_file.seek(SeekFrom::Start(0))?; | ||||||
|  |  | ||||||
|         // We create the union between the existing external documents ids with the new ones. |  | ||||||
|         let new_external_documents_ids = new_external_documents_ids_builder.into_map(); |  | ||||||
|         let union_op = fst::map::OpBuilder::new() |  | ||||||
|             .add(&external_documents_ids) |  | ||||||
|             .add(&new_external_documents_ids) |  | ||||||
|             .r#union(); |  | ||||||
|  |  | ||||||
|         // We stream and merge the new external documents ids map with the existing one. |  | ||||||
|         let before_docids_merging = Instant::now(); |         let before_docids_merging = Instant::now(); | ||||||
|         let mut external_documents_ids_builder = fst::MapBuilder::memory(); |         // We merge the new external ids with existing external documents ids. | ||||||
|         let mut iter = union_op.into_stream(); |         let new_external_documents_ids = new_external_documents_ids_builder.into_map(); | ||||||
|         while let Some((external_id, vals)) = iter.next() { |         external_documents_ids.insert_ids(&new_external_documents_ids)?; | ||||||
|             assert_eq!(vals.len(), 1, "there must be exactly one document id"); |  | ||||||
|             external_documents_ids_builder.insert(external_id, vals[0].value)?; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         info!("Documents external merging took {:.02?}", before_docids_merging.elapsed()); |         info!("Documents external merging took {:.02?}", before_docids_merging.elapsed()); | ||||||
|  |  | ||||||
|         Ok(TransformOutput { |         Ok(TransformOutput { | ||||||
|             primary_key, |             primary_key, | ||||||
|             fields_ids_map, |             fields_ids_map, | ||||||
|             external_documents_ids: external_documents_ids_builder.into_map(), |             external_documents_ids: external_documents_ids.into_static(), | ||||||
|             new_documents_ids, |             new_documents_ids, | ||||||
|             replaced_documents_ids, |             replaced_documents_ids, | ||||||
|             documents_count, |             documents_count, | ||||||
| @@ -531,7 +519,7 @@ impl Transform<'_, '_> { | |||||||
|         Ok(TransformOutput { |         Ok(TransformOutput { | ||||||
|             primary_key, |             primary_key, | ||||||
|             fields_ids_map, |             fields_ids_map, | ||||||
|             external_documents_ids: external_documents_ids.map_data(Cow::into_owned)?, |             external_documents_ids: external_documents_ids.into_static(), | ||||||
|             new_documents_ids: documents_ids, |             new_documents_ids: documents_ids, | ||||||
|             replaced_documents_ids: RoaringBitmap::default(), |             replaced_documents_ids: RoaringBitmap::default(), | ||||||
|             documents_count, |             documents_count, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user