mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Move the document changes sorting logic to a new trait
This commit is contained in:
		| @@ -6,6 +6,7 @@ use heed::types::Bytes; | |||||||
| use heed::RoTxn; | use heed::RoTxn; | ||||||
| use memmap2::Mmap; | use memmap2::Mmap; | ||||||
| use rayon::iter::{IntoParallelIterator, ParallelIterator}; | use rayon::iter::{IntoParallelIterator, ParallelIterator}; | ||||||
|  | use IndexDocumentsMethod as Idm; | ||||||
|  |  | ||||||
| use super::super::document_change::DocumentChange; | use super::super::document_change::DocumentChange; | ||||||
| use super::super::items_pool::ItemsPool; | use super::super::items_pool::ItemsPool; | ||||||
| @@ -148,6 +149,8 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> { | |||||||
|                                     (docid, vec![document_operation]), |                                     (docid, vec![document_operation]), | ||||||
|                                 ); |                                 ); | ||||||
|                             } |                             } | ||||||
|  |                             // TODO clean the code to make sure we clean the useless operations | ||||||
|  |                             //      add a method to the MergeChanges trait | ||||||
|                             Some((_, offsets)) => offsets.push(document_operation), |                             Some((_, offsets)) => offsets.push(document_operation), | ||||||
|                         } |                         } | ||||||
|  |  | ||||||
| @@ -185,16 +188,10 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> { | |||||||
|         // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone |         // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone | ||||||
|         let mut docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect(); |         let mut docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect(); | ||||||
|         // Reorder the offsets to make sure we iterate on the file sequentially |         // Reorder the offsets to make sure we iterate on the file sequentially | ||||||
|         docids_version_offsets.sort_unstable_by_key(|(_, (_, offsets))| { |         match self.index_documents_method { | ||||||
|             offsets |             Idm::ReplaceDocuments => MergeDocumentForReplacement::sort(&mut docids_version_offsets), | ||||||
|                 .iter() |             Idm::UpdateDocuments => MergeDocumentForUpdates::sort(&mut docids_version_offsets), | ||||||
|                 .rev() |         } | ||||||
|                 .find_map(|ido| match ido { |  | ||||||
|                     InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize), |  | ||||||
|                     InnerDocOp::Deletion => None, |  | ||||||
|                 }) |  | ||||||
|                 .unwrap_or(0) |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         Ok(docids_version_offsets |         Ok(docids_version_offsets | ||||||
|             .into_par_iter() |             .into_par_iter() | ||||||
| @@ -202,11 +199,9 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> { | |||||||
|                 Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))), |                 Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))), | ||||||
|                 move |context_pool, (external_docid, (internal_docid, operations))| { |                 move |context_pool, (external_docid, (internal_docid, operations))| { | ||||||
|                     context_pool.with(|rtxn| { |                     context_pool.with(|rtxn| { | ||||||
|                         use IndexDocumentsMethod as Idm; |  | ||||||
|  |  | ||||||
|                         let document_merge_function = match self.index_documents_method { |                         let document_merge_function = match self.index_documents_method { | ||||||
|                             Idm::ReplaceDocuments => merge_document_for_replacements, |                             Idm::ReplaceDocuments => MergeDocumentForReplacement::merge, | ||||||
|                             Idm::UpdateDocuments => merge_document_for_updates, |                             Idm::UpdateDocuments => MergeDocumentForUpdates::merge, | ||||||
|                         }; |                         }; | ||||||
|  |  | ||||||
|                         document_merge_function( |                         document_merge_function( | ||||||
| @@ -224,17 +219,48 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Returns only the most recent version of a document based on the updates from the payloads. | trait MergeChanges { | ||||||
| /// |     /// Reorders the offsets to make sure we iterate on the file sequentially. | ||||||
| /// This function is only meant to be used when doing a replacement and not an update. |     fn sort(changes_offsets: &mut [(CowStr, (DocumentId, Vec<InnerDocOp>))]); | ||||||
| fn merge_document_for_replacements( |  | ||||||
|  |     fn merge( | ||||||
|         rtxn: &RoTxn, |         rtxn: &RoTxn, | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         fields_ids_map: &FieldsIdsMap, |         fields_ids_map: &FieldsIdsMap, | ||||||
|         docid: DocumentId, |         docid: DocumentId, | ||||||
|         external_docid: String, |         external_docid: String, | ||||||
|         operations: &[InnerDocOp], |         operations: &[InnerDocOp], | ||||||
| ) -> Result<Option<DocumentChange>> { |     ) -> Result<Option<DocumentChange>>; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | struct MergeDocumentForReplacement; | ||||||
|  |  | ||||||
|  | impl MergeChanges for MergeDocumentForReplacement { | ||||||
|  |     /// Reorders to read only the last change. | ||||||
|  |     fn sort(changes_offsets: &mut [(CowStr, (DocumentId, Vec<InnerDocOp>))]) { | ||||||
|  |         changes_offsets.sort_unstable_by_key(|(_, (_, offsets))| { | ||||||
|  |             offsets | ||||||
|  |                 .iter() | ||||||
|  |                 .rev() | ||||||
|  |                 .find_map(|ido| match ido { | ||||||
|  |                     InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize), | ||||||
|  |                     InnerDocOp::Deletion => None, | ||||||
|  |                 }) | ||||||
|  |                 .unwrap_or(0) | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns only the most recent version of a document based on the updates from the payloads. | ||||||
|  |     /// | ||||||
|  |     /// This function is only meant to be used when doing a replacement and not an update. | ||||||
|  |     fn merge( | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         index: &Index, | ||||||
|  |         fields_ids_map: &FieldsIdsMap, | ||||||
|  |         docid: DocumentId, | ||||||
|  |         external_docid: String, | ||||||
|  |         operations: &[InnerDocOp], | ||||||
|  |     ) -> Result<Option<DocumentChange>> { | ||||||
|         let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?; |         let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?; | ||||||
|         let current: Option<&KvReaderFieldId> = current.map(Into::into); |         let current: Option<&KvReaderFieldId> = current.map(Into::into); | ||||||
|  |  | ||||||
| @@ -275,20 +301,37 @@ fn merge_document_for_replacements( | |||||||
|             }, |             }, | ||||||
|             None => Ok(None), // but it's strange |             None => Ok(None), // but it's strange | ||||||
|         } |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Reads the previous version of a document from the database, the new versions | struct MergeDocumentForUpdates; | ||||||
| /// in the grenad update files and merges them to generate a new boxed obkv. |  | ||||||
| /// | impl MergeChanges for MergeDocumentForUpdates { | ||||||
| /// This function is only meant to be used when doing an update and not a replacement. |     /// Reorders to read the first changes first so that it's faster to read the first one and then the rest. | ||||||
| fn merge_document_for_updates( |     fn sort(changes_offsets: &mut [(CowStr, (DocumentId, Vec<InnerDocOp>))]) { | ||||||
|  |         changes_offsets.sort_unstable_by_key(|(_, (_, offsets))| { | ||||||
|  |             offsets | ||||||
|  |                 .iter() | ||||||
|  |                 .find_map(|ido| match ido { | ||||||
|  |                     InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize), | ||||||
|  |                     InnerDocOp::Deletion => None, | ||||||
|  |                 }) | ||||||
|  |                 .unwrap_or(0) | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Reads the previous version of a document from the database, the new versions | ||||||
|  |     /// in the grenad update files and merges them to generate a new boxed obkv. | ||||||
|  |     /// | ||||||
|  |     /// This function is only meant to be used when doing an update and not a replacement. | ||||||
|  |     fn merge( | ||||||
|         rtxn: &RoTxn, |         rtxn: &RoTxn, | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         fields_ids_map: &FieldsIdsMap, |         fields_ids_map: &FieldsIdsMap, | ||||||
|         docid: DocumentId, |         docid: DocumentId, | ||||||
|         external_docid: String, |         external_docid: String, | ||||||
|         operations: &[InnerDocOp], |         operations: &[InnerDocOp], | ||||||
| ) -> Result<Option<DocumentChange>> { |     ) -> Result<Option<DocumentChange>> { | ||||||
|         let mut document = BTreeMap::<_, Cow<_>>::new(); |         let mut document = BTreeMap::<_, Cow<_>>::new(); | ||||||
|         let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?; |         let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?; | ||||||
|         let current: Option<&KvReaderFieldId> = current.map(Into::into); |         let current: Option<&KvReaderFieldId> = current.map(Into::into); | ||||||
| @@ -349,6 +392,7 @@ fn merge_document_for_updates( | |||||||
|                 Ok(Some(DocumentChange::Insertion(insertion))) |                 Ok(Some(DocumentChange::Insertion(insertion))) | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| use std::borrow::Borrow; | use std::borrow::Borrow; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user