mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Make sure we correctly mix different document operations
This commit is contained in:
		| @@ -54,7 +54,8 @@ pub(crate) enum Batch { | |||||||
|  |  | ||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub(crate) enum DocumentOperation { | pub(crate) enum DocumentOperation { | ||||||
|     Add(Uuid), |     Replace(Uuid), | ||||||
|  |     Update(Uuid), | ||||||
|     Delete(Vec<String>), |     Delete(Vec<String>), | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -253,7 +254,7 @@ impl IndexScheduler { | |||||||
|                     _ => unreachable!(), |                     _ => unreachable!(), | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             BatchKind::DocumentOperation { method, operation_ids, .. } => { |             BatchKind::DocumentOperation { operation_ids, .. } => { | ||||||
|                 let tasks = self.queue.get_existing_tasks_for_processing_batch( |                 let tasks = self.queue.get_existing_tasks_for_processing_batch( | ||||||
|                     rtxn, |                     rtxn, | ||||||
|                     current_batch, |                     current_batch, | ||||||
| @@ -275,9 +276,17 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|                 for task in tasks.iter() { |                 for task in tasks.iter() { | ||||||
|                     match task.kind { |                     match task.kind { | ||||||
|                         KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { |                         KindWithContent::DocumentAdditionOrUpdate { | ||||||
|                             operations.push(DocumentOperation::Add(content_file)); |                             content_file, method, .. | ||||||
|  |                         } => match method { | ||||||
|  |                             IndexDocumentsMethod::ReplaceDocuments => { | ||||||
|  |                                 operations.push(DocumentOperation::Replace(content_file)) | ||||||
|                             } |                             } | ||||||
|  |                             IndexDocumentsMethod::UpdateDocuments => { | ||||||
|  |                                 operations.push(DocumentOperation::Update(content_file)) | ||||||
|  |                             } | ||||||
|  |                             _ => unreachable!("Unknown document merging method"), | ||||||
|  |                         }, | ||||||
|                         KindWithContent::DocumentDeletion { ref documents_ids, .. } => { |                         KindWithContent::DocumentDeletion { ref documents_ids, .. } => { | ||||||
|                             operations.push(DocumentOperation::Delete(documents_ids.clone())); |                             operations.push(DocumentOperation::Delete(documents_ids.clone())); | ||||||
|                         } |                         } | ||||||
| @@ -289,7 +298,6 @@ impl IndexScheduler { | |||||||
|                     op: IndexOperation::DocumentOperation { |                     op: IndexOperation::DocumentOperation { | ||||||
|                         index_uid, |                         index_uid, | ||||||
|                         primary_key, |                         primary_key, | ||||||
|                         method, |  | ||||||
|                         operations, |                         operations, | ||||||
|                         tasks, |                         tasks, | ||||||
|                     }, |                     }, | ||||||
|   | |||||||
| @@ -62,24 +62,22 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|                 Ok(tasks) |                 Ok(tasks) | ||||||
|             } |             } | ||||||
|             IndexOperation::DocumentOperation { |             IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => { | ||||||
|                 index_uid, |  | ||||||
|                 primary_key, |  | ||||||
|                 method, |  | ||||||
|                 operations, |  | ||||||
|                 mut tasks, |  | ||||||
|             } => { |  | ||||||
|                 progress.update_progress(DocumentOperationProgress::RetrievingConfig); |                 progress.update_progress(DocumentOperationProgress::RetrievingConfig); | ||||||
|                 // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. |                 // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. | ||||||
|                 // this is made difficult by the fact we're doing private clones of the index scheduler and sending it |                 // this is made difficult by the fact we're doing private clones of the index scheduler and sending it | ||||||
|                 // to a fresh thread. |                 // to a fresh thread. | ||||||
|                 let mut content_files = Vec::new(); |                 let mut content_files = Vec::new(); | ||||||
|                 for operation in &operations { |                 for operation in &operations { | ||||||
|                     if let DocumentOperation::Add(content_uuid) = operation { |                     match operation { | ||||||
|  |                         DocumentOperation::Replace(content_uuid) | ||||||
|  |                         | DocumentOperation::Update(content_uuid) => { | ||||||
|                             let content_file = self.queue.file_store.get_update(*content_uuid)?; |                             let content_file = self.queue.file_store.get_update(*content_uuid)?; | ||||||
|                             let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; |                             let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; | ||||||
|                             content_files.push(mmap); |                             content_files.push(mmap); | ||||||
|                         } |                         } | ||||||
|  |                         _ => (), | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 let rtxn = index.read_txn()?; |                 let rtxn = index.read_txn()?; | ||||||
| @@ -87,17 +85,23 @@ impl IndexScheduler { | |||||||
|                 let mut new_fields_ids_map = db_fields_ids_map.clone(); |                 let mut new_fields_ids_map = db_fields_ids_map.clone(); | ||||||
|  |  | ||||||
|                 let mut content_files_iter = content_files.iter(); |                 let mut content_files_iter = content_files.iter(); | ||||||
|                 let mut indexer = indexer::DocumentOperation::new(method); |                 let mut indexer = indexer::DocumentOperation::new(); | ||||||
|                 let embedders = index |                 let embedders = index | ||||||
|                     .embedding_configs(index_wtxn) |                     .embedding_configs(index_wtxn) | ||||||
|                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; |                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|                 let embedders = self.embedders(index_uid.clone(), embedders)?; |                 let embedders = self.embedders(index_uid.clone(), embedders)?; | ||||||
|                 for operation in operations { |                 for operation in operations { | ||||||
|                     match operation { |                     match operation { | ||||||
|                         DocumentOperation::Add(_content_uuid) => { |                         DocumentOperation::Replace(_content_uuid) => { | ||||||
|                             let mmap = content_files_iter.next().unwrap(); |                             let mmap = content_files_iter.next().unwrap(); | ||||||
|                             indexer |                             indexer | ||||||
|                                 .add_documents(mmap) |                                 .replace_documents(mmap) | ||||||
|  |                                 .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|  |                         } | ||||||
|  |                         DocumentOperation::Update(_content_uuid) => { | ||||||
|  |                             let mmap = content_files_iter.next().unwrap(); | ||||||
|  |                             indexer | ||||||
|  |                                 .update_documents(mmap) | ||||||
|                                 .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; |                                 .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|                         } |                         } | ||||||
|                         DocumentOperation::Delete(document_ids) => { |                         DocumentOperation::Delete(document_ids) => { | ||||||
|   | |||||||
| @@ -23,6 +23,7 @@ use crate::update::new::{Deletion, Insertion, Update}; | |||||||
| use crate::update::{AvailableIds, IndexDocumentsMethod}; | use crate::update::{AvailableIds, IndexDocumentsMethod}; | ||||||
| use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError}; | use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError}; | ||||||
|  |  | ||||||
|  | #[derive(Default)] | ||||||
| pub struct DocumentOperation<'pl> { | pub struct DocumentOperation<'pl> { | ||||||
|     operations: Vec<Payload<'pl>>, |     operations: Vec<Payload<'pl>>, | ||||||
|     method: MergeMethod, |     method: MergeMethod, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user