mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	polish the global structure of the batch creation
This commit is contained in:
		| @@ -1,5 +1,12 @@ | |||||||
| use crate::{autobatcher::BatchKind, task::Status, Error, IndexScheduler, Result, TaskId}; | use crate::{ | ||||||
| use milli::{heed::RoTxn, update::IndexDocumentsMethod}; |     autobatcher::BatchKind, | ||||||
|  |     task::{KindWithContent, Status}, | ||||||
|  |     Error, IndexScheduler, Result, TaskId, | ||||||
|  | }; | ||||||
|  | use milli::{ | ||||||
|  |     heed::{RoTxn, RwTxn}, | ||||||
|  |     update::IndexDocumentsMethod, | ||||||
|  | }; | ||||||
| use uuid::Uuid; | use uuid::Uuid; | ||||||
|  |  | ||||||
| use crate::{task::Kind, Task}; | use crate::{task::Kind, Task}; | ||||||
| @@ -8,10 +15,100 @@ pub(crate) enum Batch { | |||||||
|     Cancel(Task), |     Cancel(Task), | ||||||
|     Snapshot(Vec<Task>), |     Snapshot(Vec<Task>), | ||||||
|     Dump(Vec<Task>), |     Dump(Vec<Task>), | ||||||
|     IndexSpecific { index_uid: String, kind: BatchKind }, |     // IndexSpecific { index_uid: String, kind: BatchKind }, | ||||||
|  |     DocumentAddition { | ||||||
|  |         index_uid: String, | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |         content_files: Vec<Uuid>, | ||||||
|  |         tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
|  |     SettingsAndDocumentAddition { | ||||||
|  |         index_uid: String, | ||||||
|  |  | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |         content_files: Vec<Uuid>, | ||||||
|  |         document_addition_tasks: Vec<Task>, | ||||||
|  |  | ||||||
|  |         settings: Vec<String>, | ||||||
|  |         settings_tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl IndexScheduler { | impl IndexScheduler { | ||||||
|  |     pub(crate) fn create_next_batch_index( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         index_uid: String, | ||||||
|  |         batch: BatchKind, | ||||||
|  |     ) -> Result<Option<Batch>> { | ||||||
|  |         match batch { | ||||||
|  |             BatchKind::ClearAll { ids } => todo!(), | ||||||
|  |             BatchKind::DocumentAddition { addition_ids } => todo!(), | ||||||
|  |             BatchKind::DocumentDeletion { deletion_ids } => todo!(), | ||||||
|  |             BatchKind::ClearAllAndSettings { | ||||||
|  |                 other, | ||||||
|  |                 settings_ids, | ||||||
|  |             } => todo!(), | ||||||
|  |             BatchKind::SettingsAndDocumentAddition { | ||||||
|  |                 addition_ids, | ||||||
|  |                 settings_ids, | ||||||
|  |             } => { | ||||||
|  |                 // you're not supposed to create an empty BatchKind. | ||||||
|  |                 assert!(addition_ids.len() > 0); | ||||||
|  |                 assert!(settings_ids.len() > 0); | ||||||
|  |  | ||||||
|  |                 let document_addition_tasks = addition_ids | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|tid| { | ||||||
|  |                         self.get_task(rtxn, *tid) | ||||||
|  |                             .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) | ||||||
|  |                     }) | ||||||
|  |                     .collect::<Result<Vec<_>>>()?; | ||||||
|  |                 let settings_tasks = settings_ids | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|tid| { | ||||||
|  |                         self.get_task(rtxn, *tid) | ||||||
|  |                             .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) | ||||||
|  |                     }) | ||||||
|  |                     .collect::<Result<Vec<_>>>()?; | ||||||
|  |  | ||||||
|  |                 let primary_key = match document_addition_tasks[0].kind { | ||||||
|  |                     KindWithContent::DocumentAddition { primary_key, .. } => primary_key, | ||||||
|  |                     _ => unreachable!(), | ||||||
|  |                 }; | ||||||
|  |                 let content_files = document_addition_tasks | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|task| match task.kind { | ||||||
|  |                         KindWithContent::DocumentAddition { content_file, .. } => content_file, | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     }) | ||||||
|  |                     .collect(); | ||||||
|  |  | ||||||
|  |                 let settings = settings_tasks | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|task| match task.kind { | ||||||
|  |                         KindWithContent::Settings { new_settings, .. } => new_settings.to_string(), | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     }) | ||||||
|  |                     .collect(); | ||||||
|  |  | ||||||
|  |                 Ok(Some(Batch::SettingsAndDocumentAddition { | ||||||
|  |                     index_uid, | ||||||
|  |                     primary_key, | ||||||
|  |                     content_files, | ||||||
|  |                     document_addition_tasks, | ||||||
|  |                     settings, | ||||||
|  |                     settings_tasks, | ||||||
|  |                 })) | ||||||
|  |             } | ||||||
|  |             BatchKind::Settings { settings_ids } => todo!(), | ||||||
|  |             BatchKind::DeleteIndex { ids } => todo!(), | ||||||
|  |             BatchKind::CreateIndex { id } => todo!(), | ||||||
|  |             BatchKind::SwapIndex { id } => todo!(), | ||||||
|  |             BatchKind::RenameIndex { id } => todo!(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Create the next batch to be processed; |     /// Create the next batch to be processed; | ||||||
|     /// 1. We get the *last* task to cancel. |     /// 1. We get the *last* task to cancel. | ||||||
|     /// 2. We get the *next* snapshot to process. |     /// 2. We get the *next* snapshot to process. | ||||||
| @@ -65,12 +162,9 @@ impl IndexScheduler { | |||||||
|                 }) |                 }) | ||||||
|                 .collect::<Result<Vec<_>>>()?; |                 .collect::<Result<Vec<_>>>()?; | ||||||
|  |  | ||||||
|             return Ok(crate::autobatcher::autobatch(enqueued).map(|batch_kind| { |             if let Some(batchkind) = crate::autobatcher::autobatch(enqueued) { | ||||||
|                 Batch::IndexSpecific { |                 return self.create_next_batch_index(rtxn, index_name.to_string(), batchkind); | ||||||
|                     index_uid: index_name.to_string(), |             } | ||||||
|                     kind: batch_kind, |  | ||||||
|                 } |  | ||||||
|             })); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // If we found no tasks then we were notified for something that got autobatched |         // If we found no tasks then we were notified for something that got autobatched | ||||||
| @@ -80,53 +174,62 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|     pub(crate) fn process_batch(&self, wtxn: &mut RwTxn, batch: Batch) -> Result<Vec<Task>> { |     pub(crate) fn process_batch(&self, wtxn: &mut RwTxn, batch: Batch) -> Result<Vec<Task>> { | ||||||
|         match batch { |         match batch { | ||||||
|             Batch::IndexSpecific { index_uid, kind } => { |             Batch::Cancel(_) => todo!(), | ||||||
|                 let index = create_index(); |             Batch::Snapshot(_) => todo!(), | ||||||
|                 match kind { |             Batch::Dump(_) => todo!(), | ||||||
|                     BatchKind::ClearAll { ids } => todo!(), |             Batch::DocumentAddition { | ||||||
|                     BatchKind::DocumentAddition { addition_ids } => { |                 index_uid, | ||||||
|                         let index = self.create_index(wtxn, &index_uid)?; |                 primary_key, | ||||||
|                         let ret = index.update_documents( |                 content_files, | ||||||
|                             IndexDocumentsMethod::UpdateDocuments, |                 tasks, | ||||||
|                             None, // TODO primary key |             } => todo!(), | ||||||
|                             self.file_store, |             Batch::SettingsAndDocumentAddition { | ||||||
|                             content_files, |                 index_uid, | ||||||
|                         )?; |                 primary_key, | ||||||
|  |                 content_files, | ||||||
|  |                 document_addition_tasks, | ||||||
|  |                 settings, | ||||||
|  |                 settings_tasks, | ||||||
|  |             } => { | ||||||
|  |                 let index = self.create_index(wtxn, &index_uid)?; | ||||||
|  |                 let mut updated_tasks = Vec::new(); | ||||||
|  |  | ||||||
|                         assert_eq!(ret.len(), tasks.len(), "Update documents must return the same number of `Result` than the number of tasks."); |                 /* | ||||||
|  |                 let ret = index.update_settings(settings)?; | ||||||
|                         Ok(tasks |                 for (ret, task) in ret.iter().zip(settings_tasks) { | ||||||
|                             .into_iter() |                     match ret { | ||||||
|                             .zip(ret) |                         Ok(ret) => task.status = Some(ret), | ||||||
|                             .map(|(mut task, res)| match res { |                         Err(err) => task.error = Some(err), | ||||||
|                                 Ok(info) => { |  | ||||||
|                                     task.status = Status::Succeeded; |  | ||||||
|                                     task.info = Some(info.to_string()); |  | ||||||
|                                 } |  | ||||||
|                                 Err(error) => { |  | ||||||
|                                     task.status = Status::Failed; |  | ||||||
|                                     task.error = Some(error.to_string()); |  | ||||||
|                                 } |  | ||||||
|                             }) |  | ||||||
|                             .collect()) |  | ||||||
|                     } |                     } | ||||||
|                     BatchKind::DocumentDeletion { deletion_ids } => todo!(), |  | ||||||
|                     BatchKind::ClearAllAndSettings { |  | ||||||
|                         other, |  | ||||||
|                         settings_ids, |  | ||||||
|                     } => todo!(), |  | ||||||
|                     BatchKind::SettingsAndDocumentAddition { |  | ||||||
|                         settings_ids, |  | ||||||
|                         addition_ids, |  | ||||||
|                     } => todo!(), |  | ||||||
|                     BatchKind::Settings { settings_ids } => todo!(), |  | ||||||
|                     BatchKind::DeleteIndex { ids } => todo!(), |  | ||||||
|                     BatchKind::CreateIndex { id } => todo!(), |  | ||||||
|                     BatchKind::SwapIndex { id } => todo!(), |  | ||||||
|                     BatchKind::RenameIndex { id } => todo!(), |  | ||||||
|                 } |                 } | ||||||
|  |                 */ | ||||||
|  |  | ||||||
|  |                 /* | ||||||
|  |                 for (ret, task) in ret.iter().zip(settings_tasks) { | ||||||
|  |                     match ret { | ||||||
|  |                         Ok(ret) => task.status = Some(ret), | ||||||
|  |                         Err(err) => task.error = Some(err), | ||||||
|  |                     } | ||||||
|  |                     updated_tasks.push(task); | ||||||
|  |                 } | ||||||
|  |                 */ | ||||||
|  |  | ||||||
|  |                 let ret = index.update_documents( | ||||||
|  |                     IndexDocumentsMethod::ReplaceDocuments, | ||||||
|  |                     primary_key, | ||||||
|  |                     self.file_store, | ||||||
|  |                     content_files.into_iter(), | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 for (ret, task) in ret.iter().zip(document_addition_tasks) { | ||||||
|  |                     match ret { | ||||||
|  |                         Ok(ret) => task.info = Some(format!("{:?}", ret)), | ||||||
|  |                         Err(err) => task.error = Some(err.to_string()), | ||||||
|  |                     } | ||||||
|  |                     updated_tasks.push(task); | ||||||
|  |                 } | ||||||
|  |                 Ok(updated_tasks) | ||||||
|             } |             } | ||||||
|             _ => unreachable!(), |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -221,15 +221,21 @@ impl IndexScheduler { | |||||||
|                 } |                 } | ||||||
|             }; |             }; | ||||||
|             let mut batch = match self.create_next_batch(&wtxn) { |             let mut batch = match self.create_next_batch(&wtxn) { | ||||||
|                 Ok(batch) => batch, |                 Ok(Some(batch)) => batch, | ||||||
|  |                 Ok(None) => continue, | ||||||
|                 Err(e) => { |                 Err(e) => { | ||||||
|                     log::error!("{}", e); |                     log::error!("{}", e); | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|             }; |             }; | ||||||
|  |             // 1. store the starting date with the bitmap of processing tasks | ||||||
|  |             // 2. update the tasks with a starting date *but* do not write anything on disk | ||||||
|  |  | ||||||
|  |             // 3. process the tasks | ||||||
|             let res = self.process_batch(&mut wtxn, batch); |             let res = self.process_batch(&mut wtxn, batch); | ||||||
|  |  | ||||||
|  |             // 4. store the updated tasks on disk | ||||||
|  |  | ||||||
|             // TODO: TAMO: do this later |             // TODO: TAMO: do this later | ||||||
|             // must delete the file on disk |             // must delete the file on disk | ||||||
|             // in case of error, must update the tasks with the error |             // in case of error, must update the tasks with the error | ||||||
|   | |||||||
| @@ -20,8 +20,8 @@ pub enum Status { | |||||||
| pub struct Task { | pub struct Task { | ||||||
|     pub uid: TaskId, |     pub uid: TaskId, | ||||||
|  |  | ||||||
|     #[serde(with = "time::serde::rfc3339::option")] |     #[serde(with = "time::serde::rfc3339")] | ||||||
|     pub enqueued_at: Option<OffsetDateTime>, |     pub enqueued_at: OffsetDateTime, | ||||||
|     #[serde(with = "time::serde::rfc3339::option")] |     #[serde(with = "time::serde::rfc3339::option")] | ||||||
|     pub started_at: Option<OffsetDateTime>, |     pub started_at: Option<OffsetDateTime>, | ||||||
|     #[serde(with = "time::serde::rfc3339::option")] |     #[serde(with = "time::serde::rfc3339::option")] | ||||||
| @@ -60,6 +60,7 @@ pub enum KindWithContent { | |||||||
|     Snapshot, |     Snapshot, | ||||||
|     DocumentAddition { |     DocumentAddition { | ||||||
|         index_name: String, |         index_name: String, | ||||||
|  |         primary_key: Option<String>, | ||||||
|         content_file: Uuid, |         content_file: Uuid, | ||||||
|     }, |     }, | ||||||
|     DocumentDeletion { |     DocumentDeletion { | ||||||
| @@ -69,11 +70,11 @@ pub enum KindWithContent { | |||||||
|     ClearAllDocuments { |     ClearAllDocuments { | ||||||
|         index_name: String, |         index_name: String, | ||||||
|     }, |     }, | ||||||
|     // TODO: TAMO: uncomment the settings |     Settings { | ||||||
|     // Settings { |         index_name: String, | ||||||
|     //     index_name: String, |         // TODO: TAMO: fix the type | ||||||
|     //     new_settings: Settings, |         new_settings: String, | ||||||
|     // }, |     }, | ||||||
|     RenameIndex { |     RenameIndex { | ||||||
|         index_name: String, |         index_name: String, | ||||||
|         new_name: String, |         new_name: String, | ||||||
| @@ -107,6 +108,10 @@ impl KindWithContent { | |||||||
|             KindWithContent::SwapIndex { .. } => Kind::SwapIndex, |             KindWithContent::SwapIndex { .. } => Kind::SwapIndex, | ||||||
|             KindWithContent::CancelTask { .. } => Kind::CancelTask, |             KindWithContent::CancelTask { .. } => Kind::CancelTask, | ||||||
|             KindWithContent::Snapshot => Kind::Snapshot, |             KindWithContent::Snapshot => Kind::Snapshot, | ||||||
|  |             KindWithContent::Settings { | ||||||
|  |                 index_name, | ||||||
|  |                 new_settings, | ||||||
|  |             } => todo!(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -117,6 +122,7 @@ impl KindWithContent { | |||||||
|             DocumentAddition { |             DocumentAddition { | ||||||
|                 index_name: _, |                 index_name: _, | ||||||
|                 content_file: _, |                 content_file: _, | ||||||
|  |                 primary_key, | ||||||
|             } => { |             } => { | ||||||
|                 // TODO: TAMO: persist the file |                 // TODO: TAMO: persist the file | ||||||
|                 // content_file.persist(); |                 // content_file.persist(); | ||||||
| @@ -124,6 +130,7 @@ impl KindWithContent { | |||||||
|             } |             } | ||||||
|             // There is nothing to persist for all these tasks |             // There is nothing to persist for all these tasks | ||||||
|             DumpExport { .. } |             DumpExport { .. } | ||||||
|  |             | Settings { .. } | ||||||
|             | DocumentDeletion { .. } |             | DocumentDeletion { .. } | ||||||
|             | ClearAllDocuments { .. } |             | ClearAllDocuments { .. } | ||||||
|             | RenameIndex { .. } |             | RenameIndex { .. } | ||||||
| @@ -142,6 +149,7 @@ impl KindWithContent { | |||||||
|             DocumentAddition { |             DocumentAddition { | ||||||
|                 index_name: _, |                 index_name: _, | ||||||
|                 content_file: _, |                 content_file: _, | ||||||
|  |                 primary_key, | ||||||
|             } => { |             } => { | ||||||
|                 // TODO: TAMO: delete the file |                 // TODO: TAMO: delete the file | ||||||
|                 // content_file.delete(); |                 // content_file.delete(); | ||||||
| @@ -149,6 +157,7 @@ impl KindWithContent { | |||||||
|             } |             } | ||||||
|             // There is no data associated with all these tasks |             // There is no data associated with all these tasks | ||||||
|             DumpExport { .. } |             DumpExport { .. } | ||||||
|  |             | Settings { .. } | ||||||
|             | DocumentDeletion { .. } |             | DocumentDeletion { .. } | ||||||
|             | ClearAllDocuments { .. } |             | ClearAllDocuments { .. } | ||||||
|             | RenameIndex { .. } |             | RenameIndex { .. } | ||||||
| @@ -175,6 +184,7 @@ impl KindWithContent { | |||||||
|                 new_name: rhs, |                 new_name: rhs, | ||||||
|             } |             } | ||||||
|             | SwapIndex { lhs, rhs } => Some(vec![lhs, rhs]), |             | SwapIndex { lhs, rhs } => Some(vec![lhs, rhs]), | ||||||
|  |             Settings { index_name, .. } => Some(vec![index_name]), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user