mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Split the index-scheduler in ~500 loc modules
This commit is contained in:
		
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										203
									
								
								crates/index-scheduler/src/dump.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										203
									
								
								crates/index-scheduler/src/dump.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,203 @@ | |||||||
|  | use std::collections::HashMap; | ||||||
|  |  | ||||||
|  | use dump::{KindDump, TaskDump, UpdateFile}; | ||||||
|  | use meilisearch_types::heed::RwTxn; | ||||||
|  | use meilisearch_types::milli::documents::DocumentsBatchBuilder; | ||||||
|  | use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use uuid::Uuid; | ||||||
|  |  | ||||||
|  | use crate::{utils, Error, IndexScheduler, Result}; | ||||||
|  |  | ||||||
|  | pub struct Dump<'a> { | ||||||
|  |     index_scheduler: &'a IndexScheduler, | ||||||
|  |     wtxn: RwTxn<'a>, | ||||||
|  |  | ||||||
|  |     indexes: HashMap<String, RoaringBitmap>, | ||||||
|  |     statuses: HashMap<Status, RoaringBitmap>, | ||||||
|  |     kinds: HashMap<Kind, RoaringBitmap>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> Dump<'a> { | ||||||
|  |     pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> { | ||||||
|  |         // While loading a dump, no one else should be able to access the scheduler, so we can hold a write transaction for the whole import. | ||||||
|  |         let wtxn = index_scheduler.env.write_txn()?; | ||||||
|  |  | ||||||
|  |         Ok(Dump { | ||||||
|  |             index_scheduler, | ||||||
|  |             wtxn, | ||||||
|  |             indexes: HashMap::new(), | ||||||
|  |             statuses: HashMap::new(), | ||||||
|  |             kinds: HashMap::new(), | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Register a new task coming from a dump in the scheduler. | ||||||
|  |     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. | ||||||
|  |     pub fn register_dumped_task( | ||||||
|  |         &mut self, | ||||||
|  |         task: TaskDump, | ||||||
|  |         content_file: Option<Box<UpdateFile>>, | ||||||
|  |     ) -> Result<Task> { | ||||||
|  |         let content_uuid = match content_file { | ||||||
|  |             Some(content_file) if task.status == Status::Enqueued => { | ||||||
|  |                 let (uuid, mut file) = self.index_scheduler.queue.create_update_file(false)?; | ||||||
|  |                 let mut builder = DocumentsBatchBuilder::new(&mut file); | ||||||
|  |                 for doc in content_file { | ||||||
|  |                     builder.append_json_object(&doc?)?; | ||||||
|  |                 } | ||||||
|  |                 builder.into_inner()?; | ||||||
|  |                 file.persist()?; | ||||||
|  |  | ||||||
|  |                 Some(uuid) | ||||||
|  |             } | ||||||
|  |             // If the task isn't `Enqueued` then just generate a recognisable `Uuid` | ||||||
|  |             // in case we try to open it later. | ||||||
|  |             _ if task.status != Status::Enqueued => Some(Uuid::nil()), | ||||||
|  |             _ => None, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let task = Task { | ||||||
|  |             uid: task.uid, | ||||||
|  |             batch_uid: task.batch_uid, | ||||||
|  |             enqueued_at: task.enqueued_at, | ||||||
|  |             started_at: task.started_at, | ||||||
|  |             finished_at: task.finished_at, | ||||||
|  |             error: task.error, | ||||||
|  |             canceled_by: task.canceled_by, | ||||||
|  |             details: task.details, | ||||||
|  |             status: task.status, | ||||||
|  |             kind: match task.kind { | ||||||
|  |                 KindDump::DocumentImport { | ||||||
|  |                     primary_key, | ||||||
|  |                     method, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation, | ||||||
|  |                 } => KindWithContent::DocumentAdditionOrUpdate { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     primary_key, | ||||||
|  |                     method, | ||||||
|  |                     content_file: content_uuid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { | ||||||
|  |                     documents_ids, | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::DocumentDeletionByFilter { filter } => { | ||||||
|  |                     KindWithContent::DocumentDeletionByFilter { | ||||||
|  |                         filter_expr: filter, | ||||||
|  |                         index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 KindDump::DocumentEdition { filter, context, function } => { | ||||||
|  |                     KindWithContent::DocumentEdition { | ||||||
|  |                         index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                         filter_expr: filter, | ||||||
|  |                         context, | ||||||
|  |                         function, | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 KindDump::DocumentClear => KindWithContent::DocumentClear { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::Settings { settings, is_deletion, allow_index_creation } => { | ||||||
|  |                     KindWithContent::SettingsUpdate { | ||||||
|  |                         index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                         new_settings: settings, | ||||||
|  |                         is_deletion, | ||||||
|  |                         allow_index_creation, | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 KindDump::IndexDeletion => KindWithContent::IndexDeletion { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     primary_key, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     primary_key, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, | ||||||
|  |                 KindDump::TaskCancelation { query, tasks } => { | ||||||
|  |                     KindWithContent::TaskCancelation { query, tasks } | ||||||
|  |                 } | ||||||
|  |                 KindDump::TasksDeletion { query, tasks } => { | ||||||
|  |                     KindWithContent::TaskDeletion { query, tasks } | ||||||
|  |                 } | ||||||
|  |                 KindDump::DumpCreation { keys, instance_uid } => { | ||||||
|  |                     KindWithContent::DumpCreation { keys, instance_uid } | ||||||
|  |                 } | ||||||
|  |                 KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; | ||||||
|  |  | ||||||
|  |         for index in task.indexes() { | ||||||
|  |             match self.indexes.get_mut(index) { | ||||||
|  |                 Some(bitmap) => { | ||||||
|  |                     bitmap.insert(task.uid); | ||||||
|  |                 } | ||||||
|  |                 None => { | ||||||
|  |                     let mut bitmap = RoaringBitmap::new(); | ||||||
|  |                     bitmap.insert(task.uid); | ||||||
|  |                     self.indexes.insert(index.to_string(), bitmap); | ||||||
|  |                 } | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         utils::insert_task_datetime( | ||||||
|  |             &mut self.wtxn, | ||||||
|  |             self.index_scheduler.queue.tasks.enqueued_at, | ||||||
|  |             task.enqueued_at, | ||||||
|  |             task.uid, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         // we can't override started_at & finished_at, so we must only set them if the task is finished and won't change | ||||||
|  |         if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) { | ||||||
|  |             if let Some(started_at) = task.started_at { | ||||||
|  |                 utils::insert_task_datetime( | ||||||
|  |                     &mut self.wtxn, | ||||||
|  |                     self.index_scheduler.queue.tasks.started_at, | ||||||
|  |                     started_at, | ||||||
|  |                     task.uid, | ||||||
|  |                 )?; | ||||||
|  |             } | ||||||
|  |             if let Some(finished_at) = task.finished_at { | ||||||
|  |                 utils::insert_task_datetime( | ||||||
|  |                     &mut self.wtxn, | ||||||
|  |                     self.index_scheduler.queue.tasks.finished_at, | ||||||
|  |                     finished_at, | ||||||
|  |                     task.uid, | ||||||
|  |                 )?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.statuses.entry(task.status).or_default().insert(task.uid); | ||||||
|  |         self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid); | ||||||
|  |  | ||||||
|  |         Ok(task) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Commit all the changes and exit the importing dump state | ||||||
|  |     pub fn finish(mut self) -> Result<()> { | ||||||
|  |         for (index, bitmap) in self.indexes { | ||||||
|  |             self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; | ||||||
|  |         } | ||||||
|  |         for (status, bitmap) in self.statuses { | ||||||
|  |             self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?; | ||||||
|  |         } | ||||||
|  |         for (kind, bitmap) in self.kinds { | ||||||
|  |             self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.wtxn.commit()?; | ||||||
|  |         self.index_scheduler.scheduler.wake_up.signal(); | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -323,7 +323,7 @@ mod tests { | |||||||
|     use uuid::Uuid; |     use uuid::Uuid; | ||||||
|  |  | ||||||
|     use super::super::IndexMapper; |     use super::super::IndexMapper; | ||||||
|     use crate::tests::IndexSchedulerHandle; |     use crate::test_utils::IndexSchedulerHandle; | ||||||
|     use crate::utils::clamp_to_page_size; |     use crate::utils::clamp_to_page_size; | ||||||
|     use crate::IndexScheduler; |     use crate::IndexScheduler; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,7 +16,7 @@ use uuid::Uuid; | |||||||
| use self::index_map::IndexMap; | use self::index_map::IndexMap; | ||||||
| use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; | use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; | ||||||
| use crate::uuid_codec::UuidCodec; | use crate::uuid_codec::UuidCodec; | ||||||
| use crate::{Error, Result}; | use crate::{Error, IndexBudget, IndexSchedulerOptions, Result}; | ||||||
|  |  | ||||||
| mod index_map; | mod index_map; | ||||||
|  |  | ||||||
| @@ -140,27 +140,19 @@ impl IndexStats { | |||||||
| impl IndexMapper { | impl IndexMapper { | ||||||
|     pub fn new( |     pub fn new( | ||||||
|         env: &Env, |         env: &Env, | ||||||
|         base_path: PathBuf, |         wtxn: &mut RwTxn, | ||||||
|         index_base_map_size: usize, |         options: &IndexSchedulerOptions, | ||||||
|         index_growth_amount: usize, |         budget: IndexBudget, | ||||||
|         index_count: usize, |  | ||||||
|         enable_mdb_writemap: bool, |  | ||||||
|         indexer_config: IndexerConfig, |  | ||||||
|     ) -> Result<Self> { |     ) -> Result<Self> { | ||||||
|         let mut wtxn = env.write_txn()?; |  | ||||||
|         let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?; |  | ||||||
|         let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?; |  | ||||||
|         wtxn.commit()?; |  | ||||||
|  |  | ||||||
|         Ok(Self { |         Ok(Self { | ||||||
|             index_map: Arc::new(RwLock::new(IndexMap::new(index_count))), |             index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))), | ||||||
|             index_mapping, |             index_mapping: env.create_database(wtxn, Some(INDEX_MAPPING))?, | ||||||
|             index_stats, |             index_stats: env.create_database(wtxn, Some(INDEX_STATS))?, | ||||||
|             base_path, |             base_path: options.indexes_path.clone(), | ||||||
|             index_base_map_size, |             index_base_map_size: budget.map_size, | ||||||
|             index_growth_amount, |             index_growth_amount: options.index_growth_amount, | ||||||
|             enable_mdb_writemap, |             enable_mdb_writemap: options.enable_mdb_writemap, | ||||||
|             indexer_config: Arc::new(indexer_config), |             indexer_config: options.indexer_config.clone(), | ||||||
|             currently_updating_index: Default::default(), |             currently_updating_index: Default::default(), | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -5,11 +5,11 @@ use meilisearch_types::batches::Batch; | |||||||
| use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; | use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; | ||||||
| use meilisearch_types::heed::{Database, RoTxn}; | use meilisearch_types::heed::{Database, RoTxn}; | ||||||
| use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | ||||||
| use meilisearch_types::tasks::{Details, Task}; | use meilisearch_types::tasks::{Details, Kind, Status, Task}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::index_mapper::IndexMapper; | use crate::index_mapper::IndexMapper; | ||||||
| use crate::{IndexScheduler, Kind, Status, BEI128}; | use crate::{IndexScheduler, BEI128}; | ||||||
|  |  | ||||||
| pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | ||||||
|     // Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run. |     // Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run. | ||||||
| @@ -18,41 +18,14 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | |||||||
|     scheduler.assert_internally_consistent(); |     scheduler.assert_internally_consistent(); | ||||||
|  |  | ||||||
|     let IndexScheduler { |     let IndexScheduler { | ||||||
|         autobatching_enabled, |  | ||||||
|         cleanup_enabled: _, |         cleanup_enabled: _, | ||||||
|         must_stop_processing: _, |  | ||||||
|         processing_tasks, |         processing_tasks, | ||||||
|         file_store, |  | ||||||
|         env, |         env, | ||||||
|         all_tasks, |         queue, | ||||||
|         all_batches, |         scheduler, | ||||||
|         batch_to_tasks_mapping, |  | ||||||
|         // task reverse index |  | ||||||
|         status, |  | ||||||
|         kind, |  | ||||||
|         index_tasks, |  | ||||||
|         canceled_by, |  | ||||||
|         enqueued_at, |  | ||||||
|         started_at, |  | ||||||
|         finished_at, |  | ||||||
|  |  | ||||||
|         // batch reverse index |  | ||||||
|         batch_status, |  | ||||||
|         batch_kind, |  | ||||||
|         batch_index_tasks, |  | ||||||
|         batch_enqueued_at, |  | ||||||
|         batch_started_at, |  | ||||||
|         batch_finished_at, |  | ||||||
|  |  | ||||||
|         index_mapper, |         index_mapper, | ||||||
|         features: _, |         features: _, | ||||||
|         max_number_of_tasks: _, |  | ||||||
|         max_number_of_batched_tasks: _, |  | ||||||
|         wake_up: _, |  | ||||||
|         dumps_path: _, |  | ||||||
|         snapshots_path: _, |  | ||||||
|         auth_path: _, |  | ||||||
|         version_file_path: _, |  | ||||||
|         webhook_url: _, |         webhook_url: _, | ||||||
|         webhook_authorization_header: _, |         webhook_authorization_header: _, | ||||||
|         test_breakpoint_sdr: _, |         test_breakpoint_sdr: _, | ||||||
| @@ -66,7 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | |||||||
|     let mut snap = String::new(); |     let mut snap = String::new(); | ||||||
|  |  | ||||||
|     let processing = processing_tasks.read().unwrap().clone(); |     let processing = processing_tasks.read().unwrap().clone(); | ||||||
|     snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n")); |     snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled)); | ||||||
|     snap.push_str(&format!( |     snap.push_str(&format!( | ||||||
|         "### Processing batch {:?}:\n", |         "### Processing batch {:?}:\n", | ||||||
|         processing.batch.as_ref().map(|batch| batch.uid) |         processing.batch.as_ref().map(|batch| batch.uid) | ||||||
| @@ -79,19 +52,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | |||||||
|     snap.push_str("\n----------------------------------------------------------------------\n"); |     snap.push_str("\n----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### All Tasks:\n"); |     snap.push_str("### All Tasks:\n"); | ||||||
|     snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks)); |     snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Status:\n"); |     snap.push_str("### Status:\n"); | ||||||
|     snap.push_str(&snapshot_status(&rtxn, *status)); |     snap.push_str(&snapshot_status(&rtxn, queue.tasks.status)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Kind:\n"); |     snap.push_str("### Kind:\n"); | ||||||
|     snap.push_str(&snapshot_kind(&rtxn, *kind)); |     snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Index Tasks:\n"); |     snap.push_str("### Index Tasks:\n"); | ||||||
|     snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks)); |     snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Index Mapper:\n"); |     snap.push_str("### Index Mapper:\n"); | ||||||
| @@ -99,55 +72,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | |||||||
|     snap.push_str("\n----------------------------------------------------------------------\n"); |     snap.push_str("\n----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Canceled By:\n"); |     snap.push_str("### Canceled By:\n"); | ||||||
|     snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by)); |     snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by)); | ||||||
|     snap.push_str("\n----------------------------------------------------------------------\n"); |     snap.push_str("\n----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Enqueued At:\n"); |     snap.push_str("### Enqueued At:\n"); | ||||||
|     snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at)); |     snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Started At:\n"); |     snap.push_str("### Started At:\n"); | ||||||
|     snap.push_str(&snapshot_date_db(&rtxn, *started_at)); |     snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Finished At:\n"); |     snap.push_str("### Finished At:\n"); | ||||||
|     snap.push_str(&snapshot_date_db(&rtxn, *finished_at)); |     snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### All Batches:\n"); |     snap.push_str("### All Batches:\n"); | ||||||
|     snap.push_str(&snapshot_all_batches(&rtxn, *all_batches)); |     snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batch to tasks mapping:\n"); |     snap.push_str("### Batch to tasks mapping:\n"); | ||||||
|     snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping)); |     snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batches Status:\n"); |     snap.push_str("### Batches Status:\n"); | ||||||
|     snap.push_str(&snapshot_status(&rtxn, *batch_status)); |     snap.push_str(&snapshot_status(&rtxn, queue.batches.status)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batches Kind:\n"); |     snap.push_str("### Batches Kind:\n"); | ||||||
|     snap.push_str(&snapshot_kind(&rtxn, *batch_kind)); |     snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batches Index Tasks:\n"); |     snap.push_str("### Batches Index Tasks:\n"); | ||||||
|     snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks)); |     snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batches Enqueued At:\n"); |     snap.push_str("### Batches Enqueued At:\n"); | ||||||
|     snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at)); |     snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batches Started At:\n"); |     snap.push_str("### Batches Started At:\n"); | ||||||
|     snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at)); |     snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### Batches Finished At:\n"); |     snap.push_str("### Batches Finished At:\n"); | ||||||
|     snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at)); |     snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at)); | ||||||
|     snap.push_str("----------------------------------------------------------------------\n"); |     snap.push_str("----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap.push_str("### File Store:\n"); |     snap.push_str("### File Store:\n"); | ||||||
|     snap.push_str(&snapshot_file_store(file_store)); |     snap.push_str(&snapshot_file_store(&queue.file_store)); | ||||||
|     snap.push_str("\n----------------------------------------------------------------------\n"); |     snap.push_str("\n----------------------------------------------------------------------\n"); | ||||||
|  |  | ||||||
|     snap |     snap | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -8,7 +8,7 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use crate::utils::ProcessingBatch; | use crate::utils::ProcessingBatch; | ||||||
|  |  | ||||||
| #[derive(Clone)] | #[derive(Clone, Default)] | ||||||
| pub struct ProcessingTasks { | pub struct ProcessingTasks { | ||||||
|     pub batch: Option<Arc<ProcessingBatch>>, |     pub batch: Option<Arc<ProcessingBatch>>, | ||||||
|     /// The list of tasks ids that are currently running. |     /// The list of tasks ids that are currently running. | ||||||
| @@ -20,7 +20,7 @@ pub struct ProcessingTasks { | |||||||
| impl ProcessingTasks { | impl ProcessingTasks { | ||||||
|     /// Creates an empty `ProcessingTasks` struct. |     /// Creates an empty `ProcessingTasks` struct. | ||||||
|     pub fn new() -> ProcessingTasks { |     pub fn new() -> ProcessingTasks { | ||||||
|         ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None } |         ProcessingTasks::default() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn get_progress_view(&self) -> Option<ProgressView> { |     pub fn get_progress_view(&self) -> Option<ProgressView> { | ||||||
|   | |||||||
							
								
								
									
										537
									
								
								crates/index-scheduler/src/queue/batches.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										537
									
								
								crates/index-scheduler/src/queue/batches.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,537 @@ | |||||||
|  | use std::ops::{Bound, RangeBounds}; | ||||||
|  |  | ||||||
|  | use meilisearch_types::batches::{Batch, BatchId}; | ||||||
|  | use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; | ||||||
|  | use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; | ||||||
|  | use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | ||||||
|  | use meilisearch_types::tasks::{Kind, Status}; | ||||||
|  | use roaring::{MultiOps, RoaringBitmap}; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  |  | ||||||
|  | use super::{Query, Queue}; | ||||||
|  | use crate::processing::ProcessingTasks; | ||||||
|  | use crate::utils::{insert_task_datetime, keep_ids_within_datetimes, map_bound, ProcessingBatch}; | ||||||
|  | use crate::{Error, Result, BEI128}; | ||||||
|  |  | ||||||
|  | /// Database const names for the `IndexScheduler`. | ||||||
|  | mod db_name { | ||||||
|  |     pub const ALL_BATCHES: &str = "all-batches"; | ||||||
|  |  | ||||||
|  |     pub const BATCH_STATUS: &str = "batch-status"; | ||||||
|  |     pub const BATCH_KIND: &str = "batch-kind"; | ||||||
|  |     pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks"; | ||||||
|  |     pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at"; | ||||||
|  |     pub const BATCH_STARTED_AT: &str = "batch-started-at"; | ||||||
|  |     pub const BATCH_FINISHED_AT: &str = "batch-finished-at"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct BatchQueue { | ||||||
|  |     /// Contains all the batches accessible by their Id. | ||||||
|  |     pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>, | ||||||
|  |  | ||||||
|  |     /// All the batches containing a task matching the selected status. | ||||||
|  |     pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>, | ||||||
|  |     /// All the batch ids grouped by the kind of their tasks. | ||||||
|  |     pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>, | ||||||
|  |     /// Store the batches associated to an index. | ||||||
|  |     pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>, | ||||||
|  |     /// Store the batches containing tasks which were enqueued at a specific date | ||||||
|  |     pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  |     /// Store the batches containing finished tasks started at a specific date | ||||||
|  |     pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  |     /// Store the batches containing tasks finished at a specific date | ||||||
|  |     pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl BatchQueue { | ||||||
|  |     /// Builds another `BatchQueue` out of the same database handles. | ||||||
|  |     pub(crate) fn private_clone(&self) -> BatchQueue { | ||||||
|  |         BatchQueue { | ||||||
|  |             all_batches: self.all_batches, | ||||||
|  |             status: self.status, | ||||||
|  |             kind: self.kind, | ||||||
|  |             index_tasks: self.index_tasks, | ||||||
|  |             enqueued_at: self.enqueued_at, | ||||||
|  |             started_at: self.started_at, | ||||||
|  |             finished_at: self.finished_at, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Creates every database of the batch queue, using the names declared in `db_name`. | ||||||
|  |     /// | ||||||
|  |     /// Any error returned by `create_database` is propagated. | ||||||
|  |     pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> { | ||||||
|  |         Ok(Self { | ||||||
|  |             all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?, | ||||||
|  |             status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?, | ||||||
|  |             kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?, | ||||||
|  |             index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?, | ||||||
|  |             enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?, | ||||||
|  |             started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?, | ||||||
|  |             finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the union of the batch ids registered under every status. | ||||||
|  |     pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> { | ||||||
|  |         enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the id following the last key of `all_batches`, or the default id when the | ||||||
|  |     /// database is empty. | ||||||
|  |     pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> { | ||||||
|  |         Ok(self | ||||||
|  |             .all_batches | ||||||
|  |             // Only the key is needed, so the stored batch is not decoded. | ||||||
|  |             .remap_data_type::<DecodeIgnore>() | ||||||
|  |             .last(rtxn)? | ||||||
|  |             .map(|(k, _)| k + 1) | ||||||
|  |             .unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the batch stored under `batch_id`, or `None` when there is no such entry. | ||||||
|  |     pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> { | ||||||
|  |         Ok(self.all_batches.get(rtxn, &batch_id)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the whole set of batches that belongs to this index. | ||||||
|  |     /// | ||||||
|  |     /// The set is empty when nothing is stored for `index`. | ||||||
|  |     pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> { | ||||||
|  |         Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Applies `f` to the batch ids of `index` and stores the result. | ||||||
|  |     /// | ||||||
|  |     /// The entry is deleted instead of being written when the bitmap ends up empty. | ||||||
|  |     pub(crate) fn update_index( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         index: &str, | ||||||
|  |         f: impl Fn(&mut RoaringBitmap), | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut batches = self.index_batches(wtxn, index)?; | ||||||
|  |         f(&mut batches); | ||||||
|  |         if batches.is_empty() { | ||||||
|  |             self.index_tasks.delete(wtxn, index)?; | ||||||
|  |         } else { | ||||||
|  |             self.index_tasks.put(wtxn, index, &batches)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the batch ids stored under `status`, or an empty set when there is none. | ||||||
|  |     pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> { | ||||||
|  |         Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Overwrites the batch ids stored under `status` with `bitmap`. | ||||||
|  |     pub(crate) fn put_status( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         status: Status, | ||||||
|  |         bitmap: &RoaringBitmap, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         Ok(self.status.put(wtxn, &status, bitmap)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Applies `f` to the batch ids stored under `status` and writes the result back. | ||||||
|  |     /// | ||||||
|  |     /// Unlike `update_index`, the bitmap is written even when it ends up empty. | ||||||
|  |     pub(crate) fn update_status( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         status: Status, | ||||||
|  |         f: impl Fn(&mut RoaringBitmap), | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut batches = self.get_status(wtxn, status)?; | ||||||
|  |         f(&mut batches); | ||||||
|  |         self.put_status(wtxn, status, &batches)?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the batch ids stored under `kind`, or an empty set when there is none. | ||||||
|  |     pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> { | ||||||
|  |         Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Overwrites the batch ids stored under `kind` with `bitmap`. | ||||||
|  |     pub(crate) fn put_kind( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         kind: Kind, | ||||||
|  |         bitmap: &RoaringBitmap, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         Ok(self.kind.put(wtxn, &kind, bitmap)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Applies `f` to the batch ids stored under `kind` and writes the result back. | ||||||
|  |     /// | ||||||
|  |     /// Unlike `update_index`, the bitmap is written even when it ends up empty. | ||||||
|  |     pub(crate) fn update_kind( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         kind: Kind, | ||||||
|  |         f: impl Fn(&mut RoaringBitmap), | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut batches = self.get_kind(wtxn, kind)?; | ||||||
|  |         f(&mut batches); | ||||||
|  |         self.put_kind(wtxn, kind, &batches)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Stores `batch` in `all_batches` and registers its id under each of its statuses, | ||||||
|  |     /// kinds, indexes and dates. | ||||||
|  |     /// | ||||||
|  |     /// The stored `Batch` never carries a progress view. | ||||||
|  |     /// | ||||||
|  |     /// # Panics | ||||||
|  |     /// | ||||||
|  |     /// Panics when `batch.finished_at` is `None`. | ||||||
|  |     pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> { | ||||||
|  |         self.all_batches.put( | ||||||
|  |             wtxn, | ||||||
|  |             &batch.uid, | ||||||
|  |             &Batch { | ||||||
|  |                 uid: batch.uid, | ||||||
|  |                 progress: None, | ||||||
|  |                 details: batch.details, | ||||||
|  |                 stats: batch.stats, | ||||||
|  |                 started_at: batch.started_at, | ||||||
|  |                 finished_at: batch.finished_at, | ||||||
|  |             }, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         for status in batch.statuses { | ||||||
|  |             self.update_status(wtxn, status, |bitmap| { | ||||||
|  |                 bitmap.insert(batch.uid); | ||||||
|  |             })?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for kind in batch.kinds { | ||||||
|  |             self.update_kind(wtxn, kind, |bitmap| { | ||||||
|  |                 bitmap.insert(batch.uid); | ||||||
|  |             })?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for index in batch.indexes { | ||||||
|  |             self.update_index(wtxn, &index, |bitmap| { | ||||||
|  |                 bitmap.insert(batch.uid); | ||||||
|  |             })?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Both ends of the enqueue window are indexed in the same database. | ||||||
|  |         if let Some(enqueued_at) = batch.oldest_enqueued_at { | ||||||
|  |             insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; | ||||||
|  |         } | ||||||
|  |         if let Some(enqueued_at) = batch.earliest_enqueued_at { | ||||||
|  |             insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; | ||||||
|  |         } | ||||||
|  |         insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?; | ||||||
|  |         // NOTE(review): presumably callers always set `finished_at` before writing - confirm. | ||||||
|  |         insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Converts an iterator of batch ids into a `Vec` of batches. | ||||||
|  |     /// | ||||||
|  |     /// The batch currently held by `processing` is built from memory and gets the current | ||||||
|  |     /// progress view attached. Every other batch MUST exist in the database, otherwise a | ||||||
|  |     /// `CorruptedTaskQueue` error is returned. | ||||||
|  |     pub(crate) fn get_existing_batches( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         tasks: impl IntoIterator<Item = BatchId>, | ||||||
|  |         processing: &ProcessingTasks, | ||||||
|  |     ) -> Result<Vec<Batch>> { | ||||||
|  |         tasks | ||||||
|  |             .into_iter() | ||||||
|  |             .map(|batch_id| match processing.batch.as_ref() { | ||||||
|  |                 Some(current) if current.uid == batch_id => { | ||||||
|  |                     let mut batch = current.to_batch(); | ||||||
|  |                     batch.progress = processing.get_progress_view(); | ||||||
|  |                     Ok(batch) | ||||||
|  |                 } | ||||||
|  |                 _ => self | ||||||
|  |                     .get_batch(rtxn, batch_id) | ||||||
|  |                     .and_then(|batch| batch.ok_or(Error::CorruptedTaskQueue)), | ||||||
|  |             }) | ||||||
|  |             .collect::<Result<_>>() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Queue { | ||||||
|  |     /// Return the batch ids matched by the given query from the index scheduler's point of view. | ||||||
|  |     /// | ||||||
|  |     /// The candidates are every batch id registered under a status plus the batch held by | ||||||
|  |     /// `processing`, if any. Each filter set on `query` then narrows them down. `limit` is | ||||||
|  |     /// applied last and keeps the highest ids, or the lowest ones when `reverse` is set. | ||||||
|  |     pub(crate) fn get_batch_ids( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         query: &Query, | ||||||
|  |         processing: &ProcessingTasks, | ||||||
|  |     ) -> Result<RoaringBitmap> { | ||||||
|  |         let Query { | ||||||
|  |             limit, | ||||||
|  |             from, | ||||||
|  |             reverse, | ||||||
|  |             uids, | ||||||
|  |             batch_uids, | ||||||
|  |             statuses, | ||||||
|  |             types, | ||||||
|  |             index_uids, | ||||||
|  |             canceled_by, | ||||||
|  |             before_enqueued_at, | ||||||
|  |             after_enqueued_at, | ||||||
|  |             before_started_at, | ||||||
|  |             after_started_at, | ||||||
|  |             before_finished_at, | ||||||
|  |             after_finished_at, | ||||||
|  |         } = query; | ||||||
|  |  | ||||||
|  |         let mut batches = self.batches.all_batch_ids(rtxn)?; | ||||||
|  |         if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { | ||||||
|  |             batches.insert(batch_id); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // `from` is the first id of the page: drop everything located before it in iteration order. | ||||||
|  |         if let Some(from) = from { | ||||||
|  |             if reverse.unwrap_or_default() { | ||||||
|  |                 batches.remove_range(u32::MIN..*from); | ||||||
|  |             } else if let Some(first_dropped) = from.checked_add(1) { | ||||||
|  |                 // The upper bound is inclusive so that `u32::MAX` itself is dropped too. | ||||||
|  |                 batches.remove_range(first_dropped..=u32::MAX); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(batch_uids) = &batch_uids { | ||||||
|  |             let batches_uids = RoaringBitmap::from_iter(batch_uids); | ||||||
|  |             batches &= batches_uids; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(status) = &statuses { | ||||||
|  |             let mut status_batches = RoaringBitmap::new(); | ||||||
|  |             for status in status { | ||||||
|  |                 match status { | ||||||
|  |                     // special case for Processing batches | ||||||
|  |                     Status::Processing => { | ||||||
|  |                         if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { | ||||||
|  |                             status_batches.insert(batch_id); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                     // Enqueued tasks are not stored in batches | ||||||
|  |                     Status::Enqueued => (), | ||||||
|  |                     status => status_batches |= &self.batches.get_status(rtxn, *status)?, | ||||||
|  |                 }; | ||||||
|  |             } | ||||||
|  |             // The processing batch only matches when `Processing` was explicitly requested. | ||||||
|  |             if !status.contains(&Status::Processing) { | ||||||
|  |                 if let Some(ref batch) = processing.batch { | ||||||
|  |                     batches.remove(batch.uid); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             batches &= status_batches; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Task uids are translated into the uids of the batches that contain them. | ||||||
|  |         if let Some(task_uids) = &uids { | ||||||
|  |             let mut batches_by_task_uids = RoaringBitmap::new(); | ||||||
|  |             for task_uid in task_uids { | ||||||
|  |                 if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? { | ||||||
|  |                     if let Some(batch_uid) = task.batch_uid { | ||||||
|  |                         batches_by_task_uids.insert(batch_uid); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             batches &= batches_by_task_uids; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // There is no database for this query, we must retrieve the task queried by the client and ensure it's valid | ||||||
|  |         if let Some(canceled_by) = &canceled_by { | ||||||
|  |             let mut all_canceled_batches = RoaringBitmap::new(); | ||||||
|  |             for cancel_uid in canceled_by { | ||||||
|  |                 if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? { | ||||||
|  |                     if task.kind.as_kind() == Kind::TaskCancelation | ||||||
|  |                         && task.status == Status::Succeeded | ||||||
|  |                     { | ||||||
|  |                         if let Some(batch_uid) = task.batch_uid { | ||||||
|  |                             all_canceled_batches.insert(batch_uid); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // if the canceled_by has been specified but no batch | ||||||
|  |             // matches then we prefer matching zero than all batches. | ||||||
|  |             if all_canceled_batches.is_empty() { | ||||||
|  |                 return Ok(RoaringBitmap::new()); | ||||||
|  |             } else { | ||||||
|  |                 batches &= all_canceled_batches; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(kind) = &types { | ||||||
|  |             let mut kind_batches = RoaringBitmap::new(); | ||||||
|  |             for kind in kind { | ||||||
|  |                 kind_batches |= self.batches.get_kind(rtxn, *kind)?; | ||||||
|  |                 // The processing batch is matched against its in-memory kinds. | ||||||
|  |                 if let Some(uid) = processing | ||||||
|  |                     .batch | ||||||
|  |                     .as_ref() | ||||||
|  |                     .and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid)) | ||||||
|  |                 { | ||||||
|  |                     kind_batches.insert(uid); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             batches &= &kind_batches; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(index) = &index_uids { | ||||||
|  |             let mut index_batches = RoaringBitmap::new(); | ||||||
|  |             for index in index { | ||||||
|  |                 index_batches |= self.batches.index_batches(rtxn, index)?; | ||||||
|  |                 // The processing batch is matched against its in-memory indexes. | ||||||
|  |                 if let Some(uid) = processing | ||||||
|  |                     .batch | ||||||
|  |                     .as_ref() | ||||||
|  |                     .and_then(|batch| batch.indexes.contains(index).then_some(batch.uid)) | ||||||
|  |                 { | ||||||
|  |                     index_batches.insert(uid); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             batches &= &index_batches; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // For the started_at filter, we need to treat the part of the batches that are processing from the part of the | ||||||
|  |         // batches that are not processing. The non-processing ones are filtered normally while the processing ones | ||||||
|  |         // are entirely removed unless the in-memory startedAt variable falls within the date filter. | ||||||
|  |         // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. | ||||||
|  |         // NOTE(review): `processing.processing` is combined with batch ids here - confirm it holds batch ids and not task ids. | ||||||
|  |         batches = { | ||||||
|  |             let (mut filtered_non_processing_batches, mut filtered_processing_batches) = | ||||||
|  |                 (&batches - &*processing.processing, &batches & &*processing.processing); | ||||||
|  |  | ||||||
|  |             // special case for Processing batches | ||||||
|  |             // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds | ||||||
|  |             let mut clear_filtered_processing_batches = | ||||||
|  |                 |start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| { | ||||||
|  |                     let start = map_bound(start, |b| b.unix_timestamp_nanos()); | ||||||
|  |                     let end = map_bound(end, |b| b.unix_timestamp_nanos()); | ||||||
|  |                     let is_within_dates = RangeBounds::contains( | ||||||
|  |                         &(start, end), | ||||||
|  |                         &processing | ||||||
|  |                             .batch | ||||||
|  |                             .as_ref() | ||||||
|  |                             .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) | ||||||
|  |                             .unix_timestamp_nanos(), | ||||||
|  |                     ); | ||||||
|  |                     if !is_within_dates { | ||||||
|  |                         filtered_processing_batches.clear(); | ||||||
|  |                     } | ||||||
|  |                 }; | ||||||
|  |             match (after_started_at, before_started_at) { | ||||||
|  |                 (None, None) => (), | ||||||
|  |                 (None, Some(before)) => { | ||||||
|  |                     clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before)) | ||||||
|  |                 } | ||||||
|  |                 (Some(after), None) => { | ||||||
|  |                     clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded) | ||||||
|  |                 } | ||||||
|  |                 (Some(after), Some(before)) => clear_filtered_processing_batches( | ||||||
|  |                     Bound::Excluded(*after), | ||||||
|  |                     Bound::Excluded(*before), | ||||||
|  |                 ), | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             keep_ids_within_datetimes( | ||||||
|  |                 rtxn, | ||||||
|  |                 &mut filtered_non_processing_batches, | ||||||
|  |                 self.batches.started_at, | ||||||
|  |                 *after_started_at, | ||||||
|  |                 *before_started_at, | ||||||
|  |             )?; | ||||||
|  |             filtered_non_processing_batches | filtered_processing_batches | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         keep_ids_within_datetimes( | ||||||
|  |             rtxn, | ||||||
|  |             &mut batches, | ||||||
|  |             self.batches.enqueued_at, | ||||||
|  |             *after_enqueued_at, | ||||||
|  |             *before_enqueued_at, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         keep_ids_within_datetimes( | ||||||
|  |             rtxn, | ||||||
|  |             &mut batches, | ||||||
|  |             self.batches.finished_at, | ||||||
|  |             *after_finished_at, | ||||||
|  |             *before_finished_at, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         // The limit keeps the first ids in iteration order: ascending when reversed, descending otherwise. | ||||||
|  |         if let Some(limit) = limit { | ||||||
|  |             batches = if reverse.unwrap_or_default() { | ||||||
|  |                 batches.into_iter().take(*limit as usize).collect() | ||||||
|  |             } else { | ||||||
|  |                 batches.into_iter().rev().take(*limit as usize).collect() | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(batches) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the batch ids matching the query along with the total number of batches | ||||||
|  |     /// by ignoring the from and limit parameters from the user's point of view. | ||||||
|  |     /// | ||||||
|  |     /// There are two differences between an internal query and a query executed by | ||||||
|  |     /// the user. | ||||||
|  |     /// | ||||||
|  |     /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated | ||||||
|  |     /// with many indexes internally. | ||||||
|  |     /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. | ||||||
|  |     /// | ||||||
|  |     /// The returned total is computed before the authorization filters are applied. | ||||||
|  |     pub(crate) fn get_batch_ids_from_authorized_indexes( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         query: &Query, | ||||||
|  |         filters: &meilisearch_auth::AuthFilter, | ||||||
|  |         processing: &ProcessingTasks, | ||||||
|  |     ) -> Result<(RoaringBitmap, u64)> { | ||||||
|  |         // compute all batches matching the filter by ignoring the limits, to find the number of batches matching | ||||||
|  |         // the filter. | ||||||
|  |         // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares | ||||||
|  |         // us from modifying the underlying implementation, and the performance remains sufficient. | ||||||
|  |         // Should this change, we would modify `get_batch_ids` to directly return the number of matching batches. | ||||||
|  |         let total_batches = | ||||||
|  |             self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?; | ||||||
|  |         let mut batches = self.get_batch_ids(rtxn, query, processing)?; | ||||||
|  |  | ||||||
|  |         // If the query contains a list of index uid or there is a finite list of authorized indexes, | ||||||
|  |         // then we must exclude all the batches that only contains tasks associated to multiple indexes. | ||||||
|  |         // This works because we don't autobatch tasks associated to multiple indexes with tasks associated | ||||||
|  |         // to a single index. e.g: IndexSwap cannot be batched with IndexCreation. | ||||||
|  |         if query.index_uids.is_some() || !filters.all_indexes_authorized() { | ||||||
|  |             for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) { | ||||||
|  |                 // `batches` holds batch ids, so the ids to subtract must come from the batch | ||||||
|  |                 // queue: the task queue would hand back task ids, which live in another id space. | ||||||
|  |                 batches -= self.batches.get_kind(rtxn, kind)?; | ||||||
|  |                 if let Some(batch) = processing.batch.as_ref() { | ||||||
|  |                     if batch.kinds.contains(&kind) { | ||||||
|  |                         batches.remove(batch.uid); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Any batch that is internally associated with at least one authorized index | ||||||
|  |         // must be returned. | ||||||
|  |         if !filters.all_indexes_authorized() { | ||||||
|  |             // Both bitmaps hold batch ids, split by whether the index they come from is authorized. | ||||||
|  |             let mut authorized_batches = RoaringBitmap::new(); | ||||||
|  |             let mut forbidden_batches = RoaringBitmap::new(); | ||||||
|  |  | ||||||
|  |             let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?; | ||||||
|  |             for result in all_indexes_iter { | ||||||
|  |                 let (index, index_batches) = result?; | ||||||
|  |                 if filters.is_index_authorized(index) { | ||||||
|  |                     authorized_batches |= index_batches; | ||||||
|  |                 } else { | ||||||
|  |                     forbidden_batches |= index_batches; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             // The processing batch is classified from its in-memory list of indexes. | ||||||
|  |             if let Some(batch) = processing.batch.as_ref() { | ||||||
|  |                 for index in &batch.indexes { | ||||||
|  |                     if filters.is_index_authorized(index) { | ||||||
|  |                         authorized_batches.insert(batch.uid); | ||||||
|  |                     } else { | ||||||
|  |                         forbidden_batches.insert(batch.uid); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // If a batch had ONE valid task then it should be returned | ||||||
|  |             let invalid_batches = forbidden_batches - authorized_batches; | ||||||
|  |  | ||||||
|  |             batches -= invalid_batches; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok((batches, total_batches.len())) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the batches matching the query along with the total returned by | ||||||
|  |     /// `get_batch_ids_from_authorized_indexes`. | ||||||
|  |     /// | ||||||
|  |     /// The batches come in descending id order, or ascending when `query.reverse` is set, | ||||||
|  |     /// and at most `query.limit` of them are loaded. | ||||||
|  |     pub(crate) fn get_batches_from_authorized_indexes( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         query: &Query, | ||||||
|  |         filters: &meilisearch_auth::AuthFilter, | ||||||
|  |         processing: &ProcessingTasks, | ||||||
|  |     ) -> Result<(Vec<Batch>, u64)> { | ||||||
|  |         let (batches, total) = | ||||||
|  |             self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?; | ||||||
|  |         // Both directions are boxed so that they share a single iterator type. | ||||||
|  |         let batches = if query.reverse.unwrap_or_default() { | ||||||
|  |             Box::new(batches.into_iter()) as Box<dyn Iterator<Item = u32>> | ||||||
|  |         } else { | ||||||
|  |             Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>> | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let batches = self.batches.get_existing_batches( | ||||||
|  |             rtxn, | ||||||
|  |             batches.take(query.limit.unwrap_or(u32::MAX) as usize), | ||||||
|  |             processing, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         Ok((batches, total)) | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										473
									
								
								crates/index-scheduler/src/queue/batches_test.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										473
									
								
								crates/index-scheduler/src/queue/batches_test.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,473 @@ | |||||||
|  | use meili_snap::snapshot; | ||||||
|  | use meilisearch_auth::AuthFilter; | ||||||
|  | use meilisearch_types::index_uid_pattern::IndexUidPattern; | ||||||
|  | use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; | ||||||
|  | use time::{Duration, OffsetDateTime}; | ||||||
|  |  | ||||||
|  | use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; | ||||||
|  | use crate::test_utils::Breakpoint::*; | ||||||
|  | use crate::test_utils::{index_creation_task, FailureLocation}; | ||||||
|  | use crate::{IndexScheduler, Query}; | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn query_batches_from_and_limit() { | ||||||
|  |     let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |     // Register three index creations and snapshot the scheduler after each of them. | ||||||
|  |     let _task = | ||||||
|  |         index_scheduler.register(index_creation_task("doggo", "bone"), None, false).unwrap(); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); | ||||||
|  |     let _task = | ||||||
|  |         index_scheduler.register(index_creation_task("whalo", "plankton"), None, false).unwrap(); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); | ||||||
|  |     let _task = index_scheduler | ||||||
|  |         .register(index_creation_task("catto", "his_own_vomit"), None, false) | ||||||
|  |         .unwrap(); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); | ||||||
|  |  | ||||||
|  |     handle.advance_n_successful_batches(3); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); | ||||||
|  |  | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap().clone(); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     // Runs `query` with the default authorization filter and keeps only the batch ids. | ||||||
|  |     let batch_ids = |query: Query| { | ||||||
|  |         let (ids, _total) = index_scheduler | ||||||
|  |             .queue | ||||||
|  |             .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) | ||||||
|  |             .unwrap(); | ||||||
|  |         ids | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // `limit` alone keeps the most recent batches. | ||||||
|  |     let batches = batch_ids(Query { limit: Some(0), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     let batches = batch_ids(Query { limit: Some(1), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[2,]"); | ||||||
|  |  | ||||||
|  |     let batches = batch_ids(Query { limit: Some(2), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[1,2,]"); | ||||||
|  |  | ||||||
|  |     // `from` alone drops every batch whose id is above it. | ||||||
|  |     let batches = batch_ids(Query { from: Some(1), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); | ||||||
|  |  | ||||||
|  |     let batches = batch_ids(Query { from: Some(2), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]"); | ||||||
|  |  | ||||||
|  |     // `from` and `limit` combined. | ||||||
|  |     let batches = batch_ids(Query { from: Some(1), limit: Some(1), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[1,]"); | ||||||
|  |  | ||||||
|  |     let batches = batch_ids(Query { from: Some(1), limit: Some(2), ..Default::default() }); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Checks batch queries filtered by status, by `started_at` bounds and by uid while | ||||||
|  | // three index creation tasks move through the scheduler. | ||||||
|  | #[test] | ||||||
|  | fn query_batches_simple() { | ||||||
|  |     let start_time = OffsetDateTime::now_utc(); | ||||||
|  |  | ||||||
|  |     let (index_scheduler, mut handle) = | ||||||
|  |         IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); | ||||||
|  |  | ||||||
|  |     let kind = index_creation_task("catto", "mouse"); | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = index_creation_task("doggo", "sheep"); | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = index_creation_task("whalo", "fish"); | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |  | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); | ||||||
|  |  | ||||||
|  |     handle.advance_till([Start, BatchCreated]); | ||||||
|  |  | ||||||
|  |     let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; | ||||||
|  |     let (mut batches, _) = index_scheduler | ||||||
|  |         .get_batches_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     assert_eq!(batches.len(), 1); | ||||||
|  |     // Pin the only time-dependent field so the serialized batch below is stable. | ||||||
|  |     batches[0].started_at = OffsetDateTime::UNIX_EPOCH; | ||||||
|  |     // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 | ||||||
|  |     let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); | ||||||
|  |     snapshot!(batch, @r#" | ||||||
|  |         { | ||||||
|  |           "uid": 0, | ||||||
|  |           "details": { | ||||||
|  |             "primaryKey": "mouse" | ||||||
|  |           }, | ||||||
|  |           "stats": { | ||||||
|  |             "totalNbTasks": 1, | ||||||
|  |             "status": { | ||||||
|  |               "processing": 1 | ||||||
|  |             }, | ||||||
|  |             "types": { | ||||||
|  |               "indexCreation": 1 | ||||||
|  |             }, | ||||||
|  |             "indexUids": { | ||||||
|  |               "catto": 1 | ||||||
|  |             } | ||||||
|  |           }, | ||||||
|  |           "startedAt": "1970-01-01T00:00:00Z", | ||||||
|  |           "finishedAt": null | ||||||
|  |         } | ||||||
|  |         "#); | ||||||
|  |  | ||||||
|  |     let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contain any enqueued tasks | ||||||
|  |  | ||||||
|  |     let query = | ||||||
|  |         Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Enqueued, Status::Processing]), | ||||||
|  |         after_started_at: Some(start_time), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // both enqueued and processing tasks in the first tick, but limited to those with a started_at | ||||||
|  |     // that comes after the start of the test, which should exclude the enqueued tasks | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Enqueued, Status::Processing]), | ||||||
|  |         before_started_at: Some(start_time), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // both enqueued and processing tasks in the first tick, but limited to those with a started_at | ||||||
|  |     // that comes before the start of the test, which should exclude all of them | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Enqueued, Status::Processing]), | ||||||
|  |         after_started_at: Some(start_time), | ||||||
|  |         before_started_at: Some(start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // both enqueued and processing tasks in the first tick, but limited to those with a started_at | ||||||
|  |     // that comes after the start of the test and before one minute after the start of the test, | ||||||
|  |     // which should exclude the enqueued tasks and include the only processing task | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,]"); | ||||||
|  |  | ||||||
|  |     handle.advance_till([ | ||||||
|  |         InsideProcessBatch, | ||||||
|  |         InsideProcessBatch, | ||||||
|  |         ProcessBatchSucceeded, | ||||||
|  |         AfterProcessing, | ||||||
|  |         Start, | ||||||
|  |         BatchCreated, | ||||||
|  |     ]); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit"); | ||||||
|  |  | ||||||
|  |     let second_start_time = OffsetDateTime::now_utc(); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Succeeded, Status::Processing]), | ||||||
|  |         after_started_at: Some(start_time), | ||||||
|  |         before_started_at: Some(start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // both succeeded and processing tasks in the first tick, but limited to those with a started_at | ||||||
|  |     // that comes after the start of the test and before one minute after the start of the test, | ||||||
|  |     // which should include all tasks | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Succeeded, Status::Processing]), | ||||||
|  |         before_started_at: Some(start_time), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // both succeeded and processing tasks in the first tick, but limited to those with a started_at | ||||||
|  |     // that comes before the start of the test, which should exclude all tasks | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), | ||||||
|  |         after_started_at: Some(second_start_time), | ||||||
|  |         before_started_at: Some(second_start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // enqueued, succeeded and processing tasks, but limited to those with a started_at | ||||||
|  |     // that comes after the start of the second part of the test and before one minute after the | ||||||
|  |     // second start of the test, which should exclude all tasks | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` | ||||||
|  |     handle.advance_till([ | ||||||
|  |         InsideProcessBatch, | ||||||
|  |         InsideProcessBatch, | ||||||
|  |         ProcessBatchSucceeded, | ||||||
|  |         AfterProcessing, | ||||||
|  |         Start, | ||||||
|  |         BatchCreated, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // we run the same query to verify that, and indeed find that the last task is matched | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[2,]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), | ||||||
|  |         after_started_at: Some(second_start_time), | ||||||
|  |         before_started_at: Some(second_start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // enqueued, succeeded, or processing tasks started after the second part of the test, should | ||||||
|  |     // again only return the last task | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[2,]"); | ||||||
|  |  | ||||||
|  |     handle.advance_till([ProcessBatchFailed, AfterProcessing]); | ||||||
|  |  | ||||||
|  |     // now the last task should have failed | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // so running the last query should return nothing | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Failed]), | ||||||
|  |         after_started_at: Some(second_start_time), | ||||||
|  |         before_started_at: Some(second_start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // but the same query on failed tasks should return the last task | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[2,]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Failed]), | ||||||
|  |         uids: Some(vec![1]), | ||||||
|  |         after_started_at: Some(second_start_time), | ||||||
|  |         before_started_at: Some(second_start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // same query but with an invalid uid | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     let query = Query { | ||||||
|  |         statuses: Some(vec![Status::Failed]), | ||||||
|  |         uids: Some(vec![2]), | ||||||
|  |         after_started_at: Some(second_start_time), | ||||||
|  |         before_started_at: Some(second_start_time + Duration::minutes(1)), | ||||||
|  |         ..Default::default() | ||||||
|  |     }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // same query but with a valid uid | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[2,]"); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Checks how batch queries combine the `index_uids` filter with the indexes allowed by | ||||||
|  | // the `AuthFilter`, including for the index swap tasks registered below. | ||||||
|  | #[test] | ||||||
|  | fn query_batches_special_rules() { | ||||||
|  |     let (index_scheduler, mut handle) = | ||||||
|  |         IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); | ||||||
|  |  | ||||||
|  |     let kind = index_creation_task("catto", "mouse"); | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = index_creation_task("doggo", "sheep"); | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = KindWithContent::IndexSwap { | ||||||
|  |         swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], | ||||||
|  |     }; | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = KindWithContent::IndexSwap { | ||||||
|  |         swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], | ||||||
|  |     }; | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |  | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); | ||||||
|  |  | ||||||
|  |     handle.advance_till([Start, BatchCreated]); | ||||||
|  |  | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     // Copy of the processing tasks taken right after the first batch was created. | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap().clone(); | ||||||
|  |  | ||||||
|  |     let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) | ||||||
|  |         .unwrap(); | ||||||
|  |     // only the first task associated with catto is returned, the indexSwap tasks are excluded! | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,]"); | ||||||
|  |  | ||||||
|  |     let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_batch_ids_from_authorized_indexes( | ||||||
|  |             &rtxn, | ||||||
|  |             &query, | ||||||
|  |             &AuthFilter::with_allowed_indexes( | ||||||
|  |                 vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), | ||||||
|  |             ), | ||||||
|  |             &proc, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks | ||||||
|  |     // associated with doggo -> empty result | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[]"); | ||||||
|  |  | ||||||
|  |     // Release the read transaction before advancing; a fresh one is opened afterwards. | ||||||
|  |     drop(rtxn); | ||||||
|  |     // We're going to advance and process all the batches for the next query to actually hit the db | ||||||
|  |     handle.advance_till([ | ||||||
|  |         InsideProcessBatch, | ||||||
|  |         InsideProcessBatch, | ||||||
|  |         ProcessBatchSucceeded, | ||||||
|  |         AfterProcessing, | ||||||
|  |     ]); | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |     handle.advance_n_failed_batches(2); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything"); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     // NOTE(review): `proc` was cloned before the scheduler advanced and is reused as-is in the | ||||||
|  |     // queries below; confirm that passing this earlier copy is intended. | ||||||
|  |     let query = Query::default(); | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_batch_ids_from_authorized_indexes( | ||||||
|  |             &rtxn, | ||||||
|  |             &query, | ||||||
|  |             &AuthFilter::with_allowed_indexes( | ||||||
|  |                 vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), | ||||||
|  |             ), | ||||||
|  |             &proc, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks | ||||||
|  |     // -> only the index creation of doggo should be returned | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[1,]"); | ||||||
|  |  | ||||||
|  |     let query = Query::default(); | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_batch_ids_from_authorized_indexes( | ||||||
|  |             &rtxn, | ||||||
|  |             &query, | ||||||
|  |             &AuthFilter::with_allowed_indexes( | ||||||
|  |                 vec![ | ||||||
|  |                     IndexUidPattern::new_unchecked("catto"), | ||||||
|  |                     IndexUidPattern::new_unchecked("doggo"), | ||||||
|  |                 ] | ||||||
|  |                 .into_iter() | ||||||
|  |                 .collect(), | ||||||
|  |             ), | ||||||
|  |             &proc, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks | ||||||
|  |     // -> all tasks except the swap of catto with whalo are returned | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); | ||||||
|  |  | ||||||
|  |     let query = Query::default(); | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) | ||||||
|  |         .unwrap(); | ||||||
|  |     // we asked for all the tasks with all index authorized -> all tasks returned | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]"); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Checks the `canceled_by` batch filter, first with every index authorized and then with | ||||||
|  | // only `doggo` authorized. | ||||||
|  | #[test] | ||||||
|  | fn query_batches_canceled_by() { | ||||||
|  |     let (index_scheduler, mut handle) = | ||||||
|  |         IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); | ||||||
|  |  | ||||||
|  |     let kind = index_creation_task("catto", "mouse"); | ||||||
|  |     let _ = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = index_creation_task("doggo", "sheep"); | ||||||
|  |     let _ = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = KindWithContent::IndexSwap { | ||||||
|  |         swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], | ||||||
|  |     }; | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |  | ||||||
|  |     // Let one batch succeed before the cancelation task is registered. | ||||||
|  |     handle.advance_n_successful_batches(1); | ||||||
|  |     let kind = KindWithContent::TaskCancelation { | ||||||
|  |         query: "test_query".to_string(), | ||||||
|  |         tasks: [0, 1, 2, 3].into_iter().collect(), | ||||||
|  |     }; | ||||||
|  |     let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     handle.advance_n_successful_batches(1); | ||||||
|  |  | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); | ||||||
|  |  | ||||||
|  |     let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) | ||||||
|  |         .unwrap(); | ||||||
|  |     // The batch zero was the index creation task, the 1 is the task cancellation | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[1,]"); | ||||||
|  |  | ||||||
|  |     let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; | ||||||
|  |     let (batches, _) = index_scheduler | ||||||
|  |         .get_batch_ids_from_authorized_indexes( | ||||||
|  |             &query, | ||||||
|  |             &AuthFilter::with_allowed_indexes( | ||||||
|  |                 vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), | ||||||
|  |             ), | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     // Return only 1 because the user is not authorized to see task 2 | ||||||
|  |     snapshot!(snapshot_bitmap(&batches), @"[1,]"); | ||||||
|  | } | ||||||
							
								
								
									
										379
									
								
								crates/index-scheduler/src/queue/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										379
									
								
								crates/index-scheduler/src/queue/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,379 @@ | |||||||
|  | mod batches; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod batches_test; | ||||||
|  | mod tasks; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod tasks_test; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test; | ||||||
|  |  | ||||||
|  | use std::collections::BTreeMap; | ||||||
|  | use std::time::Duration; | ||||||
|  |  | ||||||
|  | use file_store::FileStore; | ||||||
|  | use meilisearch_types::batches::BatchId; | ||||||
|  | use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; | ||||||
|  | use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; | ||||||
|  | use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use time::format_description::well_known::Rfc3339; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  | use uuid::Uuid; | ||||||
|  |  | ||||||
|  | use self::batches::BatchQueue; | ||||||
|  | use self::tasks::TaskQueue; | ||||||
|  | use crate::processing::ProcessingTasks; | ||||||
|  | use crate::utils::{ | ||||||
|  |     check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch, | ||||||
|  | }; | ||||||
|  | use crate::{Error, IndexSchedulerOptions, Result, TaskId}; | ||||||
|  |  | ||||||
|  | /// Names of the databases created directly by the [`Queue`]. | ||||||
|  | mod db_name { | ||||||
|  |     /// Name of the database that maps a batch id to the ids of its tasks. | ||||||
|  |     pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. | ||||||
|  | /// | ||||||
|  | /// An empty/default query (where each field is set to `None`) matches all tasks. | ||||||
|  | /// Each non-null field restricts the set of tasks further. | ||||||
|  | #[derive(Default, Debug, Clone, PartialEq, Eq)] | ||||||
|  | pub struct Query { | ||||||
|  |     /// The maximum number of tasks to be matched | ||||||
|  |     pub limit: Option<u32>, | ||||||
|  |     /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched | ||||||
|  |     pub from: Option<u32>, | ||||||
|  |     /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`. | ||||||
|  |     pub reverse: Option<bool>, | ||||||
|  |     /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched | ||||||
|  |     pub uids: Option<Vec<TaskId>>, | ||||||
|  |     /// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched | ||||||
|  |     pub batch_uids: Option<Vec<BatchId>>, | ||||||
|  |     /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasks | ||||||
|  |     pub statuses: Option<Vec<Status>>, | ||||||
|  |     /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. | ||||||
|  |     /// | ||||||
|  |     /// The kind of a task is given by: | ||||||
|  |     /// ``` | ||||||
|  |     /// # use meilisearch_types::tasks::{Task, Kind}; | ||||||
|  |     /// # fn doc_func(task: Task) -> Kind { | ||||||
|  |     /// task.kind.as_kind() | ||||||
|  |     /// # } | ||||||
|  |     /// ``` | ||||||
|  |     pub types: Option<Vec<Kind>>, | ||||||
|  |     /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks | ||||||
|  |     pub index_uids: Option<Vec<String>>, | ||||||
|  |     /// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::KindWithContent::TaskCancelation) tasks | ||||||
|  |     /// that canceled the matched tasks. | ||||||
|  |     pub canceled_by: Option<Vec<TaskId>>, | ||||||
|  |     /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. | ||||||
|  |     pub before_enqueued_at: Option<OffsetDateTime>, | ||||||
|  |     /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. | ||||||
|  |     pub after_enqueued_at: Option<OffsetDateTime>, | ||||||
|  |     /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. | ||||||
|  |     pub before_started_at: Option<OffsetDateTime>, | ||||||
|  |     /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. | ||||||
|  |     pub after_started_at: Option<OffsetDateTime>, | ||||||
|  |     /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. | ||||||
|  |     pub before_finished_at: Option<OffsetDateTime>, | ||||||
|  |     /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. | ||||||
|  |     pub after_finished_at: Option<OffsetDateTime>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Query { | ||||||
|  |     /// Tell whether the query is unrestricted: `true` only when every field is `None`, | ||||||
|  |     /// in which case the query matches all tasks. | ||||||
|  |     pub fn is_empty(&self) -> bool { | ||||||
|  |         // Destructuring without `..` keeps the check exhaustive: a field added to `Query` | ||||||
|  |         // must be handled here or the code stops compiling. | ||||||
|  |         let Query { | ||||||
|  |             limit, | ||||||
|  |             from, | ||||||
|  |             reverse, | ||||||
|  |             uids, | ||||||
|  |             batch_uids, | ||||||
|  |             statuses, | ||||||
|  |             types, | ||||||
|  |             index_uids, | ||||||
|  |             canceled_by, | ||||||
|  |             before_enqueued_at, | ||||||
|  |             after_enqueued_at, | ||||||
|  |             before_started_at, | ||||||
|  |             after_started_at, | ||||||
|  |             before_finished_at, | ||||||
|  |             after_finished_at, | ||||||
|  |         } = self; | ||||||
|  |  | ||||||
|  |         limit.is_none() | ||||||
|  |             && from.is_none() | ||||||
|  |             && reverse.is_none() | ||||||
|  |             && uids.is_none() | ||||||
|  |             && batch_uids.is_none() | ||||||
|  |             && statuses.is_none() | ||||||
|  |             && types.is_none() | ||||||
|  |             && index_uids.is_none() | ||||||
|  |             && canceled_by.is_none() | ||||||
|  |             && before_enqueued_at.is_none() | ||||||
|  |             && after_enqueued_at.is_none() | ||||||
|  |             && before_started_at.is_none() | ||||||
|  |             && after_started_at.is_none() | ||||||
|  |             && before_finished_at.is_none() | ||||||
|  |             && after_finished_at.is_none() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Append an [index id](meilisearch_types::tasks::Task::index_uid) to the list of | ||||||
|  |     /// permitted indexes, creating the list when it was `None`. | ||||||
|  |     pub fn with_index(mut self, index_uid: String) -> Self { | ||||||
|  |         self.index_uids.get_or_insert_with(Vec::new).push(index_uid); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Drop the `from` and `limit` restrictions from the query. | ||||||
|  |     /// | ||||||
|  |     /// Useful to get the total number of tasks matching a filter. | ||||||
|  |     pub fn without_limits(mut self) -> Self { | ||||||
|  |         self.limit = None; | ||||||
|  |         self.from = None; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Groups the task queue, the batch queue, the mapping between batches and their tasks, | ||||||
|  | /// and the store holding the files referenced by the tasks. | ||||||
|  | pub struct Queue { | ||||||
|  |     pub(crate) tasks: tasks::TaskQueue, | ||||||
|  |     pub(crate) batches: batches::BatchQueue, | ||||||
|  |  | ||||||
|  |     /// Matches a batch id with the associated task ids. | ||||||
|  |     pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>, | ||||||
|  |  | ||||||
|  |     /// The list of files referenced by the tasks. | ||||||
|  |     pub(crate) file_store: FileStore, | ||||||
|  |  | ||||||
|  |     /// The max number of tasks allowed before the scheduler starts to delete | ||||||
|  |     /// the finished tasks automatically. | ||||||
|  |     pub(crate) max_number_of_tasks: usize, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Queue { | ||||||
|  |     /// Build another `Queue` from this one: the task and batch queues go through their own | ||||||
|  |     /// `private_clone`, the file store is cloned and the remaining fields are copied. | ||||||
|  |     pub(crate) fn private_clone(&self) -> Queue { | ||||||
|  |         let Queue { tasks, batches, batch_to_tasks_mapping, file_store, max_number_of_tasks } = | ||||||
|  |             self; | ||||||
|  |  | ||||||
|  |         Queue { | ||||||
|  |             tasks: tasks.private_clone(), | ||||||
|  |             batches: batches.private_clone(), | ||||||
|  |             batch_to_tasks_mapping: *batch_to_tasks_mapping, | ||||||
|  |             file_store: file_store.clone(), | ||||||
|  |             max_number_of_tasks: *max_number_of_tasks, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Build the queue: set up the file store at `options.update_file_path`, then create | ||||||
|  |     /// the batch-to-tasks mapping database and the task and batch queues using `wtxn`. | ||||||
|  |     pub(crate) fn new( | ||||||
|  |         env: &Env, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         options: &IndexSchedulerOptions, | ||||||
|  |     ) -> Result<Self> { | ||||||
|  |         // The field expressions run in the order written below, so the file store is built first. | ||||||
|  |         Ok(Self { | ||||||
|  |             file_store: FileStore::new(&options.update_file_path)?, | ||||||
|  |             batch_to_tasks_mapping: env | ||||||
|  |                 .create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?, | ||||||
|  |             tasks: TaskQueue::new(env, wtxn)?, | ||||||
|  |             batches: BatchQueue::new(env, wtxn)?, | ||||||
|  |             max_number_of_tasks: options.max_number_of_tasks, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Look up the ids of the tasks recorded for `batch_id`. | ||||||
|  |     /// | ||||||
|  |     /// A batch without any entry in the mapping yields an empty bitmap. | ||||||
|  |     pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<RoaringBitmap> { | ||||||
|  |         let task_ids = self.batch_to_tasks_mapping.get(rtxn, &batch_id)?; | ||||||
|  |         Ok(task_ids.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks. | ||||||
|  |     /// | ||||||
|  |     /// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown. | ||||||
|  |     pub(crate) fn get_existing_tasks_for_processing_batch( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         processing_batch: &mut ProcessingBatch, | ||||||
|  |         tasks: impl IntoIterator<Item = TaskId>, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         tasks | ||||||
|  |             .into_iter() | ||||||
|  |             .map(|task_id| { | ||||||
|  |                 // A missing task is turned into `Error::CorruptedTaskQueue`. | ||||||
|  |                 let mut task = self | ||||||
|  |                     .tasks | ||||||
|  |                     .get_task(rtxn, task_id) | ||||||
|  |                     .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)); | ||||||
|  |                 // NOTE(review): `task` is still a `Result` here; presumably `processing` iterates | ||||||
|  |                 // over it and therefore only sees the task on `Ok` -- confirm against | ||||||
|  |                 // `ProcessingBatch::processing`. | ||||||
|  |                 processing_batch.processing(&mut task); | ||||||
|  |                 task | ||||||
|  |             }) | ||||||
|  |             // Collecting into a `Result` stops at the first error and returns it. | ||||||
|  |             .collect::<Result<_>>() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Store the task ids of `batch` under its uid in the batch-to-tasks mapping, then | ||||||
|  |     /// hand the batch over to the batch queue to be written. | ||||||
|  |     pub(crate) fn write_batch( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         batch: ProcessingBatch, | ||||||
|  |         tasks: &RoaringBitmap, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let Self { batch_to_tasks_mapping, batches, .. } = self; | ||||||
|  |  | ||||||
|  |         // The mapping is written first, while `batch` is still available to read its uid. | ||||||
|  |         batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?; | ||||||
|  |         batches.write_batch(wtxn, batch)?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Delete the update file referenced by `task`, if it has one. | ||||||
|  |     /// | ||||||
|  |     /// A task without a content uuid is left alone and `Ok(())` is returned. | ||||||
|  |     pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { | ||||||
|  |         if let Some(content_file) = task.content_uuid() { | ||||||
|  |             self.delete_update_file(content_file) | ||||||
|  |         } else { | ||||||
|  |             Ok(()) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Remove the update file identified by `uuid` from the file store. | ||||||
|  |     /// | ||||||
|  |     /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. | ||||||
|  |     pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> { | ||||||
|  |         self.file_store.delete(uuid)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Create a file and register it in the index scheduler. | ||||||
|  |     /// | ||||||
|  |     /// The returned file and uuid can be used to associate | ||||||
|  |     /// some data to a task. The file will be kept until | ||||||
|  |     /// the task has been fully processed. | ||||||
|  |     /// | ||||||
|  |     /// When `dry_run` is `true` the file store is not involved: the nil uuid is | ||||||
|  |     /// returned along with the file produced by `file_store::File::dry_file`. | ||||||
|  |     pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { | ||||||
|  |         if !dry_run { | ||||||
|  |             return Ok(self.file_store.new_update()?); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let placeholder = file_store::File::dry_file()?; | ||||||
|  |         Ok((Uuid::nil(), placeholder)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Test-only variant of `create_update_file` where the caller chooses the uuid. | ||||||
|  |     #[cfg(test)] | ||||||
|  |     pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { | ||||||
|  |         let update = self.file_store.new_update_with_uuid(uuid)?; | ||||||
|  |         Ok(update) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// The size on disk taken by all the update files contained in the `IndexScheduler`, in bytes. | ||||||
|  |     pub fn compute_update_file_size(&self) -> Result<u64> { | ||||||
|  |         let total_size = self.file_store.compute_total_size()?; | ||||||
|  |         Ok(total_size) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Build a new enqueued task out of `kind` and register it through `self.tasks`. | ||||||
|  |     /// | ||||||
|  |     /// - `task_id`: optional uid to give to the task. It is refused with `Error::BadTaskId` | ||||||
|  |     ///   when it is lower than the next available task id. | ||||||
|  |     /// - `dry_run`: when `true`, the task is built and validated but nothing is written. | ||||||
|  |     /// | ||||||
|  |     /// Returns the task that was (or would have been) registered. | ||||||
|  |     pub fn register( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         kind: &KindWithContent, | ||||||
|  |         task_id: Option<TaskId>, | ||||||
|  |         dry_run: bool, | ||||||
|  |     ) -> Result<Task> { | ||||||
|  |         let next_task_id = self.tasks.next_task_id(wtxn)?; | ||||||
|  |  | ||||||
|  |         // A caller-provided uid is never allowed to go backward in the queue. | ||||||
|  |         match task_id { | ||||||
|  |             Some(uid) if uid < next_task_id => { | ||||||
|  |                 return Err(Error::BadTaskId { received: uid, expected: next_task_id }); | ||||||
|  |             } | ||||||
|  |             _ => (), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let uid = task_id.unwrap_or(next_task_id); | ||||||
|  |         let mut task = Task { | ||||||
|  |             uid, | ||||||
|  |             // The batch is only known once the task starts being processed. | ||||||
|  |             batch_uid: None, | ||||||
|  |             enqueued_at: OffsetDateTime::now_utc(), | ||||||
|  |             started_at: None, | ||||||
|  |             finished_at: None, | ||||||
|  |             error: None, | ||||||
|  |             canceled_by: None, | ||||||
|  |             details: kind.default_details(), | ||||||
|  |             status: Status::Enqueued, | ||||||
|  |             kind: kind.clone(), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // Deletion and cancelation tasks must never target tasks that are newer than themselves. | ||||||
|  |         filter_out_references_to_newer_tasks(&mut task); | ||||||
|  |         // An index swap task must be well-formed (no duplicate indexes). | ||||||
|  |         check_index_swap_validity(&task)?; | ||||||
|  |  | ||||||
|  |         // Every check passed: only a real run actually writes the task. | ||||||
|  |         if !dry_run { | ||||||
|  |             self.tasks.register(wtxn, &task)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(task) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Register a task to cleanup the task queue if needed. | ||||||
|  |     /// | ||||||
|  |     /// Nothing happens while the queue holds fewer than `max_number_of_tasks` tasks. Past that | ||||||
|  |     /// limit, a `TaskDeletion` task targeting up to 100 000 of the oldest finished tasks | ||||||
|  |     /// (succeeded, failed or canceled) is registered. If fewer than two tasks can be deleted, | ||||||
|  |     /// a warning is logged and nothing is registered. | ||||||
|  |     pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> { | ||||||
|  |         let nb_tasks = self.tasks.all_task_ids(wtxn)?.len(); | ||||||
|  |         // if we are below the configured limit everything is fine | ||||||
|  |         if nb_tasks < self.max_number_of_tasks as u64 { | ||||||
|  |             return Ok(()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default() | ||||||
|  |             | self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default() | ||||||
|  |             | self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default(); | ||||||
|  |  | ||||||
|  |         // Task ids only grow and the bitmap is iterated in ascending order, | ||||||
|  |         // so the first ids are the oldest finished tasks. | ||||||
|  |         let to_delete = RoaringBitmap::from_iter(finished.into_iter().take(100_000)); | ||||||
|  |  | ||||||
|  |         // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete | ||||||
|  |         //     the deletion tasks we enqueued ourselves. | ||||||
|  |         if to_delete.len() < 2 { | ||||||
|  |             tracing::warn!("The task queue is almost full, but no task can be deleted yet."); | ||||||
|  |             // the only thing we can do is hope that the user tasks are going to finish | ||||||
|  |             return Ok(()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         tracing::info!( | ||||||
|  |             "The task queue is almost full. Deleting the oldest {} finished tasks.", | ||||||
|  |             to_delete.len() | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         // The highest id of the selection is the newest task about to be deleted. | ||||||
|  |         let newest_task_id = | ||||||
|  |             to_delete.max().expect("`to_delete` contains at least two tasks, checked above"); | ||||||
|  |         let last_task_to_delete = | ||||||
|  |             self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |  | ||||||
|  |         // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. | ||||||
|  |         let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); | ||||||
|  |  | ||||||
|  |         self.register( | ||||||
|  |             wtxn, | ||||||
|  |             &KindWithContent::TaskDeletion { | ||||||
|  |                 query: format!( | ||||||
|  |                     "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", | ||||||
|  |                     delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, | ||||||
|  |                 ), | ||||||
|  |                 tasks: to_delete, | ||||||
|  |             }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Count the tasks per status, per type and per index. | ||||||
|  |     /// | ||||||
|  |     /// The returned map has three entries, `"statuses"`, `"types"` and `"indexes"`, each of | ||||||
|  |     /// them mapping a name to a number of tasks. | ||||||
|  |     /// | ||||||
|  |     /// The `processing` count is the size of `processing.processing`, and that same amount | ||||||
|  |     /// is removed from the stored `enqueued` count. | ||||||
|  |     pub fn get_stats( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         processing: &ProcessingTasks, | ||||||
|  |     ) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { | ||||||
|  |         let mut res = BTreeMap::new(); | ||||||
|  |         let processing_tasks = processing.processing.len(); | ||||||
|  |  | ||||||
|  |         res.insert( | ||||||
|  |             "statuses".to_string(), | ||||||
|  |             enum_iterator::all::<Status>() | ||||||
|  |                 .map(|s| { | ||||||
|  |                     let tasks = self.tasks.get_status(rtxn, s)?.len(); | ||||||
|  |                     match s { | ||||||
|  |                         // `rtxn` and `processing` are two independent views of the queue: | ||||||
|  |                         // saturate rather than underflow if they ever disagree. | ||||||
|  |                         Status::Enqueued => { | ||||||
|  |                             Ok((s.to_string(), tasks.saturating_sub(processing_tasks))) | ||||||
|  |                         } | ||||||
|  |                         Status::Processing => Ok((s.to_string(), processing_tasks)), | ||||||
|  |                         s => Ok((s.to_string(), tasks)), | ||||||
|  |                     } | ||||||
|  |                 }) | ||||||
|  |                 .collect::<Result<BTreeMap<String, u64>>>()?, | ||||||
|  |         ); | ||||||
|  |         res.insert( | ||||||
|  |             "types".to_string(), | ||||||
|  |             enum_iterator::all::<Kind>() | ||||||
|  |                 .map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len()))) | ||||||
|  |                 .collect::<Result<BTreeMap<String, u64>>>()?, | ||||||
|  |         ); | ||||||
|  |         res.insert( | ||||||
|  |             "indexes".to_string(), | ||||||
|  |             self.tasks | ||||||
|  |                 .index_tasks | ||||||
|  |                 .iter(rtxn)? | ||||||
|  |                 .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) | ||||||
|  |                 .collect::<Result<BTreeMap<String, u64>>>()?, | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         Ok(res) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/batches_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/tasks_test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/queue/test.rs | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| [ | [ | ||||||
|   { |   { | ||||||
							
								
								
									
										514
									
								
								crates/index-scheduler/src/queue/tasks.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										514
									
								
								crates/index-scheduler/src/queue/tasks.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,514 @@ | |||||||
|  | use std::ops::{Bound, RangeBounds}; | ||||||
|  |  | ||||||
|  | use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; | ||||||
|  | use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; | ||||||
|  | use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | ||||||
|  | use meilisearch_types::tasks::{Kind, Status, Task}; | ||||||
|  | use roaring::{MultiOps, RoaringBitmap}; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  |  | ||||||
|  | use super::{Query, Queue}; | ||||||
|  | use crate::processing::ProcessingTasks; | ||||||
|  | use crate::utils::{self, insert_task_datetime, keep_ids_within_datetimes, map_bound}; | ||||||
|  | use crate::{Error, Result, TaskId, BEI128}; | ||||||
|  |  | ||||||
|  | /// Database const names for the `IndexScheduler`. | ||||||
|  | /// | ||||||
|  | /// Each constant is the name given to `create_database` in `TaskQueue::new`. | ||||||
|  | mod db_name { | ||||||
|  |     pub const ALL_TASKS: &str = "all-tasks"; | ||||||
|  |     pub const STATUS: &str = "status"; | ||||||
|  |     pub const KIND: &str = "kind"; | ||||||
|  |     pub const INDEX_TASKS: &str = "index-tasks"; | ||||||
|  |     // NOTE(review): unlike its siblings this name uses an underscore. Renaming it would | ||||||
|  |     // presumably make existing data unreachable, so it is kept as is -- confirm before touching. | ||||||
|  |     pub const CANCELED_BY: &str = "canceled_by"; | ||||||
|  |     pub const ENQUEUED_AT: &str = "enqueued-at"; | ||||||
|  |     pub const STARTED_AT: &str = "started-at"; | ||||||
|  |     pub const FINISHED_AT: &str = "finished-at"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Handles to the databases storing the tasks: the main `all_tasks` database plus the | ||||||
|  | /// databases grouping task ids by status, kind, index, canceling task and dates. | ||||||
|  | pub struct TaskQueue { | ||||||
|  |     /// The main database, it contains all the tasks accessible by their Id. | ||||||
|  |     pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>, | ||||||
|  |  | ||||||
|  |     /// All the tasks ids grouped by their status. | ||||||
|  |     // TODO we should not be able to serialize a `Status::Processing` in this database. | ||||||
|  |     pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>, | ||||||
|  |     /// All the tasks ids grouped by their kind. | ||||||
|  |     pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>, | ||||||
|  |     /// Store the tasks associated to an index. | ||||||
|  |     pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>, | ||||||
|  |     /// Store the tasks that were canceled by a task uid | ||||||
|  |     pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>, | ||||||
|  |     /// Store the task ids of tasks which were enqueued at a specific date | ||||||
|  |     pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  |     /// Store the task ids of finished tasks which started being processed at a specific date | ||||||
|  |     pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  |     /// Store the task ids of tasks which finished at a specific date | ||||||
|  |     pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl TaskQueue { | ||||||
|  |     /// Build another `TaskQueue` made of the very same database handles. | ||||||
|  |     /// | ||||||
|  |     /// Every field is copied out of `self`; no database is created or opened here. | ||||||
|  |     pub(crate) fn private_clone(&self) -> TaskQueue { | ||||||
|  |         TaskQueue { | ||||||
|  |             all_tasks: self.all_tasks, | ||||||
|  |             status: self.status, | ||||||
|  |             kind: self.kind, | ||||||
|  |             index_tasks: self.index_tasks, | ||||||
|  |             canceled_by: self.canceled_by, | ||||||
|  |             enqueued_at: self.enqueued_at, | ||||||
|  |             started_at: self.started_at, | ||||||
|  |             finished_at: self.finished_at, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Create the eight task databases in `env`, using the names declared in `db_name`. | ||||||
|  |     /// | ||||||
|  |     /// All the databases are created within `wtxn`; the first failing creation is returned | ||||||
|  |     /// as the error. | ||||||
|  |     pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> { | ||||||
|  |         Ok(Self { | ||||||
|  |             all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?, | ||||||
|  |             status: env.create_database(wtxn, Some(db_name::STATUS))?, | ||||||
|  |             kind: env.create_database(wtxn, Some(db_name::KIND))?, | ||||||
|  |             index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?, | ||||||
|  |             canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?, | ||||||
|  |             enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?, | ||||||
|  |             started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?, | ||||||
|  |             finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the key of the last entry of `all_tasks` **plus one**, or `None` when the | ||||||
|  |     /// database is empty. | ||||||
|  |     /// | ||||||
|  |     /// Despite its name, the returned value is therefore not the id of the last stored task | ||||||
|  |     /// but the id right after it; `next_task_id` relies on this. | ||||||
|  |     pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> { | ||||||
|  |         // `DecodeIgnore` skips the deserialization of the task, only the key is needed. | ||||||
|  |         Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the id to give to the next registered task. | ||||||
|  |     /// | ||||||
|  |     /// This is the value returned by `last_task_id`, or the default `TaskId` when it | ||||||
|  |     /// returns `None`. | ||||||
|  |     pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> { | ||||||
|  |         let candidate = self.last_task_id(rtxn)?; | ||||||
|  |         Ok(candidate.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the ids of all the tasks, computed as the union of the bitmaps stored | ||||||
|  |     /// under every `Status`. | ||||||
|  |     pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> { | ||||||
|  |         // `enum_iterator::all()` is inferred to iterate over `Status` from `get_status`'s signature. | ||||||
|  |         enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Fetch the task stored under `task_id` in `all_tasks`, or `None` if there is none. | ||||||
|  |     pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> { | ||||||
|  |         let task = self.all_tasks.get(rtxn, &task_id)?; | ||||||
|  |         Ok(task) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Overwrite the stored version of `task` and keep the `status`, `kind`, `started_at` | ||||||
|  |     /// and `finished_at` databases in sync with it. | ||||||
|  |     /// | ||||||
|  |     /// The `index_tasks`, `canceled_by` and `enqueued_at` databases are not modified here. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns `Error::CorruptedTaskQueue` when no task is stored under `task.uid`. | ||||||
|  |     /// | ||||||
|  |     /// # Panics | ||||||
|  |     /// | ||||||
|  |     /// Panics if `enqueued_at` differs from the stored task, or if `started_at` / | ||||||
|  |     /// `finished_at` changes while the stored task already had one. In debug builds it also | ||||||
|  |     /// panics when the task is unchanged, or when the update is not the first assignment | ||||||
|  |     /// of a `batch_uid`. | ||||||
|  |     pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { | ||||||
|  |         let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |  | ||||||
|  |         debug_assert!(old_task != *task); | ||||||
|  |         debug_assert_eq!(old_task.uid, task.uid); | ||||||
|  |         debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some()); | ||||||
|  |  | ||||||
|  |         // Move the task id from the bitmap of its old status to the bitmap of the new one. | ||||||
|  |         if old_task.status != task.status { | ||||||
|  |             self.update_status(wtxn, old_task.status, |bitmap| { | ||||||
|  |                 bitmap.remove(task.uid); | ||||||
|  |             })?; | ||||||
|  |             self.update_status(wtxn, task.status, |bitmap| { | ||||||
|  |                 bitmap.insert(task.uid); | ||||||
|  |             })?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Same move for the kind. | ||||||
|  |         if old_task.kind.as_kind() != task.kind.as_kind() { | ||||||
|  |             self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { | ||||||
|  |                 bitmap.remove(task.uid); | ||||||
|  |             })?; | ||||||
|  |             self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { | ||||||
|  |                 bitmap.insert(task.uid); | ||||||
|  |             })?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         assert_eq!( | ||||||
|  |             old_task.enqueued_at, task.enqueued_at, | ||||||
|  |             "Cannot update a task's enqueued_at time" | ||||||
|  |         ); | ||||||
|  |         // Dates can only go from unset to set, so only an insertion is ever needed. | ||||||
|  |         if old_task.started_at != task.started_at { | ||||||
|  |             assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); | ||||||
|  |             if let Some(started_at) = task.started_at { | ||||||
|  |                 insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         if old_task.finished_at != task.finished_at { | ||||||
|  |             assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); | ||||||
|  |             if let Some(finished_at) = task.finished_at { | ||||||
|  |                 insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.all_tasks.put(wtxn, &task.uid, task)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the whole set of tasks that belong to this index. | ||||||
|  |     /// | ||||||
|  |     /// An index without any entry yields an empty bitmap. | ||||||
|  |     pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> { | ||||||
|  |         let stored = self.index_tasks.get(rtxn, index)?; | ||||||
|  |         Ok(stored.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Apply `f` to the set of tasks of `index` and store the result. | ||||||
|  |     /// | ||||||
|  |     /// When the set ends up empty, the entry of the index is deleted instead of being written. | ||||||
|  |     pub(crate) fn update_index( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         index: &str, | ||||||
|  |         f: impl Fn(&mut RoaringBitmap), | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut bitmap = self.index_tasks(wtxn, index)?; | ||||||
|  |         f(&mut bitmap); | ||||||
|  |  | ||||||
|  |         if !bitmap.is_empty() { | ||||||
|  |             self.index_tasks.put(wtxn, index, &bitmap)?; | ||||||
|  |             return Ok(()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.index_tasks.delete(wtxn, index)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the ids of the tasks stored under `status`, or an empty bitmap if there is no entry. | ||||||
|  |     pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> { | ||||||
|  |         let stored = self.status.get(rtxn, &status)?; | ||||||
|  |         Ok(stored.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Replace the set of task ids stored under `status` with `bitmap`. | ||||||
|  |     pub(crate) fn put_status( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         status: Status, | ||||||
|  |         bitmap: &RoaringBitmap, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         self.status.put(wtxn, &status, bitmap)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Apply `f` to the set of task ids stored under `status` and write the result back. | ||||||
|  |     /// | ||||||
|  |     /// The bitmap is always written, even when it ends up empty. | ||||||
|  |     pub(crate) fn update_status( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         status: Status, | ||||||
|  |         f: impl Fn(&mut RoaringBitmap), | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut bitmap = self.get_status(wtxn, status)?; | ||||||
|  |         f(&mut bitmap); | ||||||
|  |         self.put_status(wtxn, status, &bitmap) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Return the ids of the tasks stored under `kind`, or an empty bitmap if there is no entry. | ||||||
|  |     pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> { | ||||||
|  |         let stored = self.kind.get(rtxn, &kind)?; | ||||||
|  |         Ok(stored.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Replace the set of task ids stored under `kind` with `bitmap`. | ||||||
|  |     pub(crate) fn put_kind( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         kind: Kind, | ||||||
|  |         bitmap: &RoaringBitmap, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         self.kind.put(wtxn, &kind, bitmap)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Apply `f` to the set of task ids stored under `kind` and write the result back. | ||||||
|  |     /// | ||||||
|  |     /// The bitmap is always written, even when it ends up empty. | ||||||
|  |     pub(crate) fn update_kind( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         kind: Kind, | ||||||
|  |         f: impl Fn(&mut RoaringBitmap), | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut bitmap = self.get_kind(wtxn, kind)?; | ||||||
|  |         f(&mut bitmap); | ||||||
|  |         self.put_kind(wtxn, kind, &bitmap) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Fetch every task whose id is yielded by `tasks`, in iteration order. | ||||||
|  |     /// | ||||||
|  |     /// The tasks MUST exist: the first id without a stored task makes the whole call | ||||||
|  |     /// fail with a `CorruptedTaskQueue` error. | ||||||
|  |     pub(crate) fn get_existing_tasks( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         tasks: impl IntoIterator<Item = TaskId>, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         let mut found = Vec::new(); | ||||||
|  |         for task_id in tasks { | ||||||
|  |             let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |             found.push(task); | ||||||
|  |         } | ||||||
|  |         Ok(found) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Store a new `task` and reference its uid in the index, status, kind and | ||||||
|  |     /// `enqueued_at` databases. | ||||||
|  |     /// | ||||||
|  |     /// The uid is always inserted under `Status::Enqueued`, whatever `task.status` holds. | ||||||
|  |     pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { | ||||||
|  |         let uid = task.uid; | ||||||
|  |         self.all_tasks.put(wtxn, &uid, task)?; | ||||||
|  |  | ||||||
|  |         // One entry per index the task refers to. | ||||||
|  |         for index in task.indexes() { | ||||||
|  |             self.update_index(wtxn, index, |ids| { | ||||||
|  |                 ids.insert(uid); | ||||||
|  |             })?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.update_status(wtxn, Status::Enqueued, |ids| { | ||||||
|  |             ids.insert(uid); | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         let kind = task.kind.as_kind(); | ||||||
|  |         self.update_kind(wtxn, kind, |ids| { | ||||||
|  |             ids.insert(uid); | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, uid)?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Queue { | ||||||
|  |     /// Return the task ids matched by the given query from the index scheduler's point of view. | ||||||
|  |     pub(crate) fn get_task_ids( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         query: &Query, | ||||||
|  |         processing_tasks: &ProcessingTasks, | ||||||
|  |     ) -> Result<RoaringBitmap> { | ||||||
|  |         let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } = | ||||||
|  |             processing_tasks; | ||||||
|  |         let Query { | ||||||
|  |             limit, | ||||||
|  |             from, | ||||||
|  |             reverse, | ||||||
|  |             uids, | ||||||
|  |             batch_uids, | ||||||
|  |             statuses, | ||||||
|  |             types, | ||||||
|  |             index_uids, | ||||||
|  |             canceled_by, | ||||||
|  |             before_enqueued_at, | ||||||
|  |             after_enqueued_at, | ||||||
|  |             before_started_at, | ||||||
|  |             after_started_at, | ||||||
|  |             before_finished_at, | ||||||
|  |             after_finished_at, | ||||||
|  |         } = query; | ||||||
|  |  | ||||||
|  |         let mut tasks = self.tasks.all_task_ids(rtxn)?; | ||||||
|  |  | ||||||
|  |         if let Some(from) = from { | ||||||
|  |             let range = if reverse.unwrap_or_default() { | ||||||
|  |                 u32::MIN..*from | ||||||
|  |             } else { | ||||||
|  |                 from.saturating_add(1)..u32::MAX | ||||||
|  |             }; | ||||||
|  |             tasks.remove_range(range); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(batch_uids) = batch_uids { | ||||||
|  |             let mut batch_tasks = RoaringBitmap::new(); | ||||||
|  |             for batch_uid in batch_uids { | ||||||
|  |                 if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { | ||||||
|  |                     batch_tasks |= &**processing_tasks; | ||||||
|  |                 } else { | ||||||
|  |                     batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             tasks &= batch_tasks; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(status) = statuses { | ||||||
|  |             let mut status_tasks = RoaringBitmap::new(); | ||||||
|  |             for status in status { | ||||||
|  |                 match status { | ||||||
|  |                     // special case for Processing tasks | ||||||
|  |                     Status::Processing => { | ||||||
|  |                         status_tasks |= &**processing_tasks; | ||||||
|  |                     } | ||||||
|  |                     status => status_tasks |= &self.tasks.get_status(rtxn, *status)?, | ||||||
|  |                 }; | ||||||
|  |             } | ||||||
|  |             if !status.contains(&Status::Processing) { | ||||||
|  |                 tasks -= &**processing_tasks; | ||||||
|  |             } | ||||||
|  |             tasks &= status_tasks; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(uids) = uids { | ||||||
|  |             let uids = RoaringBitmap::from_iter(uids); | ||||||
|  |             tasks &= &uids; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(canceled_by) = canceled_by { | ||||||
|  |             let mut all_canceled_tasks = RoaringBitmap::new(); | ||||||
|  |             for cancel_task_uid in canceled_by { | ||||||
|  |                 if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? { | ||||||
|  |                     all_canceled_tasks |= canceled_by_uid; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // if the canceled_by has been specified but no task | ||||||
|  |             // matches then we prefer matching zero than all tasks. | ||||||
|  |             if all_canceled_tasks.is_empty() { | ||||||
|  |                 return Ok(RoaringBitmap::new()); | ||||||
|  |             } else { | ||||||
|  |                 tasks &= all_canceled_tasks; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(kind) = types { | ||||||
|  |             let mut kind_tasks = RoaringBitmap::new(); | ||||||
|  |             for kind in kind { | ||||||
|  |                 kind_tasks |= self.tasks.get_kind(rtxn, *kind)?; | ||||||
|  |             } | ||||||
|  |             tasks &= &kind_tasks; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(index) = index_uids { | ||||||
|  |             let mut index_tasks = RoaringBitmap::new(); | ||||||
|  |             for index in index { | ||||||
|  |                 index_tasks |= self.tasks.index_tasks(rtxn, index)?; | ||||||
|  |             } | ||||||
|  |             tasks &= &index_tasks; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the | ||||||
|  |         // tasks that are not processing. The non-processing ones are filtered normally while the processing ones | ||||||
|  |         // are entirely removed unless the in-memory startedAt variable falls within the date filter. | ||||||
|  |         // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. | ||||||
|  |         tasks = { | ||||||
|  |             let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = | ||||||
|  |                 (&tasks - &**processing_tasks, &tasks & &**processing_tasks); | ||||||
|  |  | ||||||
|  |             // special case for Processing tasks | ||||||
|  |             // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds | ||||||
|  |             let mut clear_filtered_processing_tasks = | ||||||
|  |                 |start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| { | ||||||
|  |                     let start = map_bound(start, |b| b.unix_timestamp_nanos()); | ||||||
|  |                     let end = map_bound(end, |b| b.unix_timestamp_nanos()); | ||||||
|  |                     let is_within_dates = RangeBounds::contains( | ||||||
|  |                         &(start, end), | ||||||
|  |                         &processing_batch | ||||||
|  |                             .as_ref() | ||||||
|  |                             .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) | ||||||
|  |                             .unix_timestamp_nanos(), | ||||||
|  |                     ); | ||||||
|  |                     if !is_within_dates { | ||||||
|  |                         filtered_processing_tasks.clear(); | ||||||
|  |                     } | ||||||
|  |                 }; | ||||||
|  |             match (after_started_at, before_started_at) { | ||||||
|  |                 (None, None) => (), | ||||||
|  |                 (None, Some(before)) => { | ||||||
|  |                     clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before)) | ||||||
|  |                 } | ||||||
|  |                 (Some(after), None) => { | ||||||
|  |                     clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded) | ||||||
|  |                 } | ||||||
|  |                 (Some(after), Some(before)) => clear_filtered_processing_tasks( | ||||||
|  |                     Bound::Excluded(*after), | ||||||
|  |                     Bound::Excluded(*before), | ||||||
|  |                 ), | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             keep_ids_within_datetimes( | ||||||
|  |                 rtxn, | ||||||
|  |                 &mut filtered_non_processing_tasks, | ||||||
|  |                 self.tasks.started_at, | ||||||
|  |                 *after_started_at, | ||||||
|  |                 *before_started_at, | ||||||
|  |             )?; | ||||||
|  |             filtered_non_processing_tasks | filtered_processing_tasks | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         keep_ids_within_datetimes( | ||||||
|  |             rtxn, | ||||||
|  |             &mut tasks, | ||||||
|  |             self.tasks.enqueued_at, | ||||||
|  |             *after_enqueued_at, | ||||||
|  |             *before_enqueued_at, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         keep_ids_within_datetimes( | ||||||
|  |             rtxn, | ||||||
|  |             &mut tasks, | ||||||
|  |             self.tasks.finished_at, | ||||||
|  |             *after_finished_at, | ||||||
|  |             *before_finished_at, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         if let Some(limit) = limit { | ||||||
|  |             tasks = if query.reverse.unwrap_or_default() { | ||||||
|  |                 tasks.into_iter().take(*limit as usize).collect() | ||||||
|  |             } else { | ||||||
|  |                 tasks.into_iter().rev().take(*limit as usize).collect() | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(tasks) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn get_task_ids_from_authorized_indexes( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         query: &Query, | ||||||
|  |         filters: &meilisearch_auth::AuthFilter, | ||||||
|  |         processing_tasks: &ProcessingTasks, | ||||||
|  |     ) -> Result<(RoaringBitmap, u64)> { | ||||||
|  |         // compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching | ||||||
|  |         // the filter. | ||||||
|  |         // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares | ||||||
|  |         // us from modifying the underlying implementation, and the performance remains sufficient. | ||||||
|  |         // Should this change, we would modify `get_task_ids` to directly return the number of matching tasks. | ||||||
|  |         let total_tasks = | ||||||
|  |             self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?; | ||||||
|  |         let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?; | ||||||
|  |  | ||||||
|  |         // If the query contains a list of index uid or there is a finite list of authorized indexes, | ||||||
|  |         // then we must exclude all the kinds that aren't associated to one and only one index. | ||||||
|  |         if query.index_uids.is_some() || !filters.all_indexes_authorized() { | ||||||
|  |             for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) { | ||||||
|  |                 tasks -= self.tasks.get_kind(rtxn, kind)?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Any task that is internally associated with a non-authorized index | ||||||
|  |         // must be discarded. | ||||||
|  |         if !filters.all_indexes_authorized() { | ||||||
|  |             let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?; | ||||||
|  |             for result in all_indexes_iter { | ||||||
|  |                 let (index, index_tasks) = result?; | ||||||
|  |                 if !filters.is_index_authorized(index) { | ||||||
|  |                     tasks -= index_tasks; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok((tasks, total_tasks.len())) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn get_tasks_from_authorized_indexes( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         query: &Query, | ||||||
|  |         filters: &meilisearch_auth::AuthFilter, | ||||||
|  |         processing_tasks: &ProcessingTasks, | ||||||
|  |     ) -> Result<(Vec<Task>, u64)> { | ||||||
|  |         let (tasks, total) = | ||||||
|  |             self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?; | ||||||
|  |         let tasks = if query.reverse.unwrap_or_default() { | ||||||
|  |             Box::new(tasks.into_iter()) as Box<dyn Iterator<Item = u32>> | ||||||
|  |         } else { | ||||||
|  |             Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>> | ||||||
|  |         }; | ||||||
|  |         let tasks = self | ||||||
|  |             .tasks | ||||||
|  |             .get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?; | ||||||
|  |  | ||||||
|  |         let ProcessingTasks { batch, processing, progress: _ } = processing_tasks; | ||||||
|  |  | ||||||
|  |         let ret = tasks.into_iter(); | ||||||
|  |         if processing.is_empty() || batch.is_none() { | ||||||
|  |             Ok((ret.collect(), total)) | ||||||
|  |         } else { | ||||||
|  |             // Safe because we ensured there was a batch in the previous branch | ||||||
|  |             let batch = batch.as_ref().unwrap(); | ||||||
|  |             Ok(( | ||||||
|  |                 ret.map(|task| { | ||||||
|  |                     if processing.contains(task.uid) { | ||||||
|  |                         Task { | ||||||
|  |                             status: Status::Processing, | ||||||
|  |                             batch_uid: Some(batch.uid), | ||||||
|  |                             started_at: Some(batch.started_at), | ||||||
|  |                             ..task | ||||||
|  |                         } | ||||||
|  |                     } else { | ||||||
|  |                         task | ||||||
|  |                     } | ||||||
|  |                 }) | ||||||
|  |                 .collect(), | ||||||
|  |                 total, | ||||||
|  |             )) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										441
									
								
								crates/index-scheduler/src/queue/tasks_test.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										441
									
								
								crates/index-scheduler/src/queue/tasks_test.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,441 @@ | |||||||
|  | use meili_snap::snapshot; | ||||||
|  | use meilisearch_auth::AuthFilter; | ||||||
|  | use meilisearch_types::index_uid_pattern::IndexUidPattern; | ||||||
|  | use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; | ||||||
|  | use time::{Duration, OffsetDateTime}; | ||||||
|  |  | ||||||
|  | use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; | ||||||
|  | use crate::test_utils::Breakpoint::*; | ||||||
|  | use crate::test_utils::{index_creation_task, FailureLocation}; | ||||||
|  | use crate::{IndexScheduler, Query}; | ||||||
|  |  | ||||||
/// Registers and processes three index creations, then checks which task ids come back for
/// various combinations of the `from` and `limit` query fields.
#[test]
fn query_tasks_from_and_limit() {
    let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

    let _task =
        index_scheduler.register(index_creation_task("doggo", "bone"), None, false).unwrap();
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
    let _task =
        index_scheduler.register(index_creation_task("whalo", "plankton"), None, false).unwrap();
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
    let _task = index_scheduler
        .register(index_creation_task("catto", "his_own_vomit"), None, false)
        .unwrap();
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");

    handle.advance_n_successful_batches(3);
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");

    let rtxn = index_scheduler.env.read_txn().unwrap();
    let processing = index_scheduler.processing_tasks.read().unwrap();

    // Runs `query` with every index authorized and keeps only the bitmap of matching ids.
    let matching_ids = |query: Query| {
        let (ids, _total) = index_scheduler
            .queue
            .get_task_ids_from_authorized_indexes(
                &rtxn,
                &query,
                &AuthFilter::default(),
                &processing,
            )
            .unwrap();
        ids
    };

    // `limit` alone: the highest ids are kept first.
    let ids = matching_ids(Query { limit: Some(0), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[]");

    let ids = matching_ids(Query { limit: Some(1), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[2,]");

    let ids = matching_ids(Query { limit: Some(2), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[1,2,]");

    // `from` alone: ids above `from` are dropped.
    let ids = matching_ids(Query { from: Some(1), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[0,1,]");

    let ids = matching_ids(Query { from: Some(2), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[0,1,2,]");

    // `from` and `limit` combined.
    let ids = matching_ids(Query { from: Some(1), limit: Some(1), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[1,]");

    let ids = matching_ids(Query { from: Some(1), limit: Some(2), ..Default::default() });
    snapshot!(snapshot_bitmap(&ids), @"[0,1,]");
}
|  |  | ||||||
/// Walks three index creations through the scheduler one batch at a time and, at each stop,
/// checks which task ids match queries on `statuses`, `uids` and the `started_at` bounds.
///
/// Queries go through `IndexScheduler::get_task_ids_from_authorized_indexes`, which is
/// called anew for every query while the scheduler keeps advancing in between.
#[test]
fn query_tasks_simple() {
    let start_time = OffsetDateTime::now_utc();

    let (index_scheduler, mut handle) =
        IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);

    let _task =
        index_scheduler.register(index_creation_task("catto", "mouse"), None, false).unwrap();
    let _task =
        index_scheduler.register(index_creation_task("doggo", "sheep"), None, false).unwrap();
    let _task =
        index_scheduler.register(index_creation_task("whalo", "fish"), None, false).unwrap();

    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");

    // Runs `query` with every index authorized and keeps only the bitmap of matching ids.
    let matching_ids = |query: &Query| {
        let (ids, _total) = index_scheduler
            .get_task_ids_from_authorized_indexes(query, &AuthFilter::default())
            .unwrap();
        ids
    };
    let one_minute = Duration::minutes(1);

    // ---- first batch created: task 0 is processing, tasks 1 and 2 are enqueued ----
    handle.advance_till([Start, BatchCreated]);

    let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
    let ids = matching_ids(&query);
    // only the processing task
    snapshot!(snapshot_bitmap(&ids), @"[0,]");

    let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
    let ids = matching_ids(&query);
    // only the enqueued tasks
    snapshot!(snapshot_bitmap(&ids), @"[1,2,]");

    let enqueued_or_processing = vec![Status::Enqueued, Status::Processing];

    let query = Query { statuses: Some(enqueued_or_processing.clone()), ..Default::default() };
    let ids = matching_ids(&query);
    // enqueued and processing tasks together
    snapshot!(snapshot_bitmap(&ids), @"[0,1,2,]");

    let query = Query {
        statuses: Some(enqueued_or_processing.clone()),
        after_started_at: Some(start_time),
        ..Default::default()
    };
    let ids = matching_ids(&query);
    // restricting to tasks started after the beginning of the test excludes the enqueued
    // tasks and keeps the processing one
    snapshot!(snapshot_bitmap(&ids), @"[0,]");

    let query = Query {
        statuses: Some(enqueued_or_processing.clone()),
        before_started_at: Some(start_time),
        ..Default::default()
    };
    let ids = matching_ids(&query);
    // nothing started before the beginning of the test
    snapshot!(snapshot_bitmap(&ids), @"[]");

    let query = Query {
        statuses: Some(enqueued_or_processing),
        after_started_at: Some(start_time),
        before_started_at: Some(start_time + one_minute),
        ..Default::default()
    };
    let ids = matching_ids(&query);
    // started within the minute following the beginning of the test: the enqueued tasks are
    // excluded and the only processing task is included
    snapshot!(snapshot_bitmap(&ids), @"[0,]");

    // ---- first batch succeeded, second batch created ----
    handle.advance_till([
        InsideProcessBatch,
        InsideProcessBatch,
        ProcessBatchSucceeded,
        AfterProcessing,
        Start,
        BatchCreated,
    ]);

    let second_start_time = OffsetDateTime::now_utc();

    let succeeded_or_processing = vec![Status::Succeeded, Status::Processing];

    let query = Query {
        statuses: Some(succeeded_or_processing.clone()),
        after_started_at: Some(start_time),
        before_started_at: Some(start_time + one_minute),
        ..Default::default()
    };
    let ids = matching_ids(&query);
    // succeeded and processing tasks started within the minute following the beginning of
    // the test: both the finished task and the one being processed match
    snapshot!(snapshot_bitmap(&ids), @"[0,1,]");

    let query = Query {
        statuses: Some(succeeded_or_processing),
        before_started_at: Some(start_time),
        ..Default::default()
    };
    let ids = matching_ids(&query);
    // still nothing started before the beginning of the test
    snapshot!(snapshot_bitmap(&ids), @"[]");

    let started_in_second_window = Query {
        statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
        after_started_at: Some(second_start_time),
        before_started_at: Some(second_start_time + one_minute),
        ..Default::default()
    };
    let ids = matching_ids(&started_in_second_window);
    // no task has a started_at past `second_start_time` yet
    snapshot!(snapshot_bitmap(&ids), @"[]");

    // ---- second batch succeeded, third batch created ----
    // the started_at of the newly batched task is past `second_start_time`
    handle.advance_till([
        InsideProcessBatch,
        InsideProcessBatch,
        ProcessBatchSucceeded,
        AfterProcessing,
        Start,
        BatchCreated,
    ]);

    let ids = matching_ids(&started_in_second_window);
    // the same query now matches the last task
    snapshot!(snapshot_bitmap(&ids), @"[2,]");

    let ids = matching_ids(&started_in_second_window);
    // asking again for the enqueued, succeeded or processing tasks started in the second
    // window gives the same answer
    snapshot!(snapshot_bitmap(&ids), @"[2,]");

    // ---- third batch failed ----
    handle.advance_till([ProcessBatchFailed, AfterProcessing]);

    // now the last task should have failed
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
    let ids = matching_ids(&started_in_second_window);
    // so the previous query no longer matches anything
    snapshot!(snapshot_bitmap(&ids), @"[]");

    let failed_in_second_window = Query {
        statuses: Some(vec![Status::Failed]),
        after_started_at: Some(second_start_time),
        before_started_at: Some(second_start_time + one_minute),
        ..Default::default()
    };
    let ids = matching_ids(&failed_in_second_window);
    // the same time window restricted to failed tasks returns the last task
    snapshot!(snapshot_bitmap(&ids), @"[2,]");

    let ids = matching_ids(&failed_in_second_window);
    // and keeps returning it when the query is issued a second time
    snapshot!(snapshot_bitmap(&ids), @"[2,]");

    let query = Query { uids: Some(vec![1]), ..failed_in_second_window.clone() };
    let ids = matching_ids(&query);
    // same query with a uid that does not match
    snapshot!(snapshot_bitmap(&ids), @"[]");

    let query = Query { uids: Some(vec![2]), ..failed_in_second_window };
    let ids = matching_ids(&query);
    // same query with the uid of the failed task
    snapshot!(snapshot_bitmap(&ids), @"[2,]");
}
|  |  | ||||||
/// Checks how the `index_uids` query field and the authorized-index filter interact, in
/// particular for index swaps, which involve more than one index.
#[test]
fn query_tasks_special_rules() {
    let (index_scheduler, mut handle) =
        IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);

    // Tasks 0 and 1 create `catto` and `doggo`; tasks 2 and 3 are index swaps.
    let _task =
        index_scheduler.register(index_creation_task("catto", "mouse"), None, false).unwrap();
    let _task =
        index_scheduler.register(index_creation_task("doggo", "sheep"), None, false).unwrap();
    let catto_doggo_swap = KindWithContent::IndexSwap {
        swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
    };
    let _task = index_scheduler.register(catto_doggo_swap, None, false).unwrap();
    let catto_whalo_swap = KindWithContent::IndexSwap {
        swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
    };
    let _task = index_scheduler.register(catto_whalo_swap, None, false).unwrap();

    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");

    handle.advance_till([Start, BatchCreated]);

    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();

    // Runs `query` under `filters` and keeps only the bitmap of matching ids.
    let matching_ids = |query: &Query, filters: &AuthFilter| {
        let (ids, _total) = index_scheduler
            .queue
            .get_task_ids_from_authorized_indexes(&rtxn, query, filters, &proc)
            .unwrap();
        ids
    };
    // Builds a filter authorizing exactly the given index uids.
    let only_authorized = |uids: Vec<&str>| {
        AuthFilter::with_allowed_indexes(
            uids.into_iter().map(|uid| IndexUidPattern::new_unchecked(uid)).collect(),
        )
    };

    let catto_only = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
    let everything = Query::default();

    let ids = matching_ids(&catto_only, &AuthFilter::default());
    // only the first task associated with catto is returned, the indexSwap tasks are excluded!
    snapshot!(snapshot_bitmap(&ids), @"[0,]");

    let ids = matching_ids(&catto_only, &only_authorized(vec!["doggo"]));
    // the catto tasks were requested but only the doggo tasks may be retrieved
    // -> empty result
    snapshot!(snapshot_bitmap(&ids), @"[]");

    let ids = matching_ids(&everything, &only_authorized(vec!["doggo"]));
    // all the tasks were requested but only the doggo tasks may be retrieved
    // -> only the index creation of doggo is returned
    snapshot!(snapshot_bitmap(&ids), @"[1,]");

    let ids = matching_ids(&everything, &only_authorized(vec!["catto", "doggo"]));
    // all the tasks were requested but only the catto and doggo tasks may be retrieved
    // -> the two index creations are returned, neither of the index swaps is
    snapshot!(snapshot_bitmap(&ids), @"[0,1,]");

    let ids = matching_ids(&everything, &AuthFilter::default());
    // all the tasks were requested with every index authorized -> all tasks returned
    snapshot!(snapshot_bitmap(&ids), @"[0,1,2,3,]");
}
|  |  | ||||||
|  | // Checks the `canceled_by` query filter: only the tasks canceled by the given cancelation task are | ||||||
|  | // returned, and the result is further restricted by the index authorization filter. | ||||||
|  | #[test] | ||||||
|  | fn query_tasks_canceled_by() { | ||||||
|  |     let (index_scheduler, mut handle) = | ||||||
|  |         IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); | ||||||
|  |  | ||||||
|  |     let kind = index_creation_task("catto", "mouse"); | ||||||
|  |     let _ = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = index_creation_task("doggo", "sheep"); | ||||||
|  |     let _ = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     let kind = KindWithContent::IndexSwap { | ||||||
|  |         swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], | ||||||
|  |     }; | ||||||
|  |     let _task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |  | ||||||
|  |     // let a single batch succeed before the cancelation is registered | ||||||
|  |     handle.advance_n_successful_batches(1); | ||||||
|  |     // the cancelation targets the uids 0 to 3; as noted below, 3 is the uid of the cancelation itself | ||||||
|  |     let kind = KindWithContent::TaskCancelation { | ||||||
|  |         query: "test_query".to_string(), | ||||||
|  |         tasks: [0, 1, 2, 3].into_iter().collect(), | ||||||
|  |     }; | ||||||
|  |     let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     handle.advance_n_successful_batches(1); | ||||||
|  |  | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); | ||||||
|  |  | ||||||
|  |     let rtxn = index_scheduler.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     // first query: every index is authorized (default `AuthFilter`) | ||||||
|  |     let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; | ||||||
|  |     let (tasks, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) | ||||||
|  |         .unwrap(); | ||||||
|  |     // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the | ||||||
|  |     // taskCancelation itself | ||||||
|  |     snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); | ||||||
|  |  | ||||||
|  |     // second query: same filter, but only the `doggo` index is authorized | ||||||
|  |     let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; | ||||||
|  |     let (tasks, _) = index_scheduler | ||||||
|  |         .queue | ||||||
|  |         .get_task_ids_from_authorized_indexes( | ||||||
|  |             &rtxn, | ||||||
|  |             &query, | ||||||
|  |             &AuthFilter::with_allowed_indexes( | ||||||
|  |                 vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), | ||||||
|  |             ), | ||||||
|  |             &proc, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     // Return only 1 because the user is not authorized to see task 2 | ||||||
|  |     snapshot!(snapshot_bitmap(&tasks), @"[1,]"); | ||||||
|  | } | ||||||
							
								
								
									
										395
									
								
								crates/index-scheduler/src/queue/test.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										395
									
								
								crates/index-scheduler/src/queue/test.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,395 @@ | |||||||
|  | use big_s::S; | ||||||
|  | use meili_snap::{json_string, snapshot}; | ||||||
|  | use meilisearch_types::error::ErrorCode; | ||||||
|  | use meilisearch_types::tasks::{KindWithContent, Status}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|  | use crate::insta_snapshot::snapshot_index_scheduler; | ||||||
|  | use crate::test_utils::Breakpoint::*; | ||||||
|  | use crate::test_utils::{index_creation_task, replace_document_import_task}; | ||||||
|  | use crate::{IndexScheduler, Query}; | ||||||
|  |  | ||||||
|  | // Registers several tasks without letting the scheduler process them and checks the uid, status | ||||||
|  | // and kind of every returned task. | ||||||
|  | #[test] | ||||||
|  | fn register() { | ||||||
|  |     // In this test, the handle doesn't make any progress, we only check that the tasks are registered | ||||||
|  |     let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |     let kinds = [ | ||||||
|  |         index_creation_task("catto", "mouse"), | ||||||
|  |         replace_document_import_task("catto", None, 0, 12), | ||||||
|  |         replace_document_import_task("catto", None, 1, 50), | ||||||
|  |         replace_document_import_task("doggo", Some("bone"), 2, 5000), | ||||||
|  |     ]; | ||||||
|  |     // Create and persist the update files with the uuids 0, 1 and 2. | ||||||
|  |     // NOTE(review): presumably the files referenced by the document import tasks above - confirm. | ||||||
|  |     let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); | ||||||
|  |     file.persist().unwrap(); | ||||||
|  |     let (_, file) = index_scheduler.queue.create_update_file_with_uuid(1).unwrap(); | ||||||
|  |     file.persist().unwrap(); | ||||||
|  |     let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap(); | ||||||
|  |     file.persist().unwrap(); | ||||||
|  |  | ||||||
|  |     for (idx, kind) in kinds.into_iter().enumerate() { | ||||||
|  |         // keep the kind around: `kind` is moved into `register` on the next line | ||||||
|  |         let k = kind.as_kind(); | ||||||
|  |         let task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |         index_scheduler.assert_internally_consistent(); | ||||||
|  |  | ||||||
|  |         // the uids are expected to follow the registration order, starting at 0 | ||||||
|  |         assert_eq!(task.uid, idx as u32); | ||||||
|  |         assert_eq!(task.status, Status::Enqueued); | ||||||
|  |         assert_eq!(task.kind.as_kind(), k); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered"); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Registers tasks with the third argument of `register` set to `true` and checks, through the | ||||||
|  | // snapshots, that a task uid is returned while the scheduler state stays empty. | ||||||
|  | // NOTE(review): the third argument is presumably the dry-run flag - confirm against `register`. | ||||||
|  | #[test] | ||||||
|  | fn dry_run() { | ||||||
|  |     let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |     // without an explicit task id the returned uid is 0 | ||||||
|  |     let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; | ||||||
|  |     let task = index_scheduler.register(kind, None, true).unwrap(); | ||||||
|  |     snapshot!(task.uid, @"0"); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), @r" | ||||||
|  |         ### Autobatching Enabled = true | ||||||
|  |         ### Processing batch None: | ||||||
|  |         [] | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### All Tasks: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Status: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Kind: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Index Tasks: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Index Mapper: | ||||||
|  |  | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Canceled By: | ||||||
|  |  | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Enqueued At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Started At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Finished At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### All Batches: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batch to tasks mapping: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Status: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Kind: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Index Tasks: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Enqueued At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Started At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Finished At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### File Store: | ||||||
|  |  | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         "); | ||||||
|  |  | ||||||
|  |     // with an explicit task id (12) the returned uid is that id, and the state is still empty | ||||||
|  |     let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; | ||||||
|  |     let task = index_scheduler.register(kind, Some(12), true).unwrap(); | ||||||
|  |     snapshot!(task.uid, @"12"); | ||||||
|  |     snapshot!(snapshot_index_scheduler(&index_scheduler), @r" | ||||||
|  |         ### Autobatching Enabled = true | ||||||
|  |         ### Processing batch None: | ||||||
|  |         [] | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### All Tasks: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Status: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Kind: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Index Tasks: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Index Mapper: | ||||||
|  |  | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Canceled By: | ||||||
|  |  | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Enqueued At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Started At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Finished At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### All Batches: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batch to tasks mapping: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Status: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Kind: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Index Tasks: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Enqueued At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Started At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### Batches Finished At: | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         ### File Store: | ||||||
|  |  | ||||||
|  |         ---------------------------------------------------------------------- | ||||||
|  |         "); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Checks the registration of tasks with an explicit task id (second argument of `register`): | ||||||
|  | // an id above the ones already used is accepted, a lower one is rejected with an error. | ||||||
|  | #[test] | ||||||
|  | fn basic_set_taskid() { | ||||||
|  |     let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |     // no explicit id: the first task gets the uid 0 | ||||||
|  |     let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; | ||||||
|  |     let task = index_scheduler.register(kind, None, false).unwrap(); | ||||||
|  |     snapshot!(task.uid, @"0"); | ||||||
|  |  | ||||||
|  |     // explicit id 12: accepted and used as the task uid | ||||||
|  |     let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; | ||||||
|  |     let task = index_scheduler.register(kind, Some(12), false).unwrap(); | ||||||
|  |     snapshot!(task.uid, @"12"); | ||||||
|  |  | ||||||
|  |     // explicit id 5: rejected, the error reports that the id should be at least 13 | ||||||
|  |     let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; | ||||||
|  |     let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); | ||||||
|  |     snapshot!(error, @"Received bad task id: 5 should be >= to 13."); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // With `cleanup_enabled = false` and `max_number_of_tasks = 2`, checks that going over the maximum | ||||||
|  | // number of tasks does not enqueue a task deletion. The task list is snapshotted before and after | ||||||
|  | // the scheduler advances. | ||||||
|  | #[test] | ||||||
|  | fn test_disable_auto_deletion_of_tasks() { | ||||||
|  |     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { | ||||||
|  |         config.cleanup_enabled = false; | ||||||
|  |         config.max_number_of_tasks = 2; | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |     // same index creation again: this batch is expected to fail | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_failed_batch(); | ||||||
|  |  | ||||||
|  |     // at this point the max number of tasks is reached | ||||||
|  |     // we can still enqueue multiple tasks | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |     // snapshot every task currently in the queue (dates are redacted in the snapshot) | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); | ||||||
|  |     // release the read transaction and the lock guard before advancing the scheduler | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  |  | ||||||
|  |     // now we're above the max number of tasks | ||||||
|  |     // and if we try to advance in the tick function no new task deletion should be enqueued | ||||||
|  |     handle.advance_till([Start, BatchCreated]); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // With `max_number_of_tasks = 2`, checks that going over the maximum number of tasks enqueues a | ||||||
|  | // task deletion when the scheduler advances. The task list is snapshotted after every step. | ||||||
|  | #[test] | ||||||
|  | fn test_auto_deletion_of_tasks() { | ||||||
|  |     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { | ||||||
|  |         config.max_number_of_tasks = 2; | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |     // same index creation again: this batch is expected to fail | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_failed_batch(); | ||||||
|  |  | ||||||
|  |     // at this point the max number of tasks is reached | ||||||
|  |     // we can still enqueue multiple tasks | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |     // snapshot every task currently in the queue (dates are redacted in the snapshot) | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); | ||||||
|  |     // release the read transaction and the lock guard before advancing the scheduler | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  |  | ||||||
|  |     // now we're above the max number of tasks | ||||||
|  |     // and if we try to advance in the tick function a new task deletion should be enqueued | ||||||
|  |     handle.advance_till([Start, BatchCreated]); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  |  | ||||||
|  |     // advance through the remaining breakpoints of the batch created above; the snapshot name | ||||||
|  |     // indicates that the task deletion has been processed by then | ||||||
|  |     handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  |  | ||||||
|  |     handle.advance_one_failed_batch(); | ||||||
|  |     // a new task deletion has been enqueued | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  |  | ||||||
|  |     // one more failed batch followed by a successful one, then the final snapshot | ||||||
|  |     handle.advance_one_failed_batch(); | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |     let rtxn = index_scheduler.env.read_txn().unwrap(); | ||||||
|  |     let proc = index_scheduler.processing_tasks.read().unwrap(); | ||||||
|  |     let tasks = | ||||||
|  |         index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); | ||||||
|  |     let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); | ||||||
|  |     snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed"); | ||||||
|  |     drop(rtxn); | ||||||
|  |     drop(proc); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Fills the task database (configured with the minimum map size) until registration fails, then | ||||||
|  | // checks the returned error and its error code, that an empty task deletion is rejected as well, | ||||||
|  | // and that a task deletion targeting existing tasks is accepted and makes registration work again. | ||||||
|  | #[test] | ||||||
|  | fn test_task_queue_is_full() { | ||||||
|  |     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { | ||||||
|  |         // that's the minimum map size possible | ||||||
|  |         config.task_db_size = 1048576; | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |     // on average this task takes ~600 bytes | ||||||
|  |     // keep registering (and failing) index creations until a registration is refused | ||||||
|  |     loop { | ||||||
|  |         let result = index_scheduler.register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ); | ||||||
|  |         if result.is_err() { | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         handle.advance_one_failed_batch(); | ||||||
|  |     } | ||||||
|  |     index_scheduler.assert_internally_consistent(); | ||||||
|  |  | ||||||
|  |     // at this point the task DB should have reached its limit and we should not be able to register new tasks | ||||||
|  |     let result = index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap_err(); | ||||||
|  |     snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); | ||||||
|  |     // we won't be able to test this error in an integration test, so as a best-effort test I still ensure the error returns the expected error code | ||||||
|  |     snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); | ||||||
|  |  | ||||||
|  |     // Even the task deletion that doesn't delete anything shouldn't be accepted | ||||||
|  |     let result = index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap_err(); | ||||||
|  |     snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); | ||||||
|  |     // we won't be able to test this error in an integration test, so as a best-effort test I still ensure the error returns the expected error code | ||||||
|  |     snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); | ||||||
|  |  | ||||||
|  |     // But a task deletion that deletes something should work | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |     // Now we should be able to enqueue a few tasks again | ||||||
|  |     index_scheduler | ||||||
|  |         .register( | ||||||
|  |             KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, | ||||||
|  |             None, | ||||||
|  |             false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |     handle.advance_one_failed_batch(); | ||||||
|  | } | ||||||
| @@ -519,7 +519,14 @@ mod tests { | |||||||
|     use uuid::Uuid; |     use uuid::Uuid; | ||||||
| 
 | 
 | ||||||
|     use super::*; |     use super::*; | ||||||
|     use crate::debug_snapshot; | 
 | ||||||
|  |     // Formats `$value` with its `Debug` implementation and compares the resulting string against | ||||||
|  |     // the inline `$snapshot` literal through `meili_snap::snapshot!`. | ||||||
|  |     #[macro_export] | ||||||
|  |     macro_rules! debug_snapshot { | ||||||
|  |         ($value:expr, @$snapshot:literal) => {{ | ||||||
|  |             let value = format!("{:?}", $value); | ||||||
|  |             meili_snap::snapshot!(value, @$snapshot); | ||||||
|  |         }}; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     fn autobatch_from( |     fn autobatch_from( | ||||||
|         index_already_exists: bool, |         index_already_exists: bool, | ||||||
							
								
								
									
										530
									
								
								crates/index-scheduler/src/scheduler/create_batch.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										530
									
								
								crates/index-scheduler/src/scheduler/create_batch.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,530 @@ | |||||||
|  | use std::fmt; | ||||||
|  |  | ||||||
|  | use meilisearch_types::heed::RoTxn; | ||||||
|  | use meilisearch_types::milli::update::IndexDocumentsMethod; | ||||||
|  | use meilisearch_types::settings::{Settings, Unchecked}; | ||||||
|  | use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use uuid::Uuid; | ||||||
|  |  | ||||||
|  | use super::autobatcher::{self, BatchKind}; | ||||||
|  | use crate::utils::ProcessingBatch; | ||||||
|  | use crate::{Error, IndexScheduler, Result}; | ||||||
|  |  | ||||||
|  | /// Represents a combination of tasks that can all be processed at the same time. | ||||||
|  | /// | ||||||
|  | /// A batch contains the set of tasks that it represents (accessible through | ||||||
|  | /// [`self.ids()`](Batch::ids)), as well as additional information on how to | ||||||
|  | /// be processed. | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub(crate) enum Batch { | ||||||
|  |     TaskCancelation { | ||||||
|  |         /// The task cancelation itself. | ||||||
|  |         task: Task, | ||||||
|  |     }, | ||||||
|  |     // The wrapped tasks are all the tasks of the batch. | ||||||
|  |     TaskDeletions(Vec<Task>), | ||||||
|  |     // The wrapped tasks are all the tasks of the batch. | ||||||
|  |     SnapshotCreation(Vec<Task>), | ||||||
|  |     // The wrapped task is the only task of the batch. | ||||||
|  |     Dump(Task), | ||||||
|  |     IndexOperation { | ||||||
|  |         // The operation to run; it also carries the tasks of the batch. | ||||||
|  |         op: IndexOperation, | ||||||
|  |         // NOTE(review): presumably whether the index must be created before running `op` - confirm. | ||||||
|  |         must_create_index: bool, | ||||||
|  |     }, | ||||||
|  |     IndexCreation { | ||||||
|  |         index_uid: String, | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |         // The only task of the batch. | ||||||
|  |         task: Task, | ||||||
|  |     }, | ||||||
|  |     IndexUpdate { | ||||||
|  |         index_uid: String, | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |         // The only task of the batch. | ||||||
|  |         task: Task, | ||||||
|  |     }, | ||||||
|  |     IndexDeletion { | ||||||
|  |         index_uid: String, | ||||||
|  |         // All the tasks of the batch. | ||||||
|  |         tasks: Vec<Task>, | ||||||
|  |         // NOTE(review): presumably tells whether the index existed when the batch was built - confirm. | ||||||
|  |         index_has_been_created: bool, | ||||||
|  |     }, | ||||||
|  |     IndexSwap { | ||||||
|  |         // The only task of the batch. | ||||||
|  |         task: Task, | ||||||
|  |     }, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // One entry of the `operations` list of [`IndexOperation::DocumentOperation`]. | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub(crate) enum DocumentOperation { | ||||||
|  |     // NOTE(review): presumably the uuid of the update file holding the documents to add - confirm. | ||||||
|  |     Add(Uuid), | ||||||
|  |     // NOTE(review): presumably the ids of the documents to delete - confirm. | ||||||
|  |     Delete(Vec<String>), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// A [batch](Batch) that combines multiple tasks operating on an index. | ||||||
|  | /// | ||||||
|  | /// Every variant carries the uid of the index it targets and the task(s) it is made of. | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub(crate) enum IndexOperation { | ||||||
|  |     DocumentOperation { | ||||||
|  |         index_uid: String, | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |         method: IndexDocumentsMethod, | ||||||
|  |         // NOTE(review): presumably one operation per task, in the same order as `tasks` - confirm. | ||||||
|  |         operations: Vec<DocumentOperation>, | ||||||
|  |         tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
|  |     DocumentEdition { | ||||||
|  |         index_uid: String, | ||||||
|  |         // The only task of this operation. | ||||||
|  |         task: Task, | ||||||
|  |     }, | ||||||
|  |     DocumentDeletion { | ||||||
|  |         index_uid: String, | ||||||
|  |         tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
|  |     DocumentClear { | ||||||
|  |         index_uid: String, | ||||||
|  |         tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
|  |     Settings { | ||||||
|  |         index_uid: String, | ||||||
|  |         // The boolean indicates if it's a settings deletion or creation. | ||||||
|  |         settings: Vec<(bool, Settings<Unchecked>)>, | ||||||
|  |         tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
|  |     DocumentClearAndSetting { | ||||||
|  |         index_uid: String, | ||||||
|  |         // Together with `settings_tasks`, these are the tasks of the operation. | ||||||
|  |         cleared_tasks: Vec<Task>, | ||||||
|  |  | ||||||
|  |         // The boolean indicates if it's a settings deletion or creation. | ||||||
|  |         settings: Vec<(bool, Settings<Unchecked>)>, | ||||||
|  |         settings_tasks: Vec<Task>, | ||||||
|  |     }, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Batch { | ||||||
|  |     /// Build the bitmap holding the uid of every task carried by this batch. | ||||||
|  |     pub fn ids(&self) -> RoaringBitmap { | ||||||
|  |         match self { | ||||||
|  |             // Every variant below wraps exactly one task. | ||||||
|  |             Batch::TaskCancelation { task, .. } | ||||||
|  |             | Batch::Dump(task) | ||||||
|  |             | Batch::IndexCreation { task, .. } | ||||||
|  |             | Batch::IndexUpdate { task, .. } | ||||||
|  |             | Batch::IndexSwap { task } | ||||||
|  |             | Batch::IndexOperation { op: IndexOperation::DocumentEdition { task, .. }, .. } => { | ||||||
|  |                 std::iter::once(task.uid).collect() | ||||||
|  |             } | ||||||
|  |             // Every variant below wraps a single list of tasks. | ||||||
|  |             Batch::SnapshotCreation(tasks) | ||||||
|  |             | Batch::TaskDeletions(tasks) | ||||||
|  |             | Batch::IndexDeletion { tasks, .. } | ||||||
|  |             | Batch::IndexOperation { | ||||||
|  |                 op: | ||||||
|  |                     IndexOperation::DocumentOperation { tasks, .. } | ||||||
|  |                     | IndexOperation::Settings { tasks, .. } | ||||||
|  |                     | IndexOperation::DocumentDeletion { tasks, .. } | ||||||
|  |                     | IndexOperation::DocumentClear { tasks, .. }, | ||||||
|  |                 .. | ||||||
|  |             } => tasks.iter().map(|task| task.uid).collect(), | ||||||
|  |             // The clear-and-settings operation is the only one carrying two lists of tasks. | ||||||
|  |             Batch::IndexOperation { | ||||||
|  |                 op: IndexOperation::DocumentClearAndSetting { cleared_tasks, settings_tasks, .. }, | ||||||
|  |                 .. | ||||||
|  |             } => { | ||||||
|  |                 let every_task = cleared_tasks.iter().chain(settings_tasks.iter()); | ||||||
|  |                 every_task.map(|task| task.uid).collect() | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Name of the index this batch operates on, or `None` for the batches that are not | ||||||
|  |     /// bound to a single index (cancelations, task deletions, snapshots, dumps and swaps). | ||||||
|  |     pub fn index_uid(&self) -> Option<&str> { | ||||||
|  |         match self { | ||||||
|  |             Self::IndexOperation { op, .. } => Some(op.index_uid()), | ||||||
|  |             Self::IndexCreation { index_uid, .. } | ||||||
|  |             | Self::IndexUpdate { index_uid, .. } | ||||||
|  |             | Self::IndexDeletion { index_uid, .. } => Some(index_uid.as_str()), | ||||||
|  |             Self::TaskCancelation { .. } | ||||||
|  |             | Self::TaskDeletions(_) | ||||||
|  |             | Self::SnapshotCreation(_) | ||||||
|  |             | Self::Dump(_) | ||||||
|  |             | Self::IndexSwap { .. } => None, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for Batch { | ||||||
|  |     /// Short description of the batch (kind, index if any, task ids), used when we debug | ||||||
|  |     /// the profiling reports. | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         // Index operations describe themselves, every other batch has a fixed label. | ||||||
|  |         let kind: &dyn fmt::Display = match self { | ||||||
|  |             Batch::IndexOperation { op, .. } => op, | ||||||
|  |             Batch::TaskCancelation { .. } => &"TaskCancelation", | ||||||
|  |             Batch::TaskDeletions(_) => &"TaskDeletion", | ||||||
|  |             Batch::SnapshotCreation(_) => &"SnapshotCreation", | ||||||
|  |             Batch::Dump(_) => &"Dump", | ||||||
|  |             Batch::IndexCreation { .. } => &"IndexCreation", | ||||||
|  |             Batch::IndexUpdate { .. } => &"IndexUpdate", | ||||||
|  |             Batch::IndexDeletion { .. } => &"IndexDeletion", | ||||||
|  |             Batch::IndexSwap { .. } => &"IndexSwap", | ||||||
|  |         }; | ||||||
|  |         write!(f, "{kind}")?; | ||||||
|  |  | ||||||
|  |         let task_ids = self.ids(); | ||||||
|  |         if let Some(index_uid) = self.index_uid() { | ||||||
|  |             write!(f, " on {index_uid:?} from tasks: {task_ids:?}") | ||||||
|  |         } else { | ||||||
|  |             write!(f, " from tasks: {task_ids:?}") | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl IndexOperation { | ||||||
|  |     /// Name of the index targeted by this operation; every variant stores one. | ||||||
|  |     pub fn index_uid(&self) -> &str { | ||||||
|  |         // The pattern is irrefutable because it lists every variant of the enum. | ||||||
|  |         let (Self::DocumentOperation { index_uid, .. } | ||||||
|  |         | Self::DocumentEdition { index_uid, .. } | ||||||
|  |         | Self::DocumentDeletion { index_uid, .. } | ||||||
|  |         | Self::DocumentClear { index_uid, .. } | ||||||
|  |         | Self::Settings { index_uid, .. } | ||||||
|  |         | Self::DocumentClearAndSetting { index_uid, .. }) = self; | ||||||
|  |         index_uid.as_str() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for IndexOperation { | ||||||
|  |     /// Write the qualified name of the variant, without any of its fields. | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         let variant_name = match self { | ||||||
|  |             Self::DocumentOperation { .. } => "IndexOperation::DocumentOperation", | ||||||
|  |             Self::DocumentEdition { .. } => "IndexOperation::DocumentEdition", | ||||||
|  |             Self::DocumentDeletion { .. } => "IndexOperation::DocumentDeletion", | ||||||
|  |             Self::DocumentClear { .. } => "IndexOperation::DocumentClear", | ||||||
|  |             Self::Settings { .. } => "IndexOperation::Settings", | ||||||
|  |             Self::DocumentClearAndSetting { .. } => "IndexOperation::DocumentClearAndSetting", | ||||||
|  |         }; | ||||||
|  |         f.write_str(variant_name) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl IndexScheduler { | ||||||
|  |     /// Turn the [`BatchKind`](crate::autobatcher::BatchKind) picked by the autobatcher into a | ||||||
|  |     /// [`Batch`], fetching from the queue every task the batch refers to. | ||||||
|  |     /// | ||||||
|  |     /// ## Arguments | ||||||
|  |     /// - `rtxn`: read transaction used to fetch the tasks | ||||||
|  |     /// - `index_uid`: name of the index affected by the operations of the autobatch | ||||||
|  |     /// - `batch`: the result of the autobatcher | ||||||
|  |     /// - `current_batch`: the processing batch the fetched tasks are attached to | ||||||
|  |     /// - `must_create_index`: stored in the produced index operations (document editions | ||||||
|  |     ///   always use `false`) and as `index_has_been_created` in index deletions | ||||||
|  |     pub(crate) fn create_next_batch_index( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         index_uid: String, | ||||||
|  |         batch: BatchKind, | ||||||
|  |         current_batch: &mut ProcessingBatch, | ||||||
|  |         must_create_index: bool, | ||||||
|  |     ) -> Result<Option<Batch>> { | ||||||
|  |         // Every kind of batch yields a batch: the arms only differ in how they build it. | ||||||
|  |         let next_batch = match batch { | ||||||
|  |             BatchKind::DocumentClear { ids } => { | ||||||
|  |                 let tasks = self.queue.get_existing_tasks_for_processing_batch( | ||||||
|  |                     rtxn, | ||||||
|  |                     current_batch, | ||||||
|  |                     ids, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::DocumentClear { index_uid, tasks }, | ||||||
|  |                     must_create_index, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::DocumentEdition { id } => { | ||||||
|  |                 let mut task = | ||||||
|  |                     self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |                 current_batch.processing(Some(&mut task)); | ||||||
|  |  | ||||||
|  |                 // The index name is read from the task itself, not from the argument. | ||||||
|  |                 let KindWithContent::DocumentEdition { index_uid, .. } = &task.kind else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |                 let index_uid = index_uid.clone(); | ||||||
|  |  | ||||||
|  |                 Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::DocumentEdition { index_uid, task }, | ||||||
|  |                     must_create_index: false, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::DocumentOperation { method, operation_ids, .. } => { | ||||||
|  |                 let tasks = self.queue.get_existing_tasks_for_processing_batch( | ||||||
|  |                     rtxn, | ||||||
|  |                     current_batch, | ||||||
|  |                     operation_ids, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 // The search stops on the first document addition: its primary key, | ||||||
|  |                 // possibly absent, is the one of the whole batch. | ||||||
|  |                 let primary_key = tasks | ||||||
|  |                     .iter() | ||||||
|  |                     .find_map(|task| match &task.kind { | ||||||
|  |                         KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => { | ||||||
|  |                             Some(primary_key.clone()) | ||||||
|  |                         } | ||||||
|  |                         KindWithContent::DocumentDeletion { .. } => None, | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     }) | ||||||
|  |                     .flatten(); | ||||||
|  |  | ||||||
|  |                 // One operation per task, in the order of the tasks. | ||||||
|  |                 let operations = tasks | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|task| match &task.kind { | ||||||
|  |                         KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { | ||||||
|  |                             DocumentOperation::Add(*content_file) | ||||||
|  |                         } | ||||||
|  |                         KindWithContent::DocumentDeletion { documents_ids, .. } => { | ||||||
|  |                             DocumentOperation::Delete(documents_ids.clone()) | ||||||
|  |                         } | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     }) | ||||||
|  |                     .collect::<Vec<_>>(); | ||||||
|  |  | ||||||
|  |                 Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::DocumentOperation { | ||||||
|  |                         index_uid, | ||||||
|  |                         primary_key, | ||||||
|  |                         method, | ||||||
|  |                         operations, | ||||||
|  |                         tasks, | ||||||
|  |                     }, | ||||||
|  |                     must_create_index, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { | ||||||
|  |                 let tasks = self.queue.get_existing_tasks_for_processing_batch( | ||||||
|  |                     rtxn, | ||||||
|  |                     current_batch, | ||||||
|  |                     deletion_ids, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::DocumentDeletion { index_uid, tasks }, | ||||||
|  |                     must_create_index, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::Settings { settings_ids, .. } => { | ||||||
|  |                 let tasks = self.queue.get_existing_tasks_for_processing_batch( | ||||||
|  |                     rtxn, | ||||||
|  |                     current_batch, | ||||||
|  |                     settings_ids, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 // One `(is_deletion, settings)` pair per task, in the order of the tasks. | ||||||
|  |                 let settings = tasks | ||||||
|  |                     .iter() | ||||||
|  |                     .map(|task| match &task.kind { | ||||||
|  |                         KindWithContent::SettingsUpdate { new_settings, is_deletion, .. } => { | ||||||
|  |                             (*is_deletion, (**new_settings).clone()) | ||||||
|  |                         } | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     }) | ||||||
|  |                     .collect::<Vec<_>>(); | ||||||
|  |  | ||||||
|  |                 Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::Settings { index_uid, settings, tasks }, | ||||||
|  |                     must_create_index, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { | ||||||
|  |                 // Build the two halves by recursing: the settings tasks first, then the | ||||||
|  |                 // document clear tasks. The index name is threaded through both calls. | ||||||
|  |                 let settings_batch = self | ||||||
|  |                     .create_next_batch_index( | ||||||
|  |                         rtxn, | ||||||
|  |                         index_uid, | ||||||
|  |                         BatchKind::Settings { settings_ids, allow_index_creation }, | ||||||
|  |                         current_batch, | ||||||
|  |                         must_create_index, | ||||||
|  |                     )? | ||||||
|  |                     .unwrap(); | ||||||
|  |                 let Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, | ||||||
|  |                     .. | ||||||
|  |                 } = settings_batch | ||||||
|  |                 else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 let clear_batch = self | ||||||
|  |                     .create_next_batch_index( | ||||||
|  |                         rtxn, | ||||||
|  |                         index_uid, | ||||||
|  |                         BatchKind::DocumentClear { ids: other }, | ||||||
|  |                         current_batch, | ||||||
|  |                         must_create_index, | ||||||
|  |                     )? | ||||||
|  |                     .unwrap(); | ||||||
|  |                 let Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::DocumentClear { index_uid, tasks: cleared_tasks }, | ||||||
|  |                     .. | ||||||
|  |                 } = clear_batch | ||||||
|  |                 else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 Batch::IndexOperation { | ||||||
|  |                     op: IndexOperation::DocumentClearAndSetting { | ||||||
|  |                         index_uid, | ||||||
|  |                         cleared_tasks, | ||||||
|  |                         settings, | ||||||
|  |                         settings_tasks, | ||||||
|  |                     }, | ||||||
|  |                     must_create_index, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::IndexCreation { id } => { | ||||||
|  |                 let mut task = | ||||||
|  |                     self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |                 current_batch.processing(Some(&mut task)); | ||||||
|  |  | ||||||
|  |                 // Both the index name and the primary key come from the task itself. | ||||||
|  |                 let KindWithContent::IndexCreation { index_uid, primary_key } = &task.kind else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |                 let (index_uid, primary_key) = (index_uid.clone(), primary_key.clone()); | ||||||
|  |  | ||||||
|  |                 Batch::IndexCreation { index_uid, primary_key, task } | ||||||
|  |             } | ||||||
|  |             BatchKind::IndexUpdate { id } => { | ||||||
|  |                 let mut task = | ||||||
|  |                     self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |                 current_batch.processing(Some(&mut task)); | ||||||
|  |  | ||||||
|  |                 let KindWithContent::IndexUpdate { primary_key, .. } = &task.kind else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |                 let primary_key = primary_key.clone(); | ||||||
|  |  | ||||||
|  |                 Batch::IndexUpdate { index_uid, primary_key, task } | ||||||
|  |             } | ||||||
|  |             BatchKind::IndexDeletion { ids } => { | ||||||
|  |                 let tasks = self.queue.get_existing_tasks_for_processing_batch( | ||||||
|  |                     rtxn, | ||||||
|  |                     current_batch, | ||||||
|  |                     ids, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 Batch::IndexDeletion { | ||||||
|  |                     index_uid, | ||||||
|  |                     index_has_been_created: must_create_index, | ||||||
|  |                     tasks, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             BatchKind::IndexSwap { id } => { | ||||||
|  |                 let mut task = | ||||||
|  |                     self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |                 current_batch.processing(Some(&mut task)); | ||||||
|  |  | ||||||
|  |                 Batch::IndexSwap { task } | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         Ok(Some(next_batch)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Pick the next batch to process, by decreasing priority: | ||||||
|  |     /// 1. The *last* enqueued task cancelation. | ||||||
|  |     /// 2. All the enqueued task deletions. | ||||||
|  |     /// 3. All the enqueued snapshot creations. | ||||||
|  |     /// 4. The *next* enqueued dump creation. | ||||||
|  |     /// 5. The *next* enqueued task, autobatched with the enqueued tasks of the same index. | ||||||
|  |     /// | ||||||
|  |     /// Returns `Ok(None)` when no task is enqueued or when the autobatcher yields no batch. | ||||||
|  |     #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] | ||||||
|  |     pub(crate) fn create_next_batch( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |     ) -> Result<Option<(Batch, ProcessingBatch)>> { | ||||||
|  |         #[cfg(test)] | ||||||
|  |         self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?; | ||||||
|  |  | ||||||
|  |         let mut current_batch = ProcessingBatch::new(self.queue.batches.next_batch_id(rtxn)?); | ||||||
|  |  | ||||||
|  |         let enqueued = self.queue.tasks.get_status(rtxn, Status::Enqueued)?; | ||||||
|  |  | ||||||
|  |         // 1. the most recent task cancelation goes first. | ||||||
|  |         let cancelations = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & &enqueued; | ||||||
|  |         if let Some(cancelation_id) = cancelations.max() { | ||||||
|  |             let mut task = self | ||||||
|  |                 .queue | ||||||
|  |                 .tasks | ||||||
|  |                 .get_task(rtxn, cancelation_id)? | ||||||
|  |                 .ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |             current_batch.processing(Some(&mut task)); | ||||||
|  |             return Ok(Some((Batch::TaskCancelation { task }, current_batch))); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 2. then all the task deletions, in a single batch. | ||||||
|  |         let deletions = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & &enqueued; | ||||||
|  |         if !deletions.is_empty() { | ||||||
|  |             let mut deletion_tasks = self.queue.tasks.get_existing_tasks(rtxn, deletions)?; | ||||||
|  |             current_batch.processing(&mut deletion_tasks); | ||||||
|  |             return Ok(Some((Batch::TaskDeletions(deletion_tasks), current_batch))); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 3. then all the snapshot creations, in a single batch. | ||||||
|  |         let snapshots = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & &enqueued; | ||||||
|  |         if !snapshots.is_empty() { | ||||||
|  |             let mut snapshot_tasks = self.queue.tasks.get_existing_tasks(rtxn, snapshots)?; | ||||||
|  |             current_batch.processing(&mut snapshot_tasks); | ||||||
|  |             return Ok(Some((Batch::SnapshotCreation(snapshot_tasks), current_batch))); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 4. then the oldest dump creation, alone. | ||||||
|  |         let dumps = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & &enqueued; | ||||||
|  |         if let Some(dump_id) = dumps.min() { | ||||||
|  |             let mut task = | ||||||
|  |                 self.queue.tasks.get_task(rtxn, dump_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |             current_batch.processing(Some(&mut task)); | ||||||
|  |             return Ok(Some((Batch::Dump(task), current_batch))); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 5. finally the unprioritised tasks, starting from the oldest enqueued one. | ||||||
|  |         let Some(task_id) = enqueued.min() else { return Ok(None) }; | ||||||
|  |         let mut task = | ||||||
|  |             self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |  | ||||||
|  |         // A task without any index must be an index swap with no swaps: it is batched | ||||||
|  |         // alone. Any other task goes through the autobatcher together with the enqueued | ||||||
|  |         // tasks of its first index. | ||||||
|  |         let Some(&index_name) = task.indexes().first() else { | ||||||
|  |             assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); | ||||||
|  |             current_batch.processing(Some(&mut task)); | ||||||
|  |             return Ok(Some((Batch::IndexSwap { task }, current_batch))); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // The primary key is only known when the index already exists. | ||||||
|  |         let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; | ||||||
|  |         let mut primary_key = None; | ||||||
|  |         if index_already_exists { | ||||||
|  |             let index = self.index_mapper.index(rtxn, index_name)?; | ||||||
|  |             let index_rtxn = index.read_txn()?; | ||||||
|  |             primary_key = index.primary_key(&index_rtxn)?.map(|pk| pk.to_string()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & &enqueued; | ||||||
|  |  | ||||||
|  |         // Without autobatching a single task is taken, otherwise at most the configured | ||||||
|  |         // maximum number of tasks. | ||||||
|  |         let tasks_limit = if !self.scheduler.autobatching_enabled { | ||||||
|  |             1 | ||||||
|  |         } else { | ||||||
|  |             self.scheduler.max_number_of_batched_tasks | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // The autobatcher only needs the uid and the kind of each candidate task. | ||||||
|  |         let mut candidates = Vec::new(); | ||||||
|  |         for candidate_id in index_tasks.into_iter().take(tasks_limit) { | ||||||
|  |             let candidate = self | ||||||
|  |                 .queue | ||||||
|  |                 .tasks | ||||||
|  |                 .get_task(rtxn, candidate_id)? | ||||||
|  |                 .ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |             candidates.push((candidate.uid, candidate.kind)); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // If the autobatcher found nothing then we were notified for something that got | ||||||
|  |         // autobatched somehow and there is nothing to do. | ||||||
|  |         let Some((batchkind, create_index)) = | ||||||
|  |             autobatcher::autobatch(candidates, index_already_exists, primary_key.as_deref()) | ||||||
|  |         else { | ||||||
|  |             return Ok(None); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let next_batch = self.create_next_batch_index( | ||||||
|  |             rtxn, | ||||||
|  |             index_name.to_string(), | ||||||
|  |             batchkind, | ||||||
|  |             &mut current_batch, | ||||||
|  |             create_index, | ||||||
|  |         )?; | ||||||
|  |         Ok(next_batch.map(|batch| (batch, current_batch))) | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										342
									
								
								crates/index-scheduler/src/scheduler/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										342
									
								
								crates/index-scheduler/src/scheduler/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,342 @@ | |||||||
|  | mod autobatcher; | ||||||
|  | mod create_batch; | ||||||
|  | mod process_batch; | ||||||
|  | mod process_dump_creation; | ||||||
|  | mod process_index_operation; | ||||||
|  | mod process_snapshot_creation; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test_document_addition; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test_embedders; | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test_failure; | ||||||
|  |  | ||||||
|  | use std::path::PathBuf; | ||||||
|  | use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; | ||||||
|  | use std::sync::Arc; | ||||||
|  |  | ||||||
|  | use meilisearch_types::error::ResponseError; | ||||||
|  | use meilisearch_types::milli; | ||||||
|  | use meilisearch_types::tasks::Status; | ||||||
|  | use rayon::current_num_threads; | ||||||
|  | use rayon::iter::{IntoParallelIterator, ParallelIterator}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use synchronoise::SignalEvent; | ||||||
|  |  | ||||||
|  | use crate::processing::{AtomicTaskStep, BatchProgress}; | ||||||
|  | use crate::{Error, IndexScheduler, IndexSchedulerOptions, Result, TickOutcome}; | ||||||
|  |  | ||||||
|  | /// A stop flag shared between all the clones of the same value; it starts lowered. | ||||||
|  | #[derive(Default, Clone, Debug)] | ||||||
|  | pub struct MustStopProcessing(Arc<AtomicBool>); | ||||||
|  |  | ||||||
|  | impl MustStopProcessing { | ||||||
|  |     /// Tell whether the flag is currently raised. | ||||||
|  |     pub fn get(&self) -> bool { | ||||||
|  |         let Self(flag) = self; | ||||||
|  |         flag.load(Ordering::Relaxed) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Raise the flag. | ||||||
|  |     pub fn must_stop(&self) { | ||||||
|  |         let Self(flag) = self; | ||||||
|  |         flag.store(true, Ordering::Relaxed); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Lower the flag. | ||||||
|  |     pub fn reset(&self) { | ||||||
|  |         let Self(flag) = self; | ||||||
|  |         flag.store(false, Ordering::Relaxed); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// The scheduling state and options of the [`IndexScheduler`], which reads them through | ||||||
|  | /// its `scheduler` field: the stop flag, the wake-up signal, the autobatching options and | ||||||
|  | /// the paths used when processing tasks. | ||||||
|  | pub struct Scheduler { | ||||||
|  |     /// A boolean that can be set to true to stop the currently processing tasks. | ||||||
|  |     /// It is reset at the beginning of the processing of every batch. | ||||||
|  |     pub must_stop_processing: MustStopProcessing, | ||||||
|  |  | ||||||
|  |     /// Get a signal when a batch needs to be processed. | ||||||
|  |     pub(crate) wake_up: Arc<SignalEvent>, | ||||||
|  |  | ||||||
|  |     /// Whether auto-batching is enabled or not. | ||||||
|  |     /// When it is disabled, only one task at a time is handed to the autobatcher. | ||||||
|  |     pub(crate) autobatching_enabled: bool, | ||||||
|  |  | ||||||
|  |     /// The maximum number of tasks that will be batched together. | ||||||
|  |     /// Only used when auto-batching is enabled. | ||||||
|  |     pub(crate) max_number_of_batched_tasks: usize, | ||||||
|  |  | ||||||
|  |     /// The path used to create the dumps. | ||||||
|  |     pub(crate) dumps_path: PathBuf, | ||||||
|  |  | ||||||
|  |     /// The path used to create the snapshots. | ||||||
|  |     pub(crate) snapshots_path: PathBuf, | ||||||
|  |  | ||||||
|  |     /// The path to the folder containing the auth LMDB env. | ||||||
|  |     pub(crate) auth_path: PathBuf, | ||||||
|  |  | ||||||
|  |     /// The path to the version file of Meilisearch. | ||||||
|  |     pub(crate) version_file_path: PathBuf, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Scheduler { | ||||||
|  |     /// Build another `Scheduler` out of this one: the stop flag and the wake-up signal | ||||||
|  |     /// stay shared with the original through their `Arc`, the options and the paths are | ||||||
|  |     /// copied. | ||||||
|  |     pub(crate) fn private_clone(&self) -> Scheduler { | ||||||
|  |         // Destructuring without `..` forces this function to be updated with the struct. | ||||||
|  |         let Self { | ||||||
|  |             must_stop_processing, | ||||||
|  |             wake_up, | ||||||
|  |             autobatching_enabled, | ||||||
|  |             max_number_of_batched_tasks, | ||||||
|  |             dumps_path, | ||||||
|  |             snapshots_path, | ||||||
|  |             auth_path, | ||||||
|  |             version_file_path, | ||||||
|  |         } = self; | ||||||
|  |  | ||||||
|  |         Self { | ||||||
|  |             must_stop_processing: must_stop_processing.clone(), | ||||||
|  |             wake_up: Arc::clone(wake_up), | ||||||
|  |             autobatching_enabled: *autobatching_enabled, | ||||||
|  |             max_number_of_batched_tasks: *max_number_of_batched_tasks, | ||||||
|  |             dumps_path: dumps_path.clone(), | ||||||
|  |             snapshots_path: snapshots_path.clone(), | ||||||
|  |             auth_path: auth_path.clone(), | ||||||
|  |             version_file_path: version_file_path.clone(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Build a `Scheduler` from the options of the index scheduler, with a lowered stop | ||||||
|  |     /// flag and a fresh wake-up signal. | ||||||
|  |     pub fn new(options: &IndexSchedulerOptions) -> Scheduler { | ||||||
|  |         // we want to start the loop right away in case meilisearch was ctrl+Ced while | ||||||
|  |         // processing things, hence the `true` | ||||||
|  |         let wake_up = Arc::new(SignalEvent::auto(true)); | ||||||
|  |         let must_stop_processing = MustStopProcessing::default(); | ||||||
|  |  | ||||||
|  |         Self { | ||||||
|  |             must_stop_processing, | ||||||
|  |             wake_up, | ||||||
|  |             autobatching_enabled: options.autobatching_enabled, | ||||||
|  |             max_number_of_batched_tasks: options.max_number_of_batched_tasks, | ||||||
|  |             dumps_path: options.dumps_path.clone(), | ||||||
|  |             snapshots_path: options.snapshots_path.clone(), | ||||||
|  |             auth_path: options.auth_path.clone(), | ||||||
|  |             version_file_path: options.version_file_path.clone(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl IndexScheduler { | ||||||
|  |     /// Perform one iteration of the run loop. | ||||||
|  |     /// | ||||||
|  |     /// 1. See if we need to cleanup the task queue | ||||||
|  |     /// 2. Find the next batch of tasks to be processed. | ||||||
|  |     /// 3. Update the information of these tasks following the start of their processing. | ||||||
|  |     /// 4. Update the in-memory list of processed tasks accordingly. | ||||||
|  |     /// 5. Process the batch: | ||||||
|  |     ///    - perform the actions of each batched task | ||||||
|  |     ///    - update the information of each batched task following the end | ||||||
|  |     ///      of their processing. | ||||||
|  |     /// 6. Reset the in-memory list of processed tasks. | ||||||
|  |     /// | ||||||
|  |     /// Returns a [`TickOutcome`]: [`TickOutcome::WaitForSignal`] when there is no batch to process. | ||||||
|  |     pub(crate) fn tick(&self) -> Result<TickOutcome> { | ||||||
|  |         #[cfg(test)] | ||||||
|  |         { | ||||||
|  |             *self.run_loop_iteration.write().unwrap() += 1; | ||||||
|  |             self.breakpoint(crate::test_utils::Breakpoint::Start); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if self.cleanup_enabled { | ||||||
|  |             let mut wtxn = self.env.write_txn()?; | ||||||
|  |             self.queue.cleanup_task_queue(&mut wtxn)?; | ||||||
|  |             wtxn.commit()?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; | ||||||
|  |         let (batch, mut processing_batch) = | ||||||
|  |             match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { | ||||||
|  |                 Some(batch) => batch, | ||||||
|  |                 None => return Ok(TickOutcome::WaitForSignal), | ||||||
|  |             }; | ||||||
|  |         let index_uid = batch.index_uid().map(ToOwned::to_owned); | ||||||
|  |         drop(rtxn); | ||||||
|  |  | ||||||
|  |         // 1. Store the starting date with the bitmap of processing tasks. The task count is captured now because `ids` is extended with the canceled tasks later on. | ||||||
|  |         let mut ids = batch.ids(); | ||||||
|  |         let processed_tasks = ids.len(); | ||||||
|  |  | ||||||
|  |         // We reset the must_stop flag to be sure that we don't stop processing tasks. | ||||||
|  |         self.scheduler.must_stop_processing.reset(); | ||||||
|  |         let progress = self | ||||||
|  |             .processing_tasks | ||||||
|  |             .write() | ||||||
|  |             .unwrap() | ||||||
|  |             // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches. | ||||||
|  |             .start_processing(processing_batch.clone(), ids.clone()); | ||||||
|  |  | ||||||
|  |         #[cfg(test)] | ||||||
|  |         self.breakpoint(crate::test_utils::Breakpoint::BatchCreated); | ||||||
|  |  | ||||||
|  |         // 2. Process the tasks on a dedicated scoped thread: a panic inside `process_batch` makes `join` fail, which is mapped to `Error::ProcessBatchPanicked`. | ||||||
|  |         let res = { | ||||||
|  |             let cloned_index_scheduler = self.private_clone(); | ||||||
|  |             let processing_batch = &mut processing_batch; | ||||||
|  |             let progress = progress.clone(); | ||||||
|  |             std::thread::scope(|s| { | ||||||
|  |                 let handle = std::thread::Builder::new() | ||||||
|  |                     .name(String::from("batch-operation")) | ||||||
|  |                     .spawn_scoped(s, move || { | ||||||
|  |                         cloned_index_scheduler.process_batch(batch, processing_batch, progress) | ||||||
|  |                     }) | ||||||
|  |                     .unwrap(); | ||||||
|  |                 handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) | ||||||
|  |             }) | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // Reset the currently updating index to relinquish the index handle. | ||||||
|  |         self.index_mapper.set_currently_updating_index(None); | ||||||
|  |  | ||||||
|  |         #[cfg(test)] | ||||||
|  |         self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?; | ||||||
|  |  | ||||||
|  |         progress.update_progress(BatchProgress::WritingTasksToDisk); | ||||||
|  |         processing_batch.finished(); | ||||||
|  |         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; | ||||||
|  |         let mut canceled = RoaringBitmap::new(); | ||||||
|  |  | ||||||
|  |         match res { | ||||||
|  |             Ok(tasks) => { | ||||||
|  |                 #[cfg(test)] | ||||||
|  |                 self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded); | ||||||
|  |  | ||||||
|  |                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); | ||||||
|  |                 progress.update_progress(task_progress_obj); | ||||||
|  |                 let mut success = 0; | ||||||
|  |                 let mut failure = 0; | ||||||
|  |                 let mut canceled_by = None; | ||||||
|  |  | ||||||
|  |                 #[allow(unused_variables)] | ||||||
|  |                 for (i, mut task) in tasks.into_iter().enumerate() { | ||||||
|  |                     task_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |                     processing_batch.update(&mut task); | ||||||
|  |                     if task.status == Status::Canceled { | ||||||
|  |                         canceled.insert(task.uid); | ||||||
|  |                         canceled_by = task.canceled_by; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     #[cfg(test)] | ||||||
|  |                     self.maybe_fail( | ||||||
|  |                         crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess { | ||||||
|  |                             task_uid: i as u32, | ||||||
|  |                         }, | ||||||
|  |                     )?; | ||||||
|  |  | ||||||
|  |                     match task.error { | ||||||
|  |                         Some(_) => failure += 1, | ||||||
|  |                         None => success += 1, | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     self.queue | ||||||
|  |                         .tasks | ||||||
|  |                         .update_task(&mut wtxn, &task) | ||||||
|  |                         .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; | ||||||
|  |                 } | ||||||
|  |                 if let Some(canceled_by) = canceled_by { | ||||||
|  |                     self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?; | ||||||
|  |                 } | ||||||
|  |                 tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); | ||||||
|  |             } | ||||||
|  |             // If we have an abortion error we must stop the tick here and re-schedule tasks. | ||||||
|  |             Err(Error::Milli { | ||||||
|  |                 error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), | ||||||
|  |                 .. | ||||||
|  |             }) | ||||||
|  |             | Err(Error::AbortedTask) => { | ||||||
|  |                 #[cfg(test)] | ||||||
|  |                 self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation); | ||||||
|  |                 wtxn.abort(); | ||||||
|  |  | ||||||
|  |                 tracing::info!("A batch of tasks was aborted."); | ||||||
|  |                 // We make sure that we don't call `stop_processing` on the `processing_tasks`, | ||||||
|  |                 // this is because we want to let the next tick call `create_next_batch` and keep | ||||||
|  |                 // the `started_at` date times and `processings` of the current processing tasks. | ||||||
|  |                 // This date time is used by the task cancelation to store the right `started_at` | ||||||
|  |                 // date in the task on disk. | ||||||
|  |                 return Ok(TickOutcome::TickAgain(0)); | ||||||
|  |             } | ||||||
|  |             // If an index said it was full, we need to: | ||||||
|  |             // 1. identify which index is full | ||||||
|  |             // 2. close the associated environment | ||||||
|  |             // 3. resize it | ||||||
|  |             // 4. re-schedule tasks | ||||||
|  |             Err(Error::Milli { | ||||||
|  |                 error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), | ||||||
|  |                 .. | ||||||
|  |             }) if index_uid.is_some() => { | ||||||
|  |                 // fixme: add index_uid to match to avoid the unwrap (it cannot panic here: the match guard checked `is_some()`) | ||||||
|  |                 let index_uid = index_uid.unwrap(); | ||||||
|  |                 // fixme: handle error more gracefully? not sure when this could happen | ||||||
|  |                 self.index_mapper.resize_index(&wtxn, &index_uid)?; | ||||||
|  |                 wtxn.abort(); | ||||||
|  |  | ||||||
|  |                 tracing::info!("The max database size was reached. Resizing the index."); | ||||||
|  |  | ||||||
|  |                 return Ok(TickOutcome::TickAgain(0)); | ||||||
|  |             } | ||||||
|  |             // In case of a failure we must get back and patch all the tasks with the error. | ||||||
|  |             Err(err) => { | ||||||
|  |                 #[cfg(test)] | ||||||
|  |                 self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed); | ||||||
|  |                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); | ||||||
|  |                 progress.update_progress(task_progress_obj); | ||||||
|  |  | ||||||
|  |                 let error: ResponseError = err.into(); | ||||||
|  |                 for id in ids.iter() { | ||||||
|  |                     task_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |                     let mut task = self | ||||||
|  |                         .queue | ||||||
|  |                         .tasks | ||||||
|  |                         .get_task(&wtxn, id) | ||||||
|  |                         .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? | ||||||
|  |                         .ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |                     task.status = Status::Failed; | ||||||
|  |                     task.error = Some(error.clone()); | ||||||
|  |                     task.details = task.details.map(|d| d.to_failed()); | ||||||
|  |                     processing_batch.update(&mut task); | ||||||
|  |  | ||||||
|  |                     #[cfg(test)] | ||||||
|  |                     self.maybe_fail( | ||||||
|  |                         crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure, | ||||||
|  |                     )?; | ||||||
|  |  | ||||||
|  |                     tracing::error!("Batch failed {}", error); | ||||||
|  |  | ||||||
|  |                     self.queue | ||||||
|  |                         .tasks | ||||||
|  |                         .update_task(&mut wtxn, &task) | ||||||
|  |                         .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // We must re-add the canceled tasks so they're part of the same batch. | ||||||
|  |         ids |= canceled; | ||||||
|  |         self.queue.write_batch(&mut wtxn, processing_batch, &ids)?; | ||||||
|  |  | ||||||
|  |         #[cfg(test)] | ||||||
|  |         self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?; | ||||||
|  |  | ||||||
|  |         wtxn.commit().map_err(Error::HeedTransaction)?; | ||||||
|  |  | ||||||
|  |         // We should stop processing AFTER everything is processed and written to disk. Otherwise, a batch (which only lives in RAM) may appear in the processing tasks | ||||||
|  |         // and then become « not found » for some time, until everything is written and the final commit is made. | ||||||
|  |         self.processing_tasks.write().unwrap().stop_processing(); | ||||||
|  |  | ||||||
|  |         // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart. | ||||||
|  |         tracing::debug!("Deleting the update files"); | ||||||
|  |  | ||||||
|  |         // We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap. | ||||||
|  |         let idx = AtomicU32::new(0); | ||||||
|  |         (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { | ||||||
|  |             let rtxn = self.read_txn()?; | ||||||
|  |             while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { | ||||||
|  |                 let task = self | ||||||
|  |                     .queue | ||||||
|  |                     .tasks | ||||||
|  |                     .get_task(&rtxn, id) | ||||||
|  |                     .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? | ||||||
|  |                     .ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |                 if let Err(e) = self.queue.delete_persisted_task_data(&task) { | ||||||
|  |                     tracing::error!( | ||||||
|  |                         "Failure to delete the content files associated with task {}. Error: {e}", | ||||||
|  |                         task.uid | ||||||
|  |                     ); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             Ok(()) | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         // We shouldn't crash the tick function if we can't send data to the webhook. | ||||||
|  |         let _ = self.notify_webhook(&ids); | ||||||
|  |  | ||||||
|  |         #[cfg(test)] | ||||||
|  |         self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing); | ||||||
|  |  | ||||||
|  |         Ok(TickOutcome::TickAgain(processed_tasks)) | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										581
									
								
								crates/index-scheduler/src/scheduler/process_batch.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										581
									
								
								crates/index-scheduler/src/scheduler/process_batch.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,581 @@ | |||||||
|  | use std::collections::{BTreeSet, HashMap, HashSet}; | ||||||
|  | use std::sync::atomic::Ordering; | ||||||
|  |  | ||||||
|  | use meilisearch_types::batches::BatchId; | ||||||
|  | use meilisearch_types::heed::{RoTxn, RwTxn}; | ||||||
|  | use meilisearch_types::milli::progress::Progress; | ||||||
|  | use meilisearch_types::milli::{self}; | ||||||
|  | use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task}; | ||||||
|  | use milli::update::Settings as MilliSettings; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|  | use super::create_batch::Batch; | ||||||
|  | use crate::processing::{ | ||||||
|  |     AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, | ||||||
|  |     InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, | ||||||
|  |     UpdateIndexProgress, VariableNameStep, | ||||||
|  | }; | ||||||
|  | use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; | ||||||
|  | use crate::{Error, IndexScheduler, Result, TaskId}; | ||||||
|  |  | ||||||
|  | impl IndexScheduler { | ||||||
|  |     /// Apply the operation associated with the given batch. | ||||||
|  |     /// | ||||||
|  |     /// ## Return | ||||||
|  |     /// The list of tasks that were processed. The metadata of each task in the returned | ||||||
|  |     /// list is updated accordingly, with the exception of its date fields | ||||||
|  |     /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). | ||||||
|  |     #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] | ||||||
|  |     pub(crate) fn process_batch( | ||||||
|  |         &self, | ||||||
|  |         batch: Batch, | ||||||
|  |         current_batch: &mut ProcessingBatch, | ||||||
|  |         progress: Progress, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         #[cfg(test)] | ||||||
|  |         { | ||||||
|  |             self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?; | ||||||
|  |             self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?; | ||||||
|  |             self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         match batch { | ||||||
|  |             Batch::TaskCancelation { mut task } => { | ||||||
|  |                 // 1. Retrieve the tasks that matched the query at enqueue-time. | ||||||
|  |                 let matched_tasks = | ||||||
|  |                     if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind { | ||||||
|  |                         tasks | ||||||
|  |                     } else { | ||||||
|  |                         unreachable!() | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                 let rtxn = self.env.read_txn()?; | ||||||
|  |                 let mut canceled_tasks = self.cancel_matched_tasks( | ||||||
|  |                     &rtxn, | ||||||
|  |                     task.uid, | ||||||
|  |                     current_batch, | ||||||
|  |                     matched_tasks, | ||||||
|  |                     &progress, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 task.status = Status::Succeeded; | ||||||
|  |                 match &mut task.details { | ||||||
|  |                     Some(Details::TaskCancelation { | ||||||
|  |                         matched_tasks: _, | ||||||
|  |                         canceled_tasks: canceled_tasks_details, | ||||||
|  |                         original_filter: _, | ||||||
|  |                     }) => { | ||||||
|  |                         *canceled_tasks_details = Some(canceled_tasks.len() as u64); | ||||||
|  |                     } | ||||||
|  |                     _ => unreachable!(), | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 canceled_tasks.push(task); | ||||||
|  |  | ||||||
|  |                 Ok(canceled_tasks) | ||||||
|  |             } | ||||||
|  |             Batch::TaskDeletions(mut tasks) => { | ||||||
|  |                 // 1. Retrieve the tasks that matched the query at enqueue-time (union over every deletion task of the batch). | ||||||
|  |                 let mut matched_tasks = RoaringBitmap::new(); | ||||||
|  |  | ||||||
|  |                 for task in tasks.iter() { | ||||||
|  |                     if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { | ||||||
|  |                         matched_tasks |= tasks; | ||||||
|  |                     } else { | ||||||
|  |                         unreachable!() | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let mut wtxn = self.env.write_txn()?; | ||||||
|  |                 let mut deleted_tasks = | ||||||
|  |                     self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?; | ||||||
|  |                 wtxn.commit()?; | ||||||
|  |  | ||||||
|  |                 for task in tasks.iter_mut() { | ||||||
|  |                     task.status = Status::Succeeded; | ||||||
|  |                     let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else { | ||||||
|  |                         unreachable!() | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     let deleted_tasks_count = deleted_tasks.intersection_len(tasks); | ||||||
|  |                     deleted_tasks -= tasks; | ||||||
|  |  | ||||||
|  |                     match &mut task.details { | ||||||
|  |                         Some(Details::TaskDeletion { | ||||||
|  |                             matched_tasks: _, | ||||||
|  |                             deleted_tasks, | ||||||
|  |                             original_filter: _, | ||||||
|  |                         }) => { | ||||||
|  |                             *deleted_tasks = Some(deleted_tasks_count); | ||||||
|  |                         } | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks), | ||||||
|  |             Batch::Dump(task) => self.process_dump_creation(progress, task), | ||||||
|  |             Batch::IndexOperation { op, must_create_index } => { | ||||||
|  |                 let index_uid = op.index_uid().to_string(); | ||||||
|  |                 let index = if must_create_index { | ||||||
|  |                     // Create the index if it doesn't already exist; the write transaction is handed over to `create_index`. | ||||||
|  |                     let wtxn = self.env.write_txn()?; | ||||||
|  |                     self.index_mapper.create_index(wtxn, &index_uid, None)? | ||||||
|  |                 } else { | ||||||
|  |                     let rtxn = self.env.read_txn()?; | ||||||
|  |                     self.index_mapper.index(&rtxn, &index_uid)? | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 // The index operation can take a long time, so save this handle to make it available to the search for the duration of the tick. | ||||||
|  |                 self.index_mapper | ||||||
|  |                     .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); | ||||||
|  |  | ||||||
|  |                 let mut index_wtxn = index.write_txn()?; | ||||||
|  |                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; | ||||||
|  |  | ||||||
|  |                 { | ||||||
|  |                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); | ||||||
|  |                     let _entered = span.enter(); | ||||||
|  |  | ||||||
|  |                     index_wtxn.commit()?; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // If the update was processed successfully, we're going to store the new | ||||||
|  |                 // stats of the index. Since the tasks have already been processed and | ||||||
|  |                 // this is a non-critical operation, we should not fail the entire batch | ||||||
|  |                 // if it fails: the error is only logged. | ||||||
|  |                 let res = || -> Result<()> { | ||||||
|  |                     let index_rtxn = index.read_txn()?; | ||||||
|  |                     let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) | ||||||
|  |                         .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; | ||||||
|  |                     let mut wtxn = self.env.write_txn()?; | ||||||
|  |                     self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; | ||||||
|  |                     wtxn.commit()?; | ||||||
|  |                     Ok(()) | ||||||
|  |                 }(); | ||||||
|  |  | ||||||
|  |                 match res { | ||||||
|  |                     Ok(_) => (), | ||||||
|  |                     Err(e) => tracing::error!( | ||||||
|  |                         error = &e as &dyn std::error::Error, | ||||||
|  |                         "Could not write the stats of the index" | ||||||
|  |                     ), | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             Batch::IndexCreation { index_uid, primary_key, task } => { | ||||||
|  |                 progress.update_progress(CreateIndexProgress::CreatingTheIndex); | ||||||
|  |  | ||||||
|  |                 let wtxn = self.env.write_txn()?; | ||||||
|  |                 if self.index_mapper.exists(&wtxn, &index_uid)? { | ||||||
|  |                     return Err(Error::IndexAlreadyExists(index_uid)); | ||||||
|  |                 } | ||||||
|  |                 self.index_mapper.create_index(wtxn, &index_uid, None)?; | ||||||
|  |  | ||||||
|  |                 self.process_batch( | ||||||
|  |                     Batch::IndexUpdate { index_uid, primary_key, task }, | ||||||
|  |                     current_batch, | ||||||
|  |                     progress, | ||||||
|  |                 ) | ||||||
|  |             } | ||||||
|  |             Batch::IndexUpdate { index_uid, primary_key, mut task } => { | ||||||
|  |                 progress.update_progress(UpdateIndexProgress::UpdatingTheIndex); | ||||||
|  |                 let rtxn = self.env.read_txn()?; | ||||||
|  |                 let index = self.index_mapper.index(&rtxn, &index_uid)?; | ||||||
|  |  | ||||||
|  |                 if let Some(primary_key) = primary_key.clone() { | ||||||
|  |                     let mut index_wtxn = index.write_txn()?; | ||||||
|  |                     let mut builder = MilliSettings::new( | ||||||
|  |                         &mut index_wtxn, | ||||||
|  |                         &index, | ||||||
|  |                         self.index_mapper.indexer_config(), | ||||||
|  |                     ); | ||||||
|  |                     builder.set_primary_key(primary_key); | ||||||
|  |                     let must_stop_processing = self.scheduler.must_stop_processing.clone(); | ||||||
|  |                     builder | ||||||
|  |                         .execute( | ||||||
|  |                             |indexing_step| tracing::debug!(update = ?indexing_step), | ||||||
|  |                             || must_stop_processing.get(), | ||||||
|  |                         ) | ||||||
|  |                         .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; | ||||||
|  |                     index_wtxn.commit()?; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // Release the rtxn before starting a new wtxn on the same db. | ||||||
|  |                 rtxn.commit()?; | ||||||
|  |  | ||||||
|  |                 task.status = Status::Succeeded; | ||||||
|  |                 task.details = Some(Details::IndexInfo { primary_key }); | ||||||
|  |  | ||||||
|  |                 // If the update was processed successfully, we're going to store the new | ||||||
|  |                 // stats of the index. Since the tasks have already been processed and | ||||||
|  |                 // this is a non-critical operation, we should not fail the entire batch | ||||||
|  |                 // if it fails: the error is only logged. | ||||||
|  |                 let res = || -> Result<()> { | ||||||
|  |                     let mut wtxn = self.env.write_txn()?; | ||||||
|  |                     let index_rtxn = index.read_txn()?; | ||||||
|  |                     let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) | ||||||
|  |                         .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|  |                     self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; | ||||||
|  |                     wtxn.commit()?; | ||||||
|  |                     Ok(()) | ||||||
|  |                 }(); | ||||||
|  |  | ||||||
|  |                 match res { | ||||||
|  |                     Ok(_) => (), | ||||||
|  |                     Err(e) => tracing::error!( | ||||||
|  |                         error = &e as &dyn std::error::Error, | ||||||
|  |                         "Could not write the stats of the index" | ||||||
|  |                     ), | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(vec![task]) | ||||||
|  |             } | ||||||
|  |             Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { | ||||||
|  |                 progress.update_progress(DeleteIndexProgress::DeletingTheIndex); | ||||||
|  |                 let wtxn = self.env.write_txn()?; | ||||||
|  |  | ||||||
|  |                 // It's possible that the index doesn't exist: any error here makes the document count fall back to 0. | ||||||
|  |                 let number_of_documents = || -> Result<u64> { | ||||||
|  |                     let index = self.index_mapper.index(&wtxn, &index_uid)?; | ||||||
|  |                     let index_rtxn = index.read_txn()?; | ||||||
|  |                     index | ||||||
|  |                         .number_of_documents(&index_rtxn) | ||||||
|  |                         .map_err(|e| Error::from_milli(e, Some(index_uid.to_string()))) | ||||||
|  |                 }() | ||||||
|  |                 .unwrap_or_default(); | ||||||
|  |  | ||||||
|  |                 // The write transaction is directly owned and committed inside. | ||||||
|  |                 match self.index_mapper.delete_index(wtxn, &index_uid) { | ||||||
|  |                     Ok(()) => (), | ||||||
|  |                     Err(Error::IndexNotFound(_)) if index_has_been_created => (), | ||||||
|  |                     Err(e) => return Err(e), | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // We set all the tasks details to the default value, except for the index deletions which report the number of deleted documents. | ||||||
|  |                 for task in &mut tasks { | ||||||
|  |                     task.status = Status::Succeeded; | ||||||
|  |                     task.details = match &task.kind { | ||||||
|  |                         KindWithContent::IndexDeletion { .. } => { | ||||||
|  |                             Some(Details::ClearAll { deleted_documents: Some(number_of_documents) }) | ||||||
|  |                         } | ||||||
|  |                         otherwise => otherwise.default_finished_details(), | ||||||
|  |                     }; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             Batch::IndexSwap { mut task } => { | ||||||
|  |                 progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); | ||||||
|  |  | ||||||
|  |                 let mut wtxn = self.env.write_txn()?; | ||||||
|  |                 let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { | ||||||
|  |                     swaps | ||||||
|  |                 } else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |                 let mut not_found_indexes = BTreeSet::new(); | ||||||
|  |                 for IndexSwap { indexes: (lhs, rhs) } in swaps { | ||||||
|  |                     for index in [lhs, rhs] { | ||||||
|  |                         let index_exists = self.index_mapper.index_exists(&wtxn, index)?; | ||||||
|  |                         if !index_exists { | ||||||
|  |                             not_found_indexes.insert(index); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 if !not_found_indexes.is_empty() { | ||||||
|  |                     if not_found_indexes.len() == 1 { | ||||||
|  |                         return Err(Error::SwapIndexNotFound( | ||||||
|  |                             not_found_indexes.into_iter().next().unwrap().clone(), | ||||||
|  |                         )); | ||||||
|  |                     } else { | ||||||
|  |                         return Err(Error::SwapIndexesNotFound( | ||||||
|  |                             not_found_indexes.into_iter().cloned().collect(), | ||||||
|  |                         )); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 progress.update_progress(SwappingTheIndexes::SwappingTheIndexes); | ||||||
|  |                 for (step, swap) in swaps.iter().enumerate() { | ||||||
|  |                     progress.update_progress(VariableNameStep::new( | ||||||
|  |                         format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1), | ||||||
|  |                         step as u32, | ||||||
|  |                         swaps.len() as u32, | ||||||
|  |                     )); | ||||||
|  |                     self.apply_index_swap( | ||||||
|  |                         &mut wtxn, | ||||||
|  |                         &progress, | ||||||
|  |                         task.uid, | ||||||
|  |                         &swap.indexes.0, | ||||||
|  |                         &swap.indexes.1, | ||||||
|  |                     )?; | ||||||
|  |                 } | ||||||
|  |                 wtxn.commit()?; | ||||||
|  |                 task.status = Status::Succeeded; | ||||||
|  |                 Ok(vec![task]) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Exchange the index `lhs` with the index `rhs` inside the given write transaction. | ||||||
|  |     /// | ||||||
|  |     /// The tasks of both indexes whose id is lower than `task_id` are rewritten so that | ||||||
|  |     /// they reference the other index, the per-index task sets are exchanged accordingly | ||||||
|  |     /// and the swap is finally applied in the index mapper. | ||||||
|  |     /// | ||||||
|  |     /// Fails with `Error::IndexNotFound` when one of the two indexes doesn't exist (`lhs` is | ||||||
|  |     /// checked first) and with `Error::CorruptedTaskQueue` when a referenced task is missing. | ||||||
|  |     fn apply_index_swap( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         progress: &Progress, | ||||||
|  |         task_id: u32, | ||||||
|  |         lhs: &str, | ||||||
|  |         rhs: &str, | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks); | ||||||
|  |  | ||||||
|  |         // 1. Bail out as soon as one side of the swap is not an existing index. | ||||||
|  |         if !self.index_mapper.index_exists(wtxn, lhs)? { | ||||||
|  |             return Err(Error::IndexNotFound(lhs.to_owned())); | ||||||
|  |         } | ||||||
|  |         if !self.index_mapper.index_exists(wtxn, rhs)? { | ||||||
|  |             return Err(Error::IndexNotFound(rhs.to_owned())); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 2. Collect, for each index, the tasks that appeared before the index swap task. | ||||||
|  |         let mut lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?; | ||||||
|  |         lhs_task_ids.remove_range(task_id..); | ||||||
|  |         let mut rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?; | ||||||
|  |         rhs_task_ids.remove_range(task_id..); | ||||||
|  |  | ||||||
|  |         // 3. Rename the index uid stored in the `KindWithContent` of every impacted task. | ||||||
|  |         progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks); | ||||||
|  |         let impacted_task_ids = &lhs_task_ids | &rhs_task_ids; | ||||||
|  |         let (updated_count, task_step) = AtomicTaskStep::new(impacted_task_ids.len() as u32); | ||||||
|  |         progress.update_progress(task_step); | ||||||
|  |  | ||||||
|  |         for uid in impacted_task_ids { | ||||||
|  |             let Some(mut task) = self.queue.tasks.get_task(wtxn, uid)? else { | ||||||
|  |                 return Err(Error::CorruptedTaskQueue); | ||||||
|  |             }; | ||||||
|  |             swap_index_uid_in_task(&mut task, (lhs, rhs)); | ||||||
|  |             self.queue.tasks.all_tasks.put(wtxn, &uid, &task)?; | ||||||
|  |             updated_count.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 4. Each index loses its own older tasks... | ||||||
|  |         // 5. ...and receives the older tasks of the other index. | ||||||
|  |         progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata); | ||||||
|  |         self.queue.tasks.update_index(wtxn, lhs, |task_ids| { | ||||||
|  |             *task_ids -= &lhs_task_ids; | ||||||
|  |             *task_ids |= &rhs_task_ids; | ||||||
|  |         })?; | ||||||
|  |         self.queue.tasks.update_index(wtxn, rhs, |task_ids| { | ||||||
|  |             *task_ids -= &rhs_task_ids; | ||||||
|  |             *task_ids |= &lhs_task_ids; | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         // 6. Finally, swap the two indexes in the index mapper. | ||||||
|  |         self.index_mapper.swap(wtxn, lhs, rhs)?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Delete each given task from all the databases (if it is deletable). | ||||||
|  |     /// | ||||||
|  |     /// Tasks that are enqueued or currently processing are never deleted, and ids in | ||||||
|  |     /// `matched_tasks` that do not correspond to an existing task are ignored. | ||||||
|  |     /// | ||||||
|  |     /// Returns the set of task ids that were actually deleted. | ||||||
|  |     fn delete_matched_tasks( | ||||||
|  |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         matched_tasks: &RoaringBitmap, | ||||||
|  |         progress: &Progress, | ||||||
|  |     ) -> Result<RoaringBitmap> { | ||||||
|  |         progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime); | ||||||
|  |  | ||||||
|  |         // 1. Remove from this list the tasks that we are not allowed to delete | ||||||
|  |         let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?; | ||||||
|  |         let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); | ||||||
|  |  | ||||||
|  |         let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?; | ||||||
|  |         let mut to_delete_tasks = all_task_ids & matched_tasks; | ||||||
|  |         to_delete_tasks -= &**processing_tasks; | ||||||
|  |         to_delete_tasks -= &enqueued_tasks; | ||||||
|  |  | ||||||
|  |         // 2. We now have a list of tasks to delete, delete them | ||||||
|  |  | ||||||
|  |         // While removing the date-time entries of each task we record which indexes, | ||||||
|  |         // statuses, kinds, cancelation tasks and batches reference the deleted tasks, | ||||||
|  |         // so that only those entries need to be updated afterwards. | ||||||
|  |         let mut affected_indexes = HashSet::new(); | ||||||
|  |         let mut affected_statuses = HashSet::new(); | ||||||
|  |         let mut affected_kinds = HashSet::new(); | ||||||
|  |         let mut affected_canceled_by = RoaringBitmap::new(); | ||||||
|  |         // The tasks that have been removed *per batches*. | ||||||
|  |         let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new(); | ||||||
|  |  | ||||||
|  |         let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); | ||||||
|  |         progress.update_progress(task_progress); | ||||||
|  |         for task_id in to_delete_tasks.iter() { | ||||||
|  |             let task = | ||||||
|  |                 self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |  | ||||||
|  |             affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned())); | ||||||
|  |             affected_statuses.insert(task.status); | ||||||
|  |             affected_kinds.insert(task.kind.as_kind()); | ||||||
|  |             // Note: don't delete the persisted task data since | ||||||
|  |             // we can only delete succeeded, failed, and canceled tasks. | ||||||
|  |             // In each of those cases, the persisted data is supposed to | ||||||
|  |             // have been deleted already. | ||||||
|  |             utils::remove_task_datetime( | ||||||
|  |                 wtxn, | ||||||
|  |                 self.queue.tasks.enqueued_at, | ||||||
|  |                 task.enqueued_at, | ||||||
|  |                 task.uid, | ||||||
|  |             )?; | ||||||
|  |             if let Some(started_at) = task.started_at { | ||||||
|  |                 utils::remove_task_datetime( | ||||||
|  |                     wtxn, | ||||||
|  |                     self.queue.tasks.started_at, | ||||||
|  |                     started_at, | ||||||
|  |                     task.uid, | ||||||
|  |                 )?; | ||||||
|  |             } | ||||||
|  |             if let Some(finished_at) = task.finished_at { | ||||||
|  |                 utils::remove_task_datetime( | ||||||
|  |                     wtxn, | ||||||
|  |                     self.queue.tasks.finished_at, | ||||||
|  |                     finished_at, | ||||||
|  |                     task.uid, | ||||||
|  |                 )?; | ||||||
|  |             } | ||||||
|  |             if let Some(canceled_by) = task.canceled_by { | ||||||
|  |                 affected_canceled_by.insert(canceled_by); | ||||||
|  |             } | ||||||
|  |             if let Some(batch_uid) = task.batch_uid { | ||||||
|  |                 affected_batches.entry(batch_uid).or_default().insert(task_id); | ||||||
|  |             } | ||||||
|  |             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Remove the deleted tasks from every index, status and kind bitmap | ||||||
|  |         // in which at least one of them appeared. | ||||||
|  |         progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata); | ||||||
|  |         let (atomic_progress, task_progress) = AtomicTaskStep::new( | ||||||
|  |             (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32, | ||||||
|  |         ); | ||||||
|  |         progress.update_progress(task_progress); | ||||||
|  |         for index in affected_indexes.iter() { | ||||||
|  |             self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?; | ||||||
|  |             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for status in affected_statuses.iter() { | ||||||
|  |             self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?; | ||||||
|  |             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for kind in affected_kinds.iter() { | ||||||
|  |             self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?; | ||||||
|  |             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Delete the tasks themselves from the main tasks database. | ||||||
|  |         progress.update_progress(TaskDeletionProgress::DeletingTasks); | ||||||
|  |         let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); | ||||||
|  |         progress.update_progress(task_progress); | ||||||
|  |         for task in to_delete_tasks.iter() { | ||||||
|  |             self.queue.tasks.all_tasks.delete(wtxn, &task)?; | ||||||
|  |             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |         // Update the `canceled_by` entries that referenced deleted tasks, | ||||||
|  |         // dropping the entries that end up empty. | ||||||
|  |         for canceled_by in affected_canceled_by { | ||||||
|  |             if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? { | ||||||
|  |                 tasks -= &to_delete_tasks; | ||||||
|  |                 if tasks.is_empty() { | ||||||
|  |                     self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?; | ||||||
|  |                 } else { | ||||||
|  |                     self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         progress.update_progress(TaskDeletionProgress::DeletingBatches); | ||||||
|  |         let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32); | ||||||
|  |         progress.update_progress(batch_progress); | ||||||
|  |         // Inside this loop `to_delete_tasks` is shadowed by the set of tasks deleted | ||||||
|  |         // from the current batch only; the outer set is the one returned at the end. | ||||||
|  |         for (batch_id, to_delete_tasks) in affected_batches { | ||||||
|  |             if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? { | ||||||
|  |                 // `tasks` now only contains the tasks of the batch that still exist. | ||||||
|  |                 tasks -= &to_delete_tasks; | ||||||
|  |                 // We must remove the batch entirely | ||||||
|  |                 if tasks.is_empty() { | ||||||
|  |                     self.queue.batches.all_batches.delete(wtxn, &batch_id)?; | ||||||
|  |                     self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; | ||||||
|  |                 } | ||||||
|  |                 // NOTE(review): when `tasks` is not empty, the reduced set is not written | ||||||
|  |                 // back to `batch_to_tasks_mapping` here (unlike `canceled_by` above) | ||||||
|  |                 // -- confirm this is intended. | ||||||
|  |  | ||||||
|  |                 // Anyway, we must remove the batch from all its reverse indexes. | ||||||
|  |                 // The only way to do that is to check, for each affected index, status | ||||||
|  |                 // and kind, whether one of the remaining tasks of the batch still | ||||||
|  |                 // belongs to it. | ||||||
|  |  | ||||||
|  |                 for index in affected_indexes.iter() { | ||||||
|  |                     let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?; | ||||||
|  |                     let remaining_index_tasks = index_tasks & &tasks; | ||||||
|  |                     if remaining_index_tasks.is_empty() { | ||||||
|  |                         self.queue.batches.update_index(wtxn, index, |bitmap| { | ||||||
|  |                             bitmap.remove(batch_id); | ||||||
|  |                         })?; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 for status in affected_statuses.iter() { | ||||||
|  |                     let status_tasks = self.queue.tasks.get_status(wtxn, *status)?; | ||||||
|  |                     let remaining_status_tasks = status_tasks & &tasks; | ||||||
|  |                     if remaining_status_tasks.is_empty() { | ||||||
|  |                         self.queue.batches.update_status(wtxn, *status, |bitmap| { | ||||||
|  |                             bitmap.remove(batch_id); | ||||||
|  |                         })?; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 for kind in affected_kinds.iter() { | ||||||
|  |                     let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?; | ||||||
|  |                     let remaining_kind_tasks = kind_tasks & &tasks; | ||||||
|  |                     if remaining_kind_tasks.is_empty() { | ||||||
|  |                         self.queue.batches.update_kind(wtxn, *kind, |bitmap| { | ||||||
|  |                             bitmap.remove(batch_id); | ||||||
|  |                         })?; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(to_delete_tasks) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Mark as canceled every matched task that is still cancelable. | ||||||
|  |     /// | ||||||
|  |     /// Only the tasks of `matched_tasks` that are currently enqueued are considered. Each of | ||||||
|  |     /// them gets the `Canceled` status, `cancel_task_id` as its `canceled_by` and its details | ||||||
|  |     /// converted with `to_failed`, and is registered into `current_batch`. | ||||||
|  |     /// | ||||||
|  |     /// Nothing is written here: the updated tasks are returned and must be persisted | ||||||
|  |     /// in the database by the caller. | ||||||
|  |     fn cancel_matched_tasks( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         cancel_task_id: TaskId, | ||||||
|  |         current_batch: &mut ProcessingBatch, | ||||||
|  |         matched_tasks: &RoaringBitmap, | ||||||
|  |         progress: &Progress, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         progress.update_progress(TaskCancelationProgress::RetrievingTasks); | ||||||
|  |  | ||||||
|  |         // 1. Restrict the matched tasks to the ones we are allowed to cancel. | ||||||
|  |         //    Only the _enqueued_ tasks qualify; the indexation of the _processing_ | ||||||
|  |         //    ones should already have been aborted at this point. | ||||||
|  |         let mut tasks_to_cancel = self.queue.tasks.get_status(rtxn, Status::Enqueued)?; | ||||||
|  |         tasks_to_cancel &= matched_tasks; | ||||||
|  |         let nb_tasks_to_cancel = tasks_to_cancel.len() as u32; | ||||||
|  |  | ||||||
|  |         let (retrieved_count, retrieving_step) = AtomicTaskStep::new(nb_tasks_to_cancel); | ||||||
|  |         progress.update_progress(retrieving_step); | ||||||
|  |  | ||||||
|  |         // 2. Load these tasks, counting each id as it gets consumed. | ||||||
|  |         let counted_task_ids = tasks_to_cancel.iter().inspect(|_| { | ||||||
|  |             retrieved_count.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         }); | ||||||
|  |         let mut canceled_tasks = self.queue.tasks.get_existing_tasks(rtxn, counted_task_ids)?; | ||||||
|  |  | ||||||
|  |         // 3. Flag every loaded task as canceled by `cancel_task_id`. | ||||||
|  |         progress.update_progress(TaskCancelationProgress::UpdatingTasks); | ||||||
|  |         let (updated_count, updating_step) = AtomicTaskStep::new(nb_tasks_to_cancel); | ||||||
|  |         progress.update_progress(updating_step); | ||||||
|  |         for task in &mut canceled_tasks { | ||||||
|  |             task.status = Status::Canceled; | ||||||
|  |             task.canceled_by = Some(cancel_task_id); | ||||||
|  |             task.details = task.details.as_ref().map(|details| details.to_failed()); | ||||||
|  |             current_batch.processing(Some(task)); | ||||||
|  |             updated_count.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(canceled_tasks) | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										236
									
								
								crates/index-scheduler/src/scheduler/process_dump_creation.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										236
									
								
								crates/index-scheduler/src/scheduler/process_dump_creation.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,236 @@ | |||||||
|  | use std::fs::File; | ||||||
|  | use std::io::BufWriter; | ||||||
|  | use std::sync::atomic::Ordering; | ||||||
|  |  | ||||||
|  | use dump::IndexMetadata; | ||||||
|  | use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; | ||||||
|  | use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; | ||||||
|  | use meilisearch_types::milli::progress::Progress; | ||||||
|  | use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||||
|  | use meilisearch_types::milli::{self}; | ||||||
|  | use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; | ||||||
|  | use time::macros::format_description; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  |  | ||||||
|  | use crate::processing::{ | ||||||
|  |     AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress, VariableNameStep, | ||||||
|  | }; | ||||||
|  | use crate::{Error, IndexScheduler, Result}; | ||||||
|  |  | ||||||
|  | impl IndexScheduler { | ||||||
|  |     /// Create a dump for the given `DumpCreation` task. | ||||||
|  |     /// | ||||||
|  |     /// The dump contains, in this order: the API keys carried by the task, the whole | ||||||
|  |     /// task queue (along with the documents of the content files of the tasks that are | ||||||
|  |     /// still enqueued), every index (metadata, documents with their embeddings and | ||||||
|  |     /// settings) and the runtime experimental features. It is then persisted as | ||||||
|  |     /// `<dump_uid>.dump` in the scheduler's `dumps_path`. | ||||||
|  |     /// | ||||||
|  |     /// ## Return | ||||||
|  |     /// The task, marked as succeeded and carrying the uid of the dump in its details. | ||||||
|  |     /// | ||||||
|  |     /// ## Errors | ||||||
|  |     /// Returns `Error::AbortedTask` as soon as the `must_stop_processing` flag is seen set, | ||||||
|  |     /// and forwards any error raised while reading the databases or writing the dump. | ||||||
|  |     pub(super) fn process_dump_creation( | ||||||
|  |         &self, | ||||||
|  |         progress: Progress, | ||||||
|  |         mut task: Task, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         progress.update_progress(DumpCreationProgress::StartTheDumpCreation); | ||||||
|  |         let started_at = OffsetDateTime::now_utc(); | ||||||
|  |         // Any other kind of task reaching this function is considered a bug. | ||||||
|  |         let (keys, instance_uid) = | ||||||
|  |             if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { | ||||||
|  |                 (keys, instance_uid) | ||||||
|  |             } else { | ||||||
|  |                 unreachable!(); | ||||||
|  |             }; | ||||||
|  |         let dump = dump::DumpWriter::new(*instance_uid)?; | ||||||
|  |  | ||||||
|  |         // 1. dump the keys | ||||||
|  |         progress.update_progress(DumpCreationProgress::DumpTheApiKeys); | ||||||
|  |         let mut dump_keys = dump.create_keys()?; | ||||||
|  |         for key in keys { | ||||||
|  |             dump_keys.push_key(key)?; | ||||||
|  |         } | ||||||
|  |         dump_keys.flush()?; | ||||||
|  |  | ||||||
|  |         let rtxn = self.env.read_txn()?; | ||||||
|  |  | ||||||
|  |         // 2. dump the tasks | ||||||
|  |         progress.update_progress(DumpCreationProgress::DumpTheTasks); | ||||||
|  |         let mut dump_tasks = dump.create_tasks_queue()?; | ||||||
|  |  | ||||||
|  |         let (atomic, update_task_progress) = | ||||||
|  |             AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32); | ||||||
|  |         progress.update_progress(update_task_progress); | ||||||
|  |  | ||||||
|  |         for ret in self.queue.tasks.all_tasks.iter(&rtxn)? { | ||||||
|  |             if self.scheduler.must_stop_processing.get() { | ||||||
|  |                 return Err(Error::AbortedTask); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             let (_, mut t) = ret?; | ||||||
|  |             // Both values are read before `t` is possibly modified below | ||||||
|  |             // and then consumed by the conversion into a dump task. | ||||||
|  |             let status = t.status; | ||||||
|  |             let content_file = t.content_uuid(); | ||||||
|  |  | ||||||
|  |             // In the case we're dumping ourselves we want to be marked as finished | ||||||
|  |             // to not loop over ourselves indefinitely. | ||||||
|  |             if t.uid == task.uid { | ||||||
|  |                 let finished_at = OffsetDateTime::now_utc(); | ||||||
|  |  | ||||||
|  |                 // We're going to fake the date because we don't know if everything is going to go well. | ||||||
|  |                 // But we need to dump the task as finished and successful. | ||||||
|  |                 // If something fails everything will be set appropriately in the end. | ||||||
|  |                 t.status = Status::Succeeded; | ||||||
|  |                 t.started_at = Some(started_at); | ||||||
|  |                 t.finished_at = Some(finished_at); | ||||||
|  |             } | ||||||
|  |             let mut dump_content_file = dump_tasks.push_task(&t.into())?; | ||||||
|  |  | ||||||
|  |             // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. | ||||||
|  |             if let Some(content_file) = content_file { | ||||||
|  |                 if self.scheduler.must_stop_processing.get() { | ||||||
|  |                     return Err(Error::AbortedTask); | ||||||
|  |                 } | ||||||
|  |                 // The check relies on the status read from the database, | ||||||
|  |                 // not on the one possibly faked above. | ||||||
|  |                 if status == Status::Enqueued { | ||||||
|  |                     let content_file = self.queue.file_store.get_update(content_file)?; | ||||||
|  |  | ||||||
|  |                     let reader = DocumentsBatchReader::from_reader(content_file) | ||||||
|  |                         .map_err(|e| Error::from_milli(e.into(), None))?; | ||||||
|  |  | ||||||
|  |                     let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); | ||||||
|  |  | ||||||
|  |                     while let Some(doc) = | ||||||
|  |                         cursor.next_document().map_err(|e| Error::from_milli(e.into(), None))? | ||||||
|  |                     { | ||||||
|  |                         dump_content_file.push_document( | ||||||
|  |                             &obkv_to_object(doc, &documents_batch_index) | ||||||
|  |                                 .map_err(|e| Error::from_milli(e, None))?, | ||||||
|  |                         )?; | ||||||
|  |                     } | ||||||
|  |                     dump_content_file.flush()?; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             atomic.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |         dump_tasks.flush()?; | ||||||
|  |  | ||||||
|  |         // 3. Dump the indexes | ||||||
|  |         progress.update_progress(DumpCreationProgress::DumpTheIndexes); | ||||||
|  |         let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; | ||||||
|  |         // Zero-based position of the index being dumped, only used to report the progress. | ||||||
|  |         let mut count = 0; | ||||||
|  |         self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { | ||||||
|  |             progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes)); | ||||||
|  |             count += 1; | ||||||
|  |  | ||||||
|  |             // Shadows the scheduler transaction: from here on we read from the index itself. | ||||||
|  |             let rtxn = index.read_txn()?; | ||||||
|  |             let metadata = IndexMetadata { | ||||||
|  |                 uid: uid.to_owned(), | ||||||
|  |                 primary_key: index.primary_key(&rtxn)?.map(String::from), | ||||||
|  |                 created_at: index | ||||||
|  |                     .created_at(&rtxn) | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, | ||||||
|  |                 updated_at: index | ||||||
|  |                     .updated_at(&rtxn) | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, | ||||||
|  |             }; | ||||||
|  |             let mut index_dumper = dump.create_index(uid, &metadata)?; | ||||||
|  |  | ||||||
|  |             let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |             let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||||
|  |             let embedding_configs = index | ||||||
|  |                 .embedding_configs(&rtxn) | ||||||
|  |                 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|  |  | ||||||
|  |             let nb_documents = index | ||||||
|  |                 .number_of_documents(&rtxn) | ||||||
|  |                 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? | ||||||
|  |                 as u32; | ||||||
|  |             let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); | ||||||
|  |             progress.update_progress(update_document_progress); | ||||||
|  |             let documents = index | ||||||
|  |                 .all_documents(&rtxn) | ||||||
|  |                 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|  |             // 3.1. Dump the documents | ||||||
|  |             for ret in documents { | ||||||
|  |                 if self.scheduler.must_stop_processing.get() { | ||||||
|  |                     return Err(Error::AbortedTask); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|  |  | ||||||
|  |                 let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc) | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|  |  | ||||||
|  |                 // Labeled block: lets us skip the injection when the document has no embeddings. | ||||||
|  |                 'inject_vectors: { | ||||||
|  |                     let embeddings = index | ||||||
|  |                         .embeddings(&rtxn, id) | ||||||
|  |                         .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|  |  | ||||||
|  |                     if embeddings.is_empty() { | ||||||
|  |                         break 'inject_vectors; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     // Reuse the reserved vectors field of the document, or create it as an empty object. | ||||||
|  |                     let vectors = document | ||||||
|  |                         .entry(RESERVED_VECTORS_FIELD_NAME.to_owned()) | ||||||
|  |                         .or_insert(serde_json::Value::Object(Default::default())); | ||||||
|  |  | ||||||
|  |                     // The field exists but is not an object: report it as a user error, | ||||||
|  |                     // naming the document by its external id when it can be resolved. | ||||||
|  |                     let serde_json::Value::Object(vectors) = vectors else { | ||||||
|  |                         let user_err = | ||||||
|  |                             milli::Error::UserError(milli::UserError::InvalidVectorsMapType { | ||||||
|  |                                 document_id: { | ||||||
|  |                                     if let Ok(Some(Ok(index))) = index | ||||||
|  |                                         .external_id_of(&rtxn, std::iter::once(id)) | ||||||
|  |                                         .map(|it| it.into_iter().next()) | ||||||
|  |                                     { | ||||||
|  |                                         index | ||||||
|  |                                     } else { | ||||||
|  |                                         format!("internal docid={id}") | ||||||
|  |                                     } | ||||||
|  |                                 }, | ||||||
|  |                                 value: vectors.clone(), | ||||||
|  |                             }); | ||||||
|  |  | ||||||
|  |                         return Err(Error::from_milli(user_err, Some(uid.to_string()))); | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     for (embedder_name, embeddings) in embeddings { | ||||||
|  |                         let user_provided = embedding_configs | ||||||
|  |                             .iter() | ||||||
|  |                             .find(|conf| conf.name == embedder_name) | ||||||
|  |                             .is_some_and(|conf| conf.user_provided.contains(id)); | ||||||
|  |                         // Embeddings that are not flagged as user provided are marked to be regenerated. | ||||||
|  |                         let embeddings = ExplicitVectors { | ||||||
|  |                             embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( | ||||||
|  |                                 embeddings, | ||||||
|  |                             )), | ||||||
|  |                             regenerate: !user_provided, | ||||||
|  |                         }; | ||||||
|  |                         vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap()); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 index_dumper.push_document(&document)?; | ||||||
|  |                 atomic.fetch_add(1, Ordering::Relaxed); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // 3.2. Dump the settings | ||||||
|  |             // The settings are read with the `RevealSecrets` policy. | ||||||
|  |             let settings = meilisearch_types::settings::settings( | ||||||
|  |                 index, | ||||||
|  |                 &rtxn, | ||||||
|  |                 meilisearch_types::settings::SecretPolicy::RevealSecrets, | ||||||
|  |             ) | ||||||
|  |             .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|  |             index_dumper.settings(&settings)?; | ||||||
|  |             Ok(()) | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         // 4. Dump experimental feature settings | ||||||
|  |         progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); | ||||||
|  |         let features = self.features().runtime_features(); | ||||||
|  |         dump.create_experimental_features(features)?; | ||||||
|  |  | ||||||
|  |         // The dump uid is the start date formatted as `YYYYMMDD-HHMMSSmmm`. | ||||||
|  |         let dump_uid = started_at.format(format_description!( | ||||||
|  |                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" | ||||||
|  |                 )).unwrap(); | ||||||
|  |  | ||||||
|  |         // Last chance to abort before the dump file is created in the dumps directory. | ||||||
|  |         if self.scheduler.must_stop_processing.get() { | ||||||
|  |             return Err(Error::AbortedTask); | ||||||
|  |         } | ||||||
|  |         progress.update_progress(DumpCreationProgress::CompressTheDump); | ||||||
|  |         let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid)); | ||||||
|  |         let file = File::create(path)?; | ||||||
|  |         dump.persist_to(BufWriter::new(file))?; | ||||||
|  |  | ||||||
|  |         // if we reached this step we can tell the scheduler we succeeded to dump ourselves. | ||||||
|  |         task.status = Status::Succeeded; | ||||||
|  |         task.details = Some(Details::Dump { dump_uid: Some(dump_uid) }); | ||||||
|  |         Ok(vec![task]) | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										529
									
								
								crates/index-scheduler/src/scheduler/process_index_operation.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										529
									
								
								crates/index-scheduler/src/scheduler/process_index_operation.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,529 @@ | |||||||
|  | use bumpalo::collections::CollectIn; | ||||||
|  | use bumpalo::Bump; | ||||||
|  | use meilisearch_types::heed::RwTxn; | ||||||
|  | use meilisearch_types::milli::documents::PrimaryKey; | ||||||
|  | use meilisearch_types::milli::progress::Progress; | ||||||
|  | use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; | ||||||
|  | use meilisearch_types::milli::update::DocumentAdditionResult; | ||||||
|  | use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; | ||||||
|  | use meilisearch_types::settings::apply_settings_to_builder; | ||||||
|  | use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; | ||||||
|  | use meilisearch_types::Index; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|  | use super::create_batch::{DocumentOperation, IndexOperation}; | ||||||
|  | use crate::processing::{ | ||||||
|  |     DocumentDeletionProgress, DocumentEditionProgress, DocumentOperationProgress, SettingsProgress, | ||||||
|  | }; | ||||||
|  | use crate::{Error, IndexScheduler, Result}; | ||||||
|  |  | ||||||
|  | impl IndexScheduler { | ||||||
|  |     /// Process the index operation on the given index. | ||||||
|  |     /// | ||||||
|  |     /// ## Return | ||||||
|  |     /// The list of processed tasks. | ||||||
|  |     #[tracing::instrument( | ||||||
|  |         level = "trace", | ||||||
|  |         skip(self, index_wtxn, index, progress), | ||||||
|  |         target = "indexing::scheduler" | ||||||
|  |     )] | ||||||
|  |     pub(crate) fn apply_index_operation<'i>( | ||||||
|  |         &self, | ||||||
|  |         index_wtxn: &mut RwTxn<'i>, | ||||||
|  |         index: &'i Index, | ||||||
|  |         operation: IndexOperation, | ||||||
|  |         progress: Progress, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         let indexer_alloc = Bump::new(); | ||||||
|  |  | ||||||
|  |         let started_processing_at = std::time::Instant::now(); | ||||||
|  |         let must_stop_processing = self.scheduler.must_stop_processing.clone(); | ||||||
|  |  | ||||||
|  |         match operation { | ||||||
|  |             IndexOperation::DocumentClear { index_uid, mut tasks } => { | ||||||
|  |                 let count = milli::update::ClearDocuments::new(index_wtxn, index) | ||||||
|  |                     .execute() | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(index_uid)))?; | ||||||
|  |  | ||||||
|  |                 let mut first_clear_found = false; | ||||||
|  |                 for task in &mut tasks { | ||||||
|  |                     task.status = Status::Succeeded; | ||||||
|  |                     // The first document clear will effectively delete every documents | ||||||
|  |                     // in the database but the next ones will clear 0 documents. | ||||||
|  |                     task.details = match &task.kind { | ||||||
|  |                         KindWithContent::DocumentClear { .. } => { | ||||||
|  |                             let count = if first_clear_found { 0 } else { count }; | ||||||
|  |                             first_clear_found = true; | ||||||
|  |                             Some(Details::ClearAll { deleted_documents: Some(count) }) | ||||||
|  |                         } | ||||||
|  |                         otherwise => otherwise.default_details(), | ||||||
|  |                     }; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             IndexOperation::DocumentOperation { | ||||||
|  |                 index_uid, | ||||||
|  |                 primary_key, | ||||||
|  |                 method, | ||||||
|  |                 operations, | ||||||
|  |                 mut tasks, | ||||||
|  |             } => { | ||||||
|  |                 progress.update_progress(DocumentOperationProgress::RetrievingConfig); | ||||||
|  |                 // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. | ||||||
|  |                 // this is made difficult by the fact we're doing private clones of the index scheduler and sending it | ||||||
|  |                 // to a fresh thread. | ||||||
|  |                 let mut content_files = Vec::new(); | ||||||
|  |                 for operation in &operations { | ||||||
|  |                     if let DocumentOperation::Add(content_uuid) = operation { | ||||||
|  |                         let content_file = self.queue.file_store.get_update(*content_uuid)?; | ||||||
|  |                         let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; | ||||||
|  |                         if !mmap.is_empty() { | ||||||
|  |                             content_files.push(mmap); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let rtxn = index.read_txn()?; | ||||||
|  |                 let db_fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |                 let mut new_fields_ids_map = db_fields_ids_map.clone(); | ||||||
|  |  | ||||||
|  |                 let mut content_files_iter = content_files.iter(); | ||||||
|  |                 let mut indexer = indexer::DocumentOperation::new(method); | ||||||
|  |                 let embedders = index | ||||||
|  |                     .embedding_configs(index_wtxn) | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|  |                 let embedders = self.embedders(index_uid.clone(), embedders)?; | ||||||
|  |                 for operation in operations { | ||||||
|  |                     match operation { | ||||||
|  |                         DocumentOperation::Add(_content_uuid) => { | ||||||
|  |                             let mmap = content_files_iter.next().unwrap(); | ||||||
|  |                             indexer | ||||||
|  |                                 .add_documents(mmap) | ||||||
|  |                                 .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|  |                         } | ||||||
|  |                         DocumentOperation::Delete(document_ids) => { | ||||||
|  |                             let document_ids: bumpalo::collections::vec::Vec<_> = document_ids | ||||||
|  |                                 .iter() | ||||||
|  |                                 .map(|s| &*indexer_alloc.alloc_str(s)) | ||||||
|  |                                 .collect_in(&indexer_alloc); | ||||||
|  |                             indexer.delete_documents(document_ids.into_bump_slice()); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let local_pool; | ||||||
|  |                 let indexer_config = self.index_mapper.indexer_config(); | ||||||
|  |                 let pool = match &indexer_config.thread_pool { | ||||||
|  |                     Some(pool) => pool, | ||||||
|  |                     None => { | ||||||
|  |                         local_pool = ThreadPoolNoAbortBuilder::new() | ||||||
|  |                             .thread_name(|i| format!("indexing-thread-{i}")) | ||||||
|  |                             .build() | ||||||
|  |                             .unwrap(); | ||||||
|  |                         &local_pool | ||||||
|  |                     } | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); | ||||||
|  |                 let (document_changes, operation_stats, primary_key) = indexer | ||||||
|  |                     .into_changes( | ||||||
|  |                         &indexer_alloc, | ||||||
|  |                         index, | ||||||
|  |                         &rtxn, | ||||||
|  |                         primary_key.as_deref(), | ||||||
|  |                         &mut new_fields_ids_map, | ||||||
|  |                         &|| must_stop_processing.get(), | ||||||
|  |                         progress.clone(), | ||||||
|  |                     ) | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                 let mut candidates_count = 0; | ||||||
|  |                 for (stats, task) in operation_stats.into_iter().zip(&mut tasks) { | ||||||
|  |                     candidates_count += stats.document_count; | ||||||
|  |                     match stats.error { | ||||||
|  |                         Some(error) => { | ||||||
|  |                             task.status = Status::Failed; | ||||||
|  |                             task.error = Some(milli::Error::UserError(error).into()); | ||||||
|  |                         } | ||||||
|  |                         None => task.status = Status::Succeeded, | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     task.details = match task.details { | ||||||
|  |                         Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => { | ||||||
|  |                             Some(Details::DocumentAdditionOrUpdate { | ||||||
|  |                                 received_documents, | ||||||
|  |                                 indexed_documents: Some(stats.document_count), | ||||||
|  |                             }) | ||||||
|  |                         } | ||||||
|  |                         Some(Details::DocumentDeletion { provided_ids, .. }) => { | ||||||
|  |                             Some(Details::DocumentDeletion { | ||||||
|  |                                 provided_ids, | ||||||
|  |                                 deleted_documents: Some(stats.document_count), | ||||||
|  |                             }) | ||||||
|  |                         } | ||||||
|  |                         _ => { | ||||||
|  |                             // In the case of a `documentAdditionOrUpdate` or `DocumentDeletion` | ||||||
|  |                             // the details MUST be set to either addition or deletion | ||||||
|  |                             unreachable!(); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 progress.update_progress(DocumentOperationProgress::Indexing); | ||||||
|  |                 if tasks.iter().any(|res| res.error.is_none()) { | ||||||
|  |                     indexer::index( | ||||||
|  |                         index_wtxn, | ||||||
|  |                         index, | ||||||
|  |                         pool, | ||||||
|  |                         indexer_config.grenad_parameters(), | ||||||
|  |                         &db_fields_ids_map, | ||||||
|  |                         new_fields_ids_map, | ||||||
|  |                         primary_key, | ||||||
|  |                         &document_changes, | ||||||
|  |                         embedders, | ||||||
|  |                         &|| must_stop_processing.get(), | ||||||
|  |                         &progress, | ||||||
|  |                     ) | ||||||
|  |                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                     let addition = DocumentAdditionResult { | ||||||
|  |                         indexed_documents: candidates_count, | ||||||
|  |                         number_of_documents: index | ||||||
|  |                             .number_of_documents(index_wtxn) | ||||||
|  |                             .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             IndexOperation::DocumentEdition { index_uid, mut task } => { | ||||||
|  |                 progress.update_progress(DocumentEditionProgress::RetrievingConfig); | ||||||
|  |  | ||||||
|  |                 let (filter, code) = if let KindWithContent::DocumentEdition { | ||||||
|  |                     filter_expr, | ||||||
|  |                     context: _, | ||||||
|  |                     function, | ||||||
|  |                     .. | ||||||
|  |                 } = &task.kind | ||||||
|  |                 { | ||||||
|  |                     (filter_expr, function) | ||||||
|  |                 } else { | ||||||
|  |                     unreachable!() | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 let candidates = match filter.as_ref().map(Filter::from_json) { | ||||||
|  |                     Some(Ok(Some(filter))) => filter | ||||||
|  |                         .evaluate(index_wtxn, index) | ||||||
|  |                         .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, | ||||||
|  |                     None | Some(Ok(None)) => index.documents_ids(index_wtxn)?, | ||||||
|  |                     Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))), | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 let (original_filter, context, function) = if let Some(Details::DocumentEdition { | ||||||
|  |                     original_filter, | ||||||
|  |                     context, | ||||||
|  |                     function, | ||||||
|  |                     .. | ||||||
|  |                 }) = task.details | ||||||
|  |                 { | ||||||
|  |                     (original_filter, context, function) | ||||||
|  |                 } else { | ||||||
|  |                     // In the case of a `documentEdition` the details MUST be set | ||||||
|  |                     unreachable!(); | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 if candidates.is_empty() { | ||||||
|  |                     task.status = Status::Succeeded; | ||||||
|  |                     task.details = Some(Details::DocumentEdition { | ||||||
|  |                         original_filter, | ||||||
|  |                         context, | ||||||
|  |                         function, | ||||||
|  |                         deleted_documents: Some(0), | ||||||
|  |                         edited_documents: Some(0), | ||||||
|  |                     }); | ||||||
|  |  | ||||||
|  |                     return Ok(vec![task]); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let rtxn = index.read_txn()?; | ||||||
|  |                 let db_fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |                 let mut new_fields_ids_map = db_fields_ids_map.clone(); | ||||||
|  |                 // candidates not empty => index not empty => a primary key is set | ||||||
|  |                 let primary_key = index.primary_key(&rtxn)?.unwrap(); | ||||||
|  |  | ||||||
|  |                 let primary_key = | ||||||
|  |                     PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) | ||||||
|  |                         .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                 let result_count = Ok((candidates.len(), candidates.len())) as Result<_>; | ||||||
|  |  | ||||||
|  |                 if task.error.is_none() { | ||||||
|  |                     let local_pool; | ||||||
|  |                     let indexer_config = self.index_mapper.indexer_config(); | ||||||
|  |                     let pool = match &indexer_config.thread_pool { | ||||||
|  |                         Some(pool) => pool, | ||||||
|  |                         None => { | ||||||
|  |                             local_pool = ThreadPoolNoAbortBuilder::new() | ||||||
|  |                                 .thread_name(|i| format!("indexing-thread-{i}")) | ||||||
|  |                                 .build() | ||||||
|  |                                 .unwrap(); | ||||||
|  |                             &local_pool | ||||||
|  |                         } | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     let candidates_count = candidates.len(); | ||||||
|  |                     progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); | ||||||
|  |                     let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); | ||||||
|  |                     let document_changes = pool | ||||||
|  |                         .install(|| { | ||||||
|  |                             indexer | ||||||
|  |                                 .into_changes(&primary_key) | ||||||
|  |                                 .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))) | ||||||
|  |                         }) | ||||||
|  |                         .unwrap()?; | ||||||
|  |                     let embedders = index | ||||||
|  |                         .embedding_configs(index_wtxn) | ||||||
|  |                         .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||||
|  |                     let embedders = self.embedders(index_uid.clone(), embedders)?; | ||||||
|  |  | ||||||
|  |                     progress.update_progress(DocumentEditionProgress::Indexing); | ||||||
|  |                     indexer::index( | ||||||
|  |                         index_wtxn, | ||||||
|  |                         index, | ||||||
|  |                         pool, | ||||||
|  |                         indexer_config.grenad_parameters(), | ||||||
|  |                         &db_fields_ids_map, | ||||||
|  |                         new_fields_ids_map, | ||||||
|  |                         None, // cannot change primary key in DocumentEdition | ||||||
|  |                         &document_changes, | ||||||
|  |                         embedders, | ||||||
|  |                         &|| must_stop_processing.get(), | ||||||
|  |                         &progress, | ||||||
|  |                     ) | ||||||
|  |                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                     let addition = DocumentAdditionResult { | ||||||
|  |                         indexed_documents: candidates_count, | ||||||
|  |                         number_of_documents: index | ||||||
|  |                             .number_of_documents(index_wtxn) | ||||||
|  |                             .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 match result_count { | ||||||
|  |                     Ok((deleted_documents, edited_documents)) => { | ||||||
|  |                         task.status = Status::Succeeded; | ||||||
|  |                         task.details = Some(Details::DocumentEdition { | ||||||
|  |                             original_filter, | ||||||
|  |                             context, | ||||||
|  |                             function, | ||||||
|  |                             deleted_documents: Some(deleted_documents), | ||||||
|  |                             edited_documents: Some(edited_documents), | ||||||
|  |                         }); | ||||||
|  |                     } | ||||||
|  |                     Err(e) => { | ||||||
|  |                         task.status = Status::Failed; | ||||||
|  |                         task.details = Some(Details::DocumentEdition { | ||||||
|  |                             original_filter, | ||||||
|  |                             context, | ||||||
|  |                             function, | ||||||
|  |                             deleted_documents: Some(0), | ||||||
|  |                             edited_documents: Some(0), | ||||||
|  |                         }); | ||||||
|  |                         task.error = Some(e.into()); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(vec![task]) | ||||||
|  |             } | ||||||
|  |             IndexOperation::DocumentDeletion { mut tasks, index_uid } => { | ||||||
|  |                 progress.update_progress(DocumentDeletionProgress::RetrievingConfig); | ||||||
|  |  | ||||||
|  |                 let mut to_delete = RoaringBitmap::new(); | ||||||
|  |                 let external_documents_ids = index.external_documents_ids(); | ||||||
|  |  | ||||||
|  |                 for task in tasks.iter_mut() { | ||||||
|  |                     let before = to_delete.len(); | ||||||
|  |                     task.status = Status::Succeeded; | ||||||
|  |  | ||||||
|  |                     match &task.kind { | ||||||
|  |                         KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { | ||||||
|  |                             for id in documents_ids { | ||||||
|  |                                 if let Some(id) = external_documents_ids.get(index_wtxn, id)? { | ||||||
|  |                                     to_delete.insert(id); | ||||||
|  |                                 } | ||||||
|  |                             } | ||||||
|  |                             let will_be_removed = to_delete.len() - before; | ||||||
|  |                             task.details = Some(Details::DocumentDeletion { | ||||||
|  |                                 provided_ids: documents_ids.len(), | ||||||
|  |                                 deleted_documents: Some(will_be_removed), | ||||||
|  |                             }); | ||||||
|  |                         } | ||||||
|  |                         KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => { | ||||||
|  |                             let before = to_delete.len(); | ||||||
|  |                             let filter = match Filter::from_json(filter_expr) { | ||||||
|  |                                 Ok(filter) => filter, | ||||||
|  |                                 Err(err) => { | ||||||
|  |                                     // theoretically, this should be caught by deserr before reaching the index-scheduler and cannot happen | ||||||
|  |                                     task.status = Status::Failed; | ||||||
|  |                                     task.error = Some( | ||||||
|  |                                         Error::from_milli(err, Some(index_uid.clone())).into(), | ||||||
|  |                                     ); | ||||||
|  |                                     None | ||||||
|  |                                 } | ||||||
|  |                             }; | ||||||
|  |                             if let Some(filter) = filter { | ||||||
|  |                                 let candidates = filter | ||||||
|  |                                     .evaluate(index_wtxn, index) | ||||||
|  |                                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))); | ||||||
|  |                                 match candidates { | ||||||
|  |                                     Ok(candidates) => to_delete |= candidates, | ||||||
|  |                                     Err(err) => { | ||||||
|  |                                         task.status = Status::Failed; | ||||||
|  |                                         task.error = Some(err.into()); | ||||||
|  |                                     } | ||||||
|  |                                 }; | ||||||
|  |                             } | ||||||
|  |                             let will_be_removed = to_delete.len() - before; | ||||||
|  |                             if let Some(Details::DocumentDeletionByFilter { | ||||||
|  |                                 original_filter: _, | ||||||
|  |                                 deleted_documents, | ||||||
|  |                             }) = &mut task.details | ||||||
|  |                             { | ||||||
|  |                                 *deleted_documents = Some(will_be_removed); | ||||||
|  |                             } else { | ||||||
|  |                                 // In the case of a `documentDeleteByFilter` the details MUST be set | ||||||
|  |                                 unreachable!() | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                         _ => unreachable!(), | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 if to_delete.is_empty() { | ||||||
|  |                     return Ok(tasks); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let rtxn = index.read_txn()?; | ||||||
|  |                 let db_fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |                 let mut new_fields_ids_map = db_fields_ids_map.clone(); | ||||||
|  |  | ||||||
|  |                 // to_delete not empty => index not empty => primary key set | ||||||
|  |                 let primary_key = index.primary_key(&rtxn)?.unwrap(); | ||||||
|  |  | ||||||
|  |                 let primary_key = | ||||||
|  |                     PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) | ||||||
|  |                         .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                 if !tasks.iter().all(|res| res.error.is_some()) { | ||||||
|  |                     let local_pool; | ||||||
|  |                     let indexer_config = self.index_mapper.indexer_config(); | ||||||
|  |                     let pool = match &indexer_config.thread_pool { | ||||||
|  |                         Some(pool) => pool, | ||||||
|  |                         None => { | ||||||
|  |                             local_pool = ThreadPoolNoAbortBuilder::new() | ||||||
|  |                                 .thread_name(|i| format!("indexing-thread-{i}")) | ||||||
|  |                                 .build() | ||||||
|  |                                 .unwrap(); | ||||||
|  |                             &local_pool | ||||||
|  |                         } | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     progress.update_progress(DocumentDeletionProgress::DeleteDocuments); | ||||||
|  |                     let mut indexer = indexer::DocumentDeletion::new(); | ||||||
|  |                     let candidates_count = to_delete.len(); | ||||||
|  |                     indexer.delete_documents_by_docids(to_delete); | ||||||
|  |                     let document_changes = indexer.into_changes(&indexer_alloc, primary_key); | ||||||
|  |                     let embedders = index | ||||||
|  |                         .embedding_configs(index_wtxn) | ||||||
|  |                         .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||||
|  |                     let embedders = self.embedders(index_uid.clone(), embedders)?; | ||||||
|  |  | ||||||
|  |                     progress.update_progress(DocumentDeletionProgress::Indexing); | ||||||
|  |                     indexer::index( | ||||||
|  |                         index_wtxn, | ||||||
|  |                         index, | ||||||
|  |                         pool, | ||||||
|  |                         indexer_config.grenad_parameters(), | ||||||
|  |                         &db_fields_ids_map, | ||||||
|  |                         new_fields_ids_map, | ||||||
|  |                         None, // document deletion never changes primary key | ||||||
|  |                         &document_changes, | ||||||
|  |                         embedders, | ||||||
|  |                         &|| must_stop_processing.get(), | ||||||
|  |                         &progress, | ||||||
|  |                     ) | ||||||
|  |                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                     let addition = DocumentAdditionResult { | ||||||
|  |                         indexed_documents: candidates_count, | ||||||
|  |                         number_of_documents: index | ||||||
|  |                             .number_of_documents(index_wtxn) | ||||||
|  |                             .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             IndexOperation::Settings { index_uid, settings, mut tasks } => { | ||||||
|  |                 progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); | ||||||
|  |                 let indexer_config = self.index_mapper.indexer_config(); | ||||||
|  |                 let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); | ||||||
|  |  | ||||||
|  |                 for (task, (_, settings)) in tasks.iter_mut().zip(settings) { | ||||||
|  |                     let checked_settings = settings.clone().check(); | ||||||
|  |                     task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); | ||||||
|  |                     apply_settings_to_builder(&checked_settings, &mut builder); | ||||||
|  |  | ||||||
|  |                     // We can apply the status right now; if an update fails later, | ||||||
|  |                     // the whole batch will be marked as failed. | ||||||
|  |                     task.status = Status::Succeeded; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 progress.update_progress(SettingsProgress::ApplyTheSettings); | ||||||
|  |                 builder | ||||||
|  |                     .execute( | ||||||
|  |                         |indexing_step| tracing::debug!(update = ?indexing_step), | ||||||
|  |                         || must_stop_processing.get(), | ||||||
|  |                     ) | ||||||
|  |                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||||
|  |  | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |             IndexOperation::DocumentClearAndSetting { | ||||||
|  |                 index_uid, | ||||||
|  |                 cleared_tasks, | ||||||
|  |                 settings, | ||||||
|  |                 settings_tasks, | ||||||
|  |             } => { | ||||||
|  |                 let mut import_tasks = self.apply_index_operation( | ||||||
|  |                     index_wtxn, | ||||||
|  |                     index, | ||||||
|  |                     IndexOperation::DocumentClear { | ||||||
|  |                         index_uid: index_uid.clone(), | ||||||
|  |                         tasks: cleared_tasks, | ||||||
|  |                     }, | ||||||
|  |                     progress.clone(), | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 let settings_tasks = self.apply_index_operation( | ||||||
|  |                     index_wtxn, | ||||||
|  |                     index, | ||||||
|  |                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, | ||||||
|  |                     progress, | ||||||
|  |                 )?; | ||||||
|  |  | ||||||
|  |                 let mut tasks = settings_tasks; | ||||||
|  |                 tasks.append(&mut import_tasks); | ||||||
|  |                 Ok(tasks) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -0,0 +1,134 @@ | |||||||
|  | use std::ffi::OsStr; | ||||||
|  | use std::fs; | ||||||
|  | use std::sync::atomic::Ordering; | ||||||
|  |  | ||||||
|  | use meilisearch_types::heed::CompactionOption; | ||||||
|  | use meilisearch_types::milli::progress::Progress; | ||||||
|  | use meilisearch_types::milli::{self}; | ||||||
|  | use meilisearch_types::tasks::{Status, Task}; | ||||||
|  | use meilisearch_types::{compression, VERSION_FILE_NAME}; | ||||||
|  |  | ||||||
|  | use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress, VariableNameStep}; | ||||||
|  | use crate::{Error, IndexScheduler, Result}; | ||||||
|  |  | ||||||
|  | impl IndexScheduler { | ||||||
|  |     pub(super) fn process_snapshot( | ||||||
|  |         &self, | ||||||
|  |         progress: Progress, | ||||||
|  |         mut tasks: Vec<Task>, | ||||||
|  |     ) -> Result<Vec<Task>> { | ||||||
|  |         progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); | ||||||
|  |  | ||||||
|  |         fs::create_dir_all(&self.scheduler.snapshots_path)?; | ||||||
|  |         let temp_snapshot_dir = tempfile::tempdir()?; | ||||||
|  |  | ||||||
|  |         // 1. Snapshot the version file. | ||||||
|  |         let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); | ||||||
|  |         fs::copy(&self.scheduler.version_file_path, dst)?; | ||||||
|  |  | ||||||
|  |         // 2. Snapshot the index-scheduler LMDB env | ||||||
|  |         // | ||||||
|  |         // When we call copy_to_file, LMDB opens a read transaction by itself, | ||||||
|  |         // we can't provide our own. It is an issue as we would like to know | ||||||
|  |         // the update files to copy but new ones can be enqueued between the copy | ||||||
|  |         // of the env and the new transaction we open to retrieve the enqueued tasks. | ||||||
|  |         // So we prefer opening a new transaction after copying the env and copying too many | ||||||
|  |         // update files rather than too few. | ||||||
|  |         // | ||||||
|  |         // Note that there cannot be any update files deleted between those | ||||||
|  |         // two read operations as the task processing is synchronous. | ||||||
|  |  | ||||||
|  |         // 2.1 First copy the LMDB env of the index-scheduler | ||||||
|  |         progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); | ||||||
|  |         let dst = temp_snapshot_dir.path().join("tasks"); | ||||||
|  |         fs::create_dir_all(&dst)?; | ||||||
|  |         self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; | ||||||
|  |  | ||||||
|  |         // 2.2 Create a read transaction on the index-scheduler | ||||||
|  |         let rtxn = self.env.read_txn()?; | ||||||
|  |  | ||||||
|  |         // 2.3 Create the update files directory | ||||||
|  |         let update_files_dir = temp_snapshot_dir.path().join("update_files"); | ||||||
|  |         fs::create_dir_all(&update_files_dir)?; | ||||||
|  |  | ||||||
|  |         // 2.4 Only copy the update files of the enqueued tasks | ||||||
|  |         progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); | ||||||
|  |         let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?; | ||||||
|  |         let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32); | ||||||
|  |         progress.update_progress(update_file_progress); | ||||||
|  |         for task_id in enqueued { | ||||||
|  |             let task = | ||||||
|  |                 self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||||
|  |             if let Some(content_uuid) = task.content_uuid() { | ||||||
|  |                 let src = self.queue.file_store.get_update_path(content_uuid); | ||||||
|  |                 let dst = update_files_dir.join(content_uuid.to_string()); | ||||||
|  |                 fs::copy(src, dst)?; | ||||||
|  |             } | ||||||
|  |             atomic.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 3. Snapshot every index | ||||||
|  |         progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); | ||||||
|  |         let index_mapping = self.index_mapper.index_mapping; | ||||||
|  |         let nb_indexes = index_mapping.len(&rtxn)? as u32; | ||||||
|  |  | ||||||
|  |         for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { | ||||||
|  |             let (name, uuid) = result?; | ||||||
|  |             progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); | ||||||
|  |             let index = self.index_mapper.index(&rtxn, name)?; | ||||||
|  |             let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); | ||||||
|  |             fs::create_dir_all(&dst)?; | ||||||
|  |             index | ||||||
|  |                 .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) | ||||||
|  |                 .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         drop(rtxn); | ||||||
|  |  | ||||||
|  |         // 4. Snapshot the auth LMDB env | ||||||
|  |         progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); | ||||||
|  |         let dst = temp_snapshot_dir.path().join("auth"); | ||||||
|  |         fs::create_dir_all(&dst)?; | ||||||
|  |         // TODO We can't use the open_auth_store_env function here but we should | ||||||
|  |         let auth = unsafe { | ||||||
|  |             milli::heed::EnvOpenOptions::new() | ||||||
|  |                 .map_size(1024 * 1024 * 1024) // 1 GiB | ||||||
|  |                 .max_dbs(2) | ||||||
|  |                 .open(&self.scheduler.auth_path) | ||||||
|  |         }?; | ||||||
|  |         auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; | ||||||
|  |  | ||||||
|  |         // 5. Copy and tarball the flat snapshot | ||||||
|  |         progress.update_progress(SnapshotCreationProgress::CreateTheTarball); | ||||||
|  |         // 5.1 Find the original name of the database | ||||||
|  |         // TODO find a better way to get this path | ||||||
|  |         let mut base_path = self.env.path().to_owned(); | ||||||
|  |         base_path.pop(); | ||||||
|  |         let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); | ||||||
|  |  | ||||||
|  |         // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension | ||||||
|  |         let snapshot_path = self.scheduler.snapshots_path.join(format!("{}.snapshot", db_name)); | ||||||
|  |         let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.scheduler.snapshots_path)?; | ||||||
|  |         compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; | ||||||
|  |         let file = temp_snapshot_file.persist(snapshot_path)?; | ||||||
|  |  | ||||||
|  |         // 5.3 Change the permission to make the snapshot readonly | ||||||
|  |         let mut permissions = file.metadata()?.permissions(); | ||||||
|  |         permissions.set_readonly(true); | ||||||
|  |         #[cfg(unix)] | ||||||
|  |         { | ||||||
|  |             use std::os::unix::fs::PermissionsExt; | ||||||
|  |             #[allow(clippy::non_octal_unix_permissions)] | ||||||
|  |             //                     rwxrwxrwx | ||||||
|  |             permissions.set_mode(0b100100100); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         file.set_permissions(permissions)?; | ||||||
|  |  | ||||||
|  |         for task in &mut tasks { | ||||||
|  |             task.status = Status::Succeeded; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(tasks) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| expression: task.details | expression: task.details | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "embedders": { |   "embedders": { | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| expression: config.embedder_options | expression: config.embedder_options | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "Rest": { |   "Rest": { | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| expression: task.details | expression: task.details | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "embedders": { |   "embedders": { | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: doc | expression: doc | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "doggo": "Intel", |   "doggo": "Intel", | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: task.details | expression: task.details | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "embedders": { |   "embedders": { | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: doc | expression: doc | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "doggo": "kefir", |   "doggo": "kefir", | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: fakerest_config.embedder_options | expression: fakerest_config.embedder_options | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "Rest": { |   "Rest": { | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: simple_hf_config.embedder_options | expression: simple_hf_config.embedder_options | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "HuggingFace": { |   "HuggingFace": { | ||||||
| @@ -1,6 +1,7 @@ | |||||||
| --- | --- | ||||||
| source: index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: task.details | expression: task.details | ||||||
|  | snapshot_kind: text | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "embedders": { |   "embedders": { | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = false | ### Autobatching Enabled = false | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/lib.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text | snapshot_kind: text | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user