mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Progress, in the task queue
This commit is contained in:
		| @@ -22,7 +22,8 @@ use std::ffi::OsStr; | ||||
| use std::fmt; | ||||
| use std::fs::{self, File}; | ||||
| use std::io::BufWriter; | ||||
| use std::sync::atomic::{self, AtomicU16, AtomicU32}; | ||||
| use std::sync::atomic::{self, AtomicU64}; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use bumpalo::collections::CollectIn; | ||||
| use bumpalo::Bump; | ||||
| @@ -31,7 +32,6 @@ use meilisearch_types::error::Code; | ||||
| use meilisearch_types::heed::{RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; | ||||
| use meilisearch_types::milli::heed::CompactionOption; | ||||
| use meilisearch_types::milli::update::new::indexer::document_changes::Progress; | ||||
| use meilisearch_types::milli::update::new::indexer::{ | ||||
|     self, retrieve_or_guess_primary_key, UpdateByFunction, | ||||
| }; | ||||
| @@ -531,7 +531,7 @@ impl IndexScheduler { | ||||
|         if let Some(task_id) = to_cancel.max() { | ||||
|             // We retrieve the tasks that were processing before this tasks cancelation started. | ||||
|             // We must *not* reset the processing tasks before calling this method. | ||||
|             let ProcessingTasks { started_at, processing } = | ||||
|             let ProcessingTasks { started_at, processing, progress: _ } = | ||||
|                 &*self.processing_tasks.read().unwrap(); | ||||
|             return Ok(Some(Batch::TaskCancelation { | ||||
|                 task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?, | ||||
| @@ -1223,38 +1223,28 @@ impl IndexScheduler { | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         let indexer_alloc = Bump::new(); | ||||
|  | ||||
|         let last_finished_steps = AtomicU16::new(0); | ||||
|         let last_finished_documents = AtomicU32::new(0); | ||||
|         let started_processing_at = std::time::Instant::now(); | ||||
|         let secs_since_started_processing_at = AtomicU64::new(0); | ||||
|         const PRINT_SECS_DELTA: u64 = 1; | ||||
|  | ||||
|         let send_progress = | ||||
|             |Progress { finished_steps, total_steps, step_name, finished_total_documents }| { | ||||
|                 /* | ||||
|                 let current = rayon::current_thread_index(); | ||||
|         let processing_tasks = self.processing_tasks.clone(); | ||||
|  | ||||
|                 let last_finished_steps = | ||||
|                     last_finished_steps.fetch_max(finished_steps, atomic::Ordering::Relaxed); | ||||
|         let must_stop_processing = self.must_stop_processing.clone(); | ||||
|  | ||||
|                 if last_finished_steps > finished_steps { | ||||
|         let send_progress = |progress| { | ||||
|             let now = std::time::Instant::now(); | ||||
|             let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed); | ||||
|             let previous = started_processing_at + Duration::from_secs(elapsed); | ||||
|             let elapsed = now - previous; | ||||
|  | ||||
|             if elapsed.as_secs() < PRINT_SECS_DELTA { | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|                 if let Some((finished_documents, total_documents)) = finished_total_documents { | ||||
|                     if last_finished_steps < finished_steps { | ||||
|                         last_finished_documents.store(finished_documents, atomic::Ordering::Relaxed); | ||||
|                     } else { | ||||
|                         let last_finished_documents = last_finished_documents | ||||
|                             .fetch_max(finished_documents, atomic::Ordering::Relaxed); | ||||
|                         if last_finished_documents > finished_documents { | ||||
|                             return; | ||||
|                         } | ||||
|                     } | ||||
|                     tracing::warn!("Progress from {current:?}: {step_name} ({finished_steps}/{total_steps}), document {finished_documents}/{total_documents}") | ||||
|                 } else { | ||||
|                     tracing::warn!( | ||||
|                         "Progress from {current:?}: {step_name} ({finished_steps}/{total_steps})" | ||||
|                     ) | ||||
|                 } | ||||
|                 */ | ||||
|             secs_since_started_processing_at | ||||
|                 .store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed); | ||||
|  | ||||
|             processing_tasks.write().unwrap().update_progress(progress); | ||||
|         }; | ||||
|  | ||||
|         match operation { | ||||
| @@ -1286,8 +1276,6 @@ impl IndexScheduler { | ||||
|                 operations, | ||||
|                 mut tasks, | ||||
|             } => { | ||||
|                 let started_processing_at = std::time::Instant::now(); | ||||
|                 let must_stop_processing = self.must_stop_processing.clone(); | ||||
|                 let indexer_config = self.index_mapper.indexer_config(); | ||||
|                 // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. | ||||
|                 // this is made difficult by the fact we're doing private clones of the index scheduler and sending it | ||||
| @@ -1503,7 +1491,6 @@ impl IndexScheduler { | ||||
|                     let document_changes = indexer.into_changes(&primary_key)?; | ||||
|                     let embedders = index.embedding_configs(index_wtxn)?; | ||||
|                     let embedders = self.embedders(embedders)?; | ||||
|                     let must_stop_processing = &self.must_stop_processing; | ||||
|  | ||||
|                     indexer::index( | ||||
|                         index_wtxn, | ||||
| @@ -1645,7 +1632,6 @@ impl IndexScheduler { | ||||
|                     let document_changes = indexer.into_changes(&indexer_alloc, primary_key); | ||||
|                     let embedders = index.embedding_configs(index_wtxn)?; | ||||
|                     let embedders = self.embedders(embedders)?; | ||||
|                     let must_stop_processing = &self.must_stop_processing; | ||||
|  | ||||
|                     indexer::index( | ||||
|                         index_wtxn, | ||||
| @@ -1679,7 +1665,6 @@ impl IndexScheduler { | ||||
|                     task.status = Status::Succeeded; | ||||
|                 } | ||||
|  | ||||
|                 let must_stop_processing = self.must_stop_processing.clone(); | ||||
|                 builder.execute( | ||||
|                     |indexing_step| tracing::debug!(update = ?indexing_step), | ||||
|                     || must_stop_processing.get(), | ||||
|   | ||||
| @@ -148,6 +148,7 @@ pub fn snapshot_task(task: &Task) -> String { | ||||
|         enqueued_at: _, | ||||
|         started_at: _, | ||||
|         finished_at: _, | ||||
|         progress: _, | ||||
|         error, | ||||
|         canceled_by, | ||||
|         details, | ||||
|   | ||||
| @@ -55,11 +55,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; | ||||
| use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::documents::DocumentsBatchBuilder; | ||||
| use meilisearch_types::milli::index::IndexEmbeddingConfig; | ||||
| use meilisearch_types::milli::update::new::indexer::document_changes::Progress; | ||||
| use meilisearch_types::milli::update::IndexerConfig; | ||||
| use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; | ||||
| use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; | ||||
| use meilisearch_types::task_view::TaskView; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress}; | ||||
| use rayon::current_num_threads; | ||||
| use rayon::prelude::{IntoParallelIterator, ParallelIterator}; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -161,12 +162,18 @@ struct ProcessingTasks { | ||||
|     started_at: OffsetDateTime, | ||||
|     /// The list of tasks ids that are currently running. | ||||
|     processing: RoaringBitmap, | ||||
|     /// The progress on processing tasks | ||||
|     progress: Option<TaskProgress>, | ||||
| } | ||||
|  | ||||
| impl ProcessingTasks { | ||||
|     /// Creates an empty `ProcessingAt` struct. | ||||
|     fn new() -> ProcessingTasks { | ||||
|         ProcessingTasks { started_at: OffsetDateTime::now_utc(), processing: RoaringBitmap::new() } | ||||
|         ProcessingTasks { | ||||
|             started_at: OffsetDateTime::now_utc(), | ||||
|             processing: RoaringBitmap::new(), | ||||
|             progress: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Stores the currently processing tasks, and the date time at which it started. | ||||
| @@ -175,8 +182,13 @@ impl ProcessingTasks { | ||||
|         self.processing = processing; | ||||
|     } | ||||
|  | ||||
|     fn update_progress(&mut self, progress: Progress) { | ||||
|         self.progress.get_or_insert_with(TaskProgress::default).update(progress); | ||||
|     } | ||||
|  | ||||
|     /// Set the processing tasks to an empty list | ||||
|     fn stop_processing(&mut self) -> RoaringBitmap { | ||||
|         self.progress = None; | ||||
|         std::mem::take(&mut self.processing) | ||||
|     } | ||||
|  | ||||
| @@ -956,7 +968,7 @@ impl IndexScheduler { | ||||
|             tasks.into_iter().rev().take(query.limit.unwrap_or(u32::MAX) as usize), | ||||
|         )?; | ||||
|  | ||||
|         let ProcessingTasks { started_at, processing, .. } = | ||||
|         let ProcessingTasks { started_at, processing, progress, .. } = | ||||
|             self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone(); | ||||
|  | ||||
|         let ret = tasks.into_iter(); | ||||
| @@ -966,7 +978,12 @@ impl IndexScheduler { | ||||
|             Ok(( | ||||
|                 ret.map(|task| { | ||||
|                     if processing.contains(task.uid) { | ||||
|                         Task { status: Status::Processing, started_at: Some(started_at), ..task } | ||||
|                         Task { | ||||
|                             status: Status::Processing, | ||||
|                             progress: progress.clone(), | ||||
|                             started_at: Some(started_at), | ||||
|                             ..task | ||||
|                         } | ||||
|                     } else { | ||||
|                         task | ||||
|                     } | ||||
| @@ -1008,6 +1025,7 @@ impl IndexScheduler { | ||||
|             enqueued_at: OffsetDateTime::now_utc(), | ||||
|             started_at: None, | ||||
|             finished_at: None, | ||||
|             progress: None, | ||||
|             error: None, | ||||
|             canceled_by: None, | ||||
|             details: kind.default_details(), | ||||
| @@ -1588,6 +1606,8 @@ impl<'a> Dump<'a> { | ||||
|             enqueued_at: task.enqueued_at, | ||||
|             started_at: task.started_at, | ||||
|             finished_at: task.finished_at, | ||||
|             /// FIXME: should we update dump to contain progress information? 🤔 | ||||
|             progress: None, | ||||
|             error: task.error, | ||||
|             canceled_by: task.canceled_by, | ||||
|             details: task.details, | ||||
|   | ||||
| @@ -345,6 +345,8 @@ impl IndexScheduler { | ||||
|                 enqueued_at, | ||||
|                 started_at, | ||||
|                 finished_at, | ||||
|                 /// FIXME: assert something here? ask tamo 🤔 | ||||
|                     progress: _, | ||||
|                 error: _, | ||||
|                 canceled_by, | ||||
|                 details, | ||||
|   | ||||
| @@ -4,7 +4,9 @@ use time::{Duration, OffsetDateTime}; | ||||
|  | ||||
| use crate::error::ResponseError; | ||||
| use crate::settings::{Settings, Unchecked}; | ||||
| use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId}; | ||||
| use crate::tasks::{ | ||||
|     serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId, TaskProgress, | ||||
| }; | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| @@ -27,6 +29,8 @@ pub struct TaskView { | ||||
|     pub started_at: Option<OffsetDateTime>, | ||||
|     #[serde(with = "time::serde::rfc3339::option", default)] | ||||
|     pub finished_at: Option<OffsetDateTime>, | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub progress: Option<TaskProgress>, | ||||
| } | ||||
|  | ||||
| impl TaskView { | ||||
| @@ -43,6 +47,7 @@ impl TaskView { | ||||
|             enqueued_at: task.enqueued_at, | ||||
|             started_at: task.started_at, | ||||
|             finished_at: task.finished_at, | ||||
|             progress: task.progress.clone(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -4,6 +4,7 @@ use std::fmt::{Display, Write}; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
| use milli::update::new::indexer::document_changes::Progress; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use milli::Object; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -30,6 +31,8 @@ pub struct Task { | ||||
|     #[serde(with = "time::serde::rfc3339::option")] | ||||
|     pub finished_at: Option<OffsetDateTime>, | ||||
|  | ||||
|     pub progress: Option<TaskProgress>, | ||||
|  | ||||
|     pub error: Option<ResponseError>, | ||||
|     pub canceled_by: Option<TaskId>, | ||||
|     pub details: Option<Details>, | ||||
| @@ -38,6 +41,59 @@ pub struct Task { | ||||
|     pub kind: KindWithContent, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct TaskProgress { | ||||
|     pub current_step: String, | ||||
|     pub finished_steps: u16, | ||||
|     pub total_steps: u16, | ||||
|     pub finished_documents: Option<u32>, | ||||
|     pub total_documents: Option<u32>, | ||||
| } | ||||
|  | ||||
| impl Default for TaskProgress { | ||||
|     fn default() -> Self { | ||||
|         Self::new() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TaskProgress { | ||||
|     pub fn new() -> Self { | ||||
|         Self { | ||||
|             current_step: String::new(), | ||||
|             finished_steps: 0, | ||||
|             total_steps: 1, | ||||
|             finished_documents: None, | ||||
|             total_documents: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn update(&mut self, progress: Progress) { | ||||
|         if self.current_step != progress.step_name { | ||||
|             self.current_step.clear(); | ||||
|             self.current_step.push_str(progress.step_name); | ||||
|         } | ||||
|         self.total_steps = progress.total_steps; | ||||
|         if self.finished_steps > progress.finished_steps { | ||||
|             return; | ||||
|         } | ||||
|         if self.finished_steps < progress.finished_steps { | ||||
|             self.finished_documents = None; | ||||
|             self.total_documents = None; | ||||
|         } | ||||
|         self.finished_steps = progress.finished_steps; | ||||
|         if let Some((finished_documents, total_documents)) = progress.finished_total_documents { | ||||
|             if let Some(task_finished_documents) = self.finished_documents { | ||||
|                 if task_finished_documents > finished_documents { | ||||
|                     return; | ||||
|                 } | ||||
|             } | ||||
|             self.finished_documents = Some(finished_documents); | ||||
|             self.total_documents = Some(total_documents); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Task { | ||||
|     pub fn index_uid(&self) -> Option<&str> { | ||||
|         use KindWithContent::*; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user