mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	feat(lib): auto-batching
This commit is contained in:
		| @@ -6,10 +6,10 @@ use anyhow::Context; | ||||
| use heed::{EnvOpenOptions, RoTxn}; | ||||
| use indexmap::IndexMap; | ||||
| use milli::documents::DocumentBatchReader; | ||||
| use milli::update::{IndexDocumentsConfig, IndexerConfig}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::document_formats::read_ndjson; | ||||
| use crate::index::update_handler::UpdateHandler; | ||||
| use crate::index::updates::apply_settings_to_builder; | ||||
|  | ||||
| use super::error::Result; | ||||
| @@ -85,7 +85,7 @@ impl Index { | ||||
|         src: impl AsRef<Path>, | ||||
|         dst: impl AsRef<Path>, | ||||
|         size: usize, | ||||
|         update_handler: &UpdateHandler, | ||||
|         indexer_config: &IndexerConfig, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         let dir_name = src | ||||
|             .as_ref() | ||||
| @@ -110,8 +110,7 @@ impl Index { | ||||
|         let mut txn = index.write_txn()?; | ||||
|  | ||||
|         // Apply settings first | ||||
|         let builder = update_handler.update_builder(); | ||||
|         let mut builder = builder.settings(&mut txn, &index); | ||||
|         let mut builder = milli::update::Settings::new(&mut txn, &index, indexer_config); | ||||
|  | ||||
|         if let Some(primary_key) = primary_key { | ||||
|             builder.set_primary_key(primary_key); | ||||
| @@ -140,12 +139,16 @@ impl Index { | ||||
|  | ||||
|             //If the document file is empty, we don't perform the document addition, to prevent | ||||
|             //a primary key error from being thrown. | ||||
|             if !documents_reader.is_empty() { | ||||
|                 let builder = update_handler | ||||
|                     .update_builder() | ||||
|                     .index_documents(&mut txn, &index); | ||||
|                 builder.execute(documents_reader, |_| ())?; | ||||
|             } | ||||
|             let config = IndexDocumentsConfig::default(); | ||||
|             let mut builder = milli::update::IndexDocuments::new( | ||||
|                 &mut txn, | ||||
|                 &index, | ||||
|                 indexer_config, | ||||
|                 config, | ||||
|                 |_| (), | ||||
|             ); | ||||
|             builder.add_documents(documents_reader)?; | ||||
|             builder.execute()?; | ||||
|         } | ||||
|  | ||||
|         txn.commit()?; | ||||
|   | ||||
| @@ -3,7 +3,7 @@ use std::error::Error; | ||||
| use meilisearch_error::{internal_error, Code, ErrorCode}; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use crate::error::MilliError; | ||||
| use crate::{error::MilliError, update_file_store}; | ||||
|  | ||||
| pub type Result<T> = std::result::Result<T, IndexError>; | ||||
|  | ||||
| @@ -23,7 +23,9 @@ internal_error!( | ||||
|     IndexError: std::io::Error, | ||||
|     heed::Error, | ||||
|     fst::Error, | ||||
|     serde_json::Error | ||||
|     serde_json::Error, | ||||
|     update_file_store::UpdateFileStoreError, | ||||
|     milli::documents::Error | ||||
| ); | ||||
|  | ||||
| impl ErrorCode for IndexError { | ||||
|   | ||||
| @@ -7,7 +7,7 @@ use std::sync::Arc; | ||||
|  | ||||
| use chrono::{DateTime, Utc}; | ||||
| use heed::{EnvOpenOptions, RoTxn}; | ||||
| use milli::update::Setting; | ||||
| use milli::update::{IndexerConfig, Setting}; | ||||
| use milli::{obkv_to_json, FieldDistribution, FieldId}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::{Map, Value}; | ||||
| @@ -17,7 +17,6 @@ use crate::EnvSizer; | ||||
|  | ||||
| use super::error::IndexError; | ||||
| use super::error::Result; | ||||
| use super::update_handler::UpdateHandler; | ||||
| use super::{Checked, Settings}; | ||||
|  | ||||
| pub type Document = Map<String, Value>; | ||||
| @@ -68,7 +67,7 @@ pub struct Index { | ||||
|     #[derivative(Debug = "ignore")] | ||||
|     pub inner: Arc<milli::Index>, | ||||
|     #[derivative(Debug = "ignore")] | ||||
|     pub update_handler: Arc<UpdateHandler>, | ||||
|     pub indexer_config: Arc<IndexerConfig>, | ||||
| } | ||||
|  | ||||
| impl Deref for Index { | ||||
| @@ -84,7 +83,7 @@ impl Index { | ||||
|         path: impl AsRef<Path>, | ||||
|         size: usize, | ||||
|         uuid: Uuid, | ||||
|         update_handler: Arc<UpdateHandler>, | ||||
|         update_handler: Arc<IndexerConfig>, | ||||
|     ) -> Result<Self> { | ||||
|         log::debug!("opening index in {}", path.as_ref().display()); | ||||
|         create_dir_all(&path)?; | ||||
| @@ -94,7 +93,7 @@ impl Index { | ||||
|         Ok(Index { | ||||
|             inner, | ||||
|             uuid, | ||||
|             update_handler, | ||||
|             indexer_config: update_handler, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,6 @@ pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecke | ||||
| mod dump; | ||||
| pub mod error; | ||||
| mod search; | ||||
| pub mod update_handler; | ||||
| pub mod updates; | ||||
|  | ||||
| #[allow(clippy::module_inception)] | ||||
| @@ -26,6 +25,7 @@ pub mod test { | ||||
|     use std::path::PathBuf; | ||||
|     use std::sync::Arc; | ||||
|  | ||||
|     use milli::update::IndexerConfig; | ||||
|     use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod}; | ||||
|     use nelson::Mocker; | ||||
|     use serde_json::{Map, Value}; | ||||
| @@ -33,7 +33,6 @@ pub mod test { | ||||
|  | ||||
|     use super::error::Result; | ||||
|     use super::index::Index; | ||||
|     use super::update_handler::UpdateHandler; | ||||
|     use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings}; | ||||
|     use crate::update_file_store::UpdateFileStore; | ||||
|  | ||||
| @@ -52,7 +51,7 @@ pub mod test { | ||||
|             path: impl AsRef<Path>, | ||||
|             size: usize, | ||||
|             uuid: Uuid, | ||||
|             update_handler: Arc<UpdateHandler>, | ||||
|             update_handler: Arc<IndexerConfig>, | ||||
|         ) -> Result<Self> { | ||||
|             let index = Index::open(path, size, uuid, update_handler)?; | ||||
|             Ok(Self::Real(index)) | ||||
| @@ -62,7 +61,7 @@ pub mod test { | ||||
|             src: impl AsRef<Path>, | ||||
|             dst: impl AsRef<Path>, | ||||
|             size: usize, | ||||
|             update_handler: &UpdateHandler, | ||||
|             update_handler: &IndexerConfig, | ||||
|         ) -> anyhow::Result<()> { | ||||
|             Index::load_dump(src, dst, size, update_handler) | ||||
|         } | ||||
| @@ -157,21 +156,18 @@ pub mod test { | ||||
|         pub fn update_documents( | ||||
|             &self, | ||||
|             method: IndexDocumentsMethod, | ||||
|             content_uuid: Uuid, | ||||
|             primary_key: Option<String>, | ||||
|             file_store: UpdateFileStore, | ||||
|             contents: impl Iterator<Item = Uuid>, | ||||
|         ) -> Result<DocumentAdditionResult> { | ||||
|             match self { | ||||
|                 MockIndex::Real(index) => { | ||||
|                     index.update_documents(method, content_uuid, primary_key, file_store) | ||||
|                     index.update_documents(method, primary_key, file_store, contents) | ||||
|                 } | ||||
|                 MockIndex::Mock(mocker) => unsafe { | ||||
|                     mocker.get("update_documents").call(( | ||||
|                         method, | ||||
|                         content_uuid, | ||||
|                         primary_key, | ||||
|                         file_store, | ||||
|                     )) | ||||
|                     mocker | ||||
|                         .get("update_documents") | ||||
|                         .call((method, primary_key, file_store, contents)) | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -295,7 +295,7 @@ fn compute_value_matches<'a, A: AsRef<[u8]>>( | ||||
|             let mut start = 0; | ||||
|             for (word, token) in analyzed.reconstruct() { | ||||
|                 if token.is_word() { | ||||
|                     if let Some(length) = matcher.matches(token.text()) { | ||||
|                     if let Some(length) = matcher.matches(&token) { | ||||
|                         infos.push(MatchInfo { start, length }); | ||||
|                     } | ||||
|                 } | ||||
| @@ -486,18 +486,18 @@ fn format_fields<A: AsRef<[u8]>>( | ||||
|  | ||||
| /// trait to allow unit testing of `format_fields` | ||||
| trait Matcher { | ||||
|     fn matches(&self, w: &str) -> Option<usize>; | ||||
|     fn matches(&self, w: &Token) -> Option<usize>; | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| impl Matcher for BTreeMap<&str, Option<usize>> { | ||||
|     fn matches(&self, w: &str) -> Option<usize> { | ||||
|         self.get(w).cloned().flatten() | ||||
|     fn matches(&self, w: &Token) -> Option<usize> { | ||||
|         self.get(w.text()).cloned().flatten() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Matcher for MatchingWords { | ||||
|     fn matches(&self, w: &str) -> Option<usize> { | ||||
|     fn matches(&self, w: &Token) -> Option<usize> { | ||||
|         self.matching_bytes(w) | ||||
|     } | ||||
| } | ||||
| @@ -579,7 +579,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { | ||||
|                 let mut tokens = analyzed.reconstruct().peekable(); | ||||
|  | ||||
|                 while let Some((word, token)) = | ||||
|                     tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none()) | ||||
|                     tokens.next_if(|(_, token)| matcher.matches(token).is_none()) | ||||
|                 { | ||||
|                     buffer.push((word, token)); | ||||
|                 } | ||||
| @@ -623,7 +623,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { | ||||
|             // Check if we need to do highlighting or compute matches before calling | ||||
|             // Matcher::matches since the call is expensive. | ||||
|             if format_options.highlight && token.is_word() { | ||||
|                 if let Some(length) = matcher.matches(token.text()) { | ||||
|                 if let Some(length) = matcher.matches(&token) { | ||||
|                     match word.get(..length).zip(word.get(length..)) { | ||||
|                         Some((head, tail)) => { | ||||
|                             out.push_str(&self.marks.0); | ||||
| @@ -653,7 +653,7 @@ fn parse_filter(facets: &Value) -> Result<Option<Filter>> { | ||||
|     match facets { | ||||
|         Value::String(expr) => { | ||||
|             let condition = Filter::from_str(expr)?; | ||||
|             Ok(Some(condition)) | ||||
|             Ok(condition) | ||||
|         } | ||||
|         Value::Array(arr) => parse_filter_array(arr), | ||||
|         v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()), | ||||
|   | ||||
| @@ -1,49 +0,0 @@ | ||||
| use milli::update::UpdateBuilder; | ||||
| use milli::CompressionType; | ||||
| use rayon::ThreadPool; | ||||
|  | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| pub struct UpdateHandler { | ||||
|     max_nb_chunks: Option<usize>, | ||||
|     chunk_compression_level: Option<u32>, | ||||
|     thread_pool: ThreadPool, | ||||
|     log_frequency: usize, | ||||
|     max_memory: Option<usize>, | ||||
|     chunk_compression_type: CompressionType, | ||||
| } | ||||
|  | ||||
| impl UpdateHandler { | ||||
|     pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> { | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new() | ||||
|             .num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2)) | ||||
|             .build()?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             max_nb_chunks: opt.max_nb_chunks, | ||||
|             chunk_compression_level: opt.chunk_compression_level, | ||||
|             thread_pool, | ||||
|             log_frequency: opt.log_every_n, | ||||
|             max_memory: opt.max_memory.map(|m| m.get_bytes() as usize), | ||||
|             chunk_compression_type: opt.chunk_compression_type, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn update_builder(&self) -> UpdateBuilder { | ||||
|         // We prepare the update by using the update builder. | ||||
|         let mut update_builder = UpdateBuilder::new(); | ||||
|         if let Some(max_nb_chunks) = self.max_nb_chunks { | ||||
|             update_builder.max_nb_chunks(max_nb_chunks); | ||||
|         } | ||||
|         if let Some(chunk_compression_level) = self.chunk_compression_level { | ||||
|             update_builder.chunk_compression_level(chunk_compression_level); | ||||
|         } | ||||
|         update_builder.thread_pool(&self.thread_pool); | ||||
|         update_builder.log_every_n(self.log_frequency); | ||||
|         if let Some(max_memory) = self.max_memory { | ||||
|             update_builder.max_memory(max_memory); | ||||
|         } | ||||
|         update_builder.chunk_compression_type(self.chunk_compression_type); | ||||
|         update_builder | ||||
|     } | ||||
| } | ||||
| @@ -5,7 +5,8 @@ use std::num::NonZeroUsize; | ||||
| use log::{debug, info, trace}; | ||||
| use milli::documents::DocumentBatchReader; | ||||
| use milli::update::{ | ||||
|     DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, Setting, | ||||
|     DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, | ||||
|     Setting, | ||||
| }; | ||||
| use serde::{Deserialize, Serialize, Serializer}; | ||||
| use uuid::Uuid; | ||||
| @@ -178,7 +179,7 @@ impl Index { | ||||
|         txn: &mut heed::RwTxn<'a, 'b>, | ||||
|         primary_key: String, | ||||
|     ) -> Result<IndexMeta> { | ||||
|         let mut builder = self.update_handler.update_builder().settings(txn, self); | ||||
|         let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref()); | ||||
|         builder.set_primary_key(primary_key); | ||||
|         builder.execute(|_| ())?; | ||||
|         let meta = IndexMeta::new_txn(self, txn)?; | ||||
| @@ -197,10 +198,7 @@ impl Index { | ||||
|     /// Deletes `ids` from the index, and returns how many documents were deleted. | ||||
|     pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> { | ||||
|         let mut txn = self.write_txn()?; | ||||
|         let mut builder = self | ||||
|             .update_handler | ||||
|             .update_builder() | ||||
|             .delete_documents(&mut txn, self)?; | ||||
|         let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?; | ||||
|  | ||||
|         // We ignore nonexistent document ids | ||||
|         ids.iter().for_each(|id| { | ||||
| @@ -216,11 +214,7 @@ impl Index { | ||||
|  | ||||
|     pub fn clear_documents(&self) -> Result<()> { | ||||
|         let mut txn = self.write_txn()?; | ||||
|         self.update_handler | ||||
|             .update_builder() | ||||
|             .clear_documents(&mut txn, self) | ||||
|             .execute()?; | ||||
|  | ||||
|         milli::update::ClearDocuments::new(&mut txn, self).execute()?; | ||||
|         txn.commit()?; | ||||
|  | ||||
|         Ok(()) | ||||
| @@ -229,9 +223,9 @@ impl Index { | ||||
|     pub fn update_documents( | ||||
|         &self, | ||||
|         method: IndexDocumentsMethod, | ||||
|         content_uuid: Uuid, | ||||
|         primary_key: Option<String>, | ||||
|         file_store: UpdateFileStore, | ||||
|         contents: impl IntoIterator<Item = Uuid>, | ||||
|     ) -> Result<DocumentAdditionResult> { | ||||
|         trace!("performing document addition"); | ||||
|         let mut txn = self.write_txn()?; | ||||
| @@ -242,17 +236,27 @@ impl Index { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let config = IndexDocumentsConfig { | ||||
|             update_method: method, | ||||
|             ..Default::default() | ||||
|         }; | ||||
|  | ||||
|         let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step); | ||||
|         let mut builder = milli::update::IndexDocuments::new( | ||||
|             &mut txn, | ||||
|             self, | ||||
|             self.indexer_config.as_ref(), | ||||
|             config, | ||||
|             indexing_callback, | ||||
|         ); | ||||
|  | ||||
|         let content_file = file_store.get_update(content_uuid).unwrap(); | ||||
|         let reader = DocumentBatchReader::from_reader(content_file).unwrap(); | ||||
|         for content_uuid in contents.into_iter() { | ||||
|             let content_file = file_store.get_update(content_uuid)?; | ||||
|             let reader = DocumentBatchReader::from_reader(content_file)?; | ||||
|             builder.add_documents(reader)?; | ||||
|         } | ||||
|  | ||||
|         let mut builder = self | ||||
|             .update_handler | ||||
|             .update_builder() | ||||
|             .index_documents(&mut txn, self); | ||||
|         builder.index_documents_method(method); | ||||
|         let addition = builder.execute(reader, indexing_callback)?; | ||||
|         let addition = builder.execute()?; | ||||
|  | ||||
|         txn.commit()?; | ||||
|  | ||||
| @@ -264,10 +268,8 @@ impl Index { | ||||
|     pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> { | ||||
|         // We must use the write transaction of the update here. | ||||
|         let mut txn = self.write_txn()?; | ||||
|         let mut builder = self | ||||
|             .update_handler | ||||
|             .update_builder() | ||||
|             .settings(&mut txn, self); | ||||
|         let mut builder = | ||||
|             milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref()); | ||||
|  | ||||
|         apply_settings_to_builder(settings, &mut builder); | ||||
|  | ||||
|   | ||||
| @@ -10,7 +10,7 @@ use tokio::sync::{mpsc, oneshot, RwLock}; | ||||
|  | ||||
| use super::error::{DumpActorError, Result}; | ||||
| use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus}; | ||||
| use crate::tasks::TaskStore; | ||||
| use crate::tasks::Scheduler; | ||||
| use crate::update_file_store::UpdateFileStore; | ||||
|  | ||||
| pub const CONCURRENT_DUMP_MSG: usize = 10; | ||||
| @@ -18,7 +18,7 @@ pub const CONCURRENT_DUMP_MSG: usize = 10; | ||||
| pub struct DumpActor { | ||||
|     inbox: Option<mpsc::Receiver<DumpMsg>>, | ||||
|     update_file_store: UpdateFileStore, | ||||
|     task_store: TaskStore, | ||||
|     scheduler: Arc<RwLock<Scheduler>>, | ||||
|     dump_path: PathBuf, | ||||
|     analytics_path: PathBuf, | ||||
|     lock: Arc<Mutex<()>>, | ||||
| @@ -36,7 +36,7 @@ impl DumpActor { | ||||
|     pub fn new( | ||||
|         inbox: mpsc::Receiver<DumpMsg>, | ||||
|         update_file_store: UpdateFileStore, | ||||
|         task_store: TaskStore, | ||||
|         scheduler: Arc<RwLock<Scheduler>>, | ||||
|         dump_path: impl AsRef<Path>, | ||||
|         analytics_path: impl AsRef<Path>, | ||||
|         index_db_size: usize, | ||||
| @@ -46,7 +46,7 @@ impl DumpActor { | ||||
|         let lock = Arc::new(Mutex::new(())); | ||||
|         Self { | ||||
|             inbox: Some(inbox), | ||||
|             task_store, | ||||
|             scheduler, | ||||
|             update_file_store, | ||||
|             dump_path: dump_path.as_ref().into(), | ||||
|             analytics_path: analytics_path.as_ref().into(), | ||||
| @@ -118,13 +118,13 @@ impl DumpActor { | ||||
|             dump_path: self.dump_path.clone(), | ||||
|             db_path: self.analytics_path.clone(), | ||||
|             update_file_store: self.update_file_store.clone(), | ||||
|             task_store: self.task_store.clone(), | ||||
|             scheduler: self.scheduler.clone(), | ||||
|             uid: uid.clone(), | ||||
|             update_db_size: self.update_db_size, | ||||
|             index_db_size: self.index_db_size, | ||||
|         }; | ||||
|  | ||||
|         let task_result = tokio::task::spawn(task.run()).await; | ||||
|         let task_result = tokio::task::spawn_local(task.run()).await; | ||||
|  | ||||
|         let mut dump_infos = self.dump_infos.write().await; | ||||
|         let dump_infos = dump_infos | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| use std::fs::File; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use anyhow::bail; | ||||
| use chrono::{DateTime, Utc}; | ||||
| @@ -12,7 +13,7 @@ use meilisearch_auth::AuthController; | ||||
| pub use message::DumpMsg; | ||||
| use tempfile::TempDir; | ||||
| use tokio::fs::create_dir_all; | ||||
| use tokio::sync::oneshot; | ||||
| use tokio::sync::{oneshot, RwLock}; | ||||
|  | ||||
| use crate::analytics; | ||||
| use crate::compression::{from_tar_gz, to_tar_gz}; | ||||
| @@ -20,7 +21,7 @@ use crate::index_controller::dump_actor::error::DumpActorError; | ||||
| use crate::index_controller::dump_actor::loaders::{v2, v3, v4}; | ||||
| use crate::options::IndexerOpts; | ||||
| use crate::tasks::task::Job; | ||||
| use crate::tasks::TaskStore; | ||||
| use crate::tasks::Scheduler; | ||||
| use crate::update_file_store::UpdateFileStore; | ||||
| use error::Result; | ||||
|  | ||||
| @@ -319,7 +320,7 @@ struct DumpJob { | ||||
|     dump_path: PathBuf, | ||||
|     db_path: PathBuf, | ||||
|     update_file_store: UpdateFileStore, | ||||
|     task_store: TaskStore, | ||||
|     scheduler: Arc<RwLock<Scheduler>>, | ||||
|     uid: String, | ||||
|     update_db_size: usize, | ||||
|     index_db_size: usize, | ||||
| @@ -344,21 +345,28 @@ impl DumpJob { | ||||
|  | ||||
|         let (sender, receiver) = oneshot::channel(); | ||||
|  | ||||
|         self.task_store | ||||
|             .register_job(Job::Dump { | ||||
|         self.scheduler | ||||
|             .write() | ||||
|             .await | ||||
|             .schedule_job(Job::Dump { | ||||
|                 ret: sender, | ||||
|                 path: temp_dump_path.clone(), | ||||
|             }) | ||||
|             .await; | ||||
|         receiver.await??; | ||||
|         self.task_store | ||||
|             .dump(&temp_dump_path, self.update_file_store.clone()) | ||||
|             .await?; | ||||
|  | ||||
|         // wait until the job has started performing before finishing the dump process | ||||
|         let sender = receiver.await??; | ||||
|  | ||||
|         AuthController::dump(&self.db_path, &temp_dump_path)?; | ||||
|  | ||||
|         //TODO(marin): this is not right, the scheduler should dump itself, not do it here... | ||||
|         self.scheduler | ||||
|             .read() | ||||
|             .await | ||||
|             .dump(&temp_dump_path, self.update_file_store.clone()) | ||||
|             .await?; | ||||
|  | ||||
|         let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> { | ||||
|             let _ = &self; | ||||
|             // for now we simply copy the updates/updates_files | ||||
|             // FIXME: We may copy more files than necessary, if new files are added while we are | ||||
|             // performing the dump. We need a way to filter them out. | ||||
| @@ -374,6 +382,9 @@ impl DumpJob { | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         // notify the update loop that we are finished performing the dump. | ||||
|         let _ = sender.send(()); | ||||
|  | ||||
|         info!("Created dump in {:?}.", dump_path); | ||||
|  | ||||
|         Ok(()) | ||||
| @@ -382,19 +393,15 @@ impl DumpJob { | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::HashSet; | ||||
|  | ||||
|     use futures::future::{err, ok}; | ||||
|     use nelson::Mocker; | ||||
|     use once_cell::sync::Lazy; | ||||
|     use uuid::Uuid; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::index::error::Result as IndexResult; | ||||
|     use crate::index::Index; | ||||
|     use crate::index_resolver::error::IndexResolverError; | ||||
|     use crate::index_resolver::index_store::MockIndexStore; | ||||
|     use crate::index_resolver::meta_store::MockIndexMetaStore; | ||||
|     use crate::options::SchedulerConfig; | ||||
|     use crate::tasks::error::Result as TaskResult; | ||||
|     use crate::tasks::task::{Task, TaskId}; | ||||
|     use crate::tasks::{MockTaskPerformer, TaskFilter, TaskStore}; | ||||
|     use crate::update_file_store::UpdateFileStore; | ||||
|  | ||||
|     fn setup() { | ||||
| @@ -411,86 +418,91 @@ mod test { | ||||
|     } | ||||
|  | ||||
|     #[actix_rt::test] | ||||
|     #[ignore] | ||||
|     async fn test_dump_normal() { | ||||
|         setup(); | ||||
|  | ||||
|         let tmp = tempfile::tempdir().unwrap(); | ||||
|  | ||||
|         let uuids = std::iter::repeat_with(Uuid::new_v4) | ||||
|             .take(4) | ||||
|             .collect::<HashSet<_>>(); | ||||
|         let mut uuid_store = MockIndexMetaStore::new(); | ||||
|         uuid_store | ||||
|             .expect_dump() | ||||
|             .once() | ||||
|             .returning(move |_| Box::pin(ok(()))); | ||||
|  | ||||
|         let mut index_store = MockIndexStore::new(); | ||||
|         index_store.expect_get().times(4).returning(move |uuid| { | ||||
|             let mocker = Mocker::default(); | ||||
|             let uuids_clone = uuids.clone(); | ||||
|             mocker.when::<(), Uuid>("uuid").once().then(move |_| { | ||||
|                 assert!(uuids_clone.contains(&uuid)); | ||||
|                 uuid | ||||
|             }); | ||||
|             mocker | ||||
|                 .when::<&Path, IndexResult<()>>("dump") | ||||
|                 .once() | ||||
|                 .then(move |_| Ok(())); | ||||
|             Box::pin(ok(Some(Index::mock(mocker)))) | ||||
|         }); | ||||
|  | ||||
|         let mocker = Mocker::default(); | ||||
|         let update_file_store = UpdateFileStore::mock(mocker); | ||||
|  | ||||
|         //let update_sender = | ||||
|         //    create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap(); | ||||
|  | ||||
|         //TODO: fix dump tests | ||||
|         let mut performer = MockTaskPerformer::new(); | ||||
|         performer | ||||
|             .expect_process_job() | ||||
|             .once() | ||||
|             .returning(|j| match j { | ||||
|                 Job::Dump { ret, .. } => { | ||||
|                     let (sender, _receiver) = oneshot::channel(); | ||||
|                     ret.send(Ok(sender)).unwrap(); | ||||
|                 } | ||||
|                 _ => unreachable!(), | ||||
|             }); | ||||
|         let performer = Arc::new(performer); | ||||
|         let mocker = Mocker::default(); | ||||
|         let task_store = TaskStore::mock(mocker); | ||||
|         mocker | ||||
|             .when::<(&Path, UpdateFileStore), TaskResult<()>>("dump") | ||||
|             .then(|_| Ok(())); | ||||
|         mocker | ||||
|             .when::<(Option<TaskId>, Option<TaskFilter>, Option<usize>), TaskResult<Vec<Task>>>( | ||||
|                 "list_tasks", | ||||
|             ) | ||||
|             .then(|_| Ok(Vec::new())); | ||||
|         let store = TaskStore::mock(mocker); | ||||
|         let config = SchedulerConfig::default(); | ||||
|  | ||||
|         let scheduler = Scheduler::new(store, performer, config).unwrap(); | ||||
|  | ||||
|         let task = DumpJob { | ||||
|             dump_path: tmp.path().into(), | ||||
|             // this should do nothing | ||||
|             update_file_store, | ||||
|             db_path: tmp.path().into(), | ||||
|             task_store, | ||||
|             uid: String::from("test"), | ||||
|             update_db_size: 4096 * 10, | ||||
|             index_db_size: 4096 * 10, | ||||
|             scheduler, | ||||
|         }; | ||||
|  | ||||
|         task.run().await.unwrap(); | ||||
|     } | ||||
|  | ||||
|     #[actix_rt::test] | ||||
|     #[ignore] | ||||
|     async fn error_performing_dump() { | ||||
|         let tmp = tempfile::tempdir().unwrap(); | ||||
|  | ||||
|         let mut uuid_store = MockIndexMetaStore::new(); | ||||
|         uuid_store | ||||
|             .expect_dump() | ||||
|             .once() | ||||
|             .returning(move |_| Box::pin(err(IndexResolverError::ExistingPrimaryKey))); | ||||
|  | ||||
|         let mocker = Mocker::default(); | ||||
|         let file_store = UpdateFileStore::mock(mocker); | ||||
|  | ||||
|         let mocker = Mocker::default(); | ||||
|         mocker | ||||
|             .when::<(Option<TaskId>, Option<TaskFilter>, Option<usize>), TaskResult<Vec<Task>>>( | ||||
|                 "list_tasks", | ||||
|             ) | ||||
|             .then(|_| Ok(Vec::new())); | ||||
|         let task_store = TaskStore::mock(mocker); | ||||
|         let mut performer = MockTaskPerformer::new(); | ||||
|         performer | ||||
|             .expect_process_job() | ||||
|             .once() | ||||
|             .returning(|job| match job { | ||||
|                 Job::Dump { ret, .. } => drop(ret.send(Err(IndexResolverError::BadlyFormatted( | ||||
|                     "blabla".to_string(), | ||||
|                 )))), | ||||
|                 _ => unreachable!(), | ||||
|             }); | ||||
|         let performer = Arc::new(performer); | ||||
|  | ||||
|         let scheduler = Scheduler::new(task_store, performer, SchedulerConfig::default()).unwrap(); | ||||
|  | ||||
|         let task = DumpJob { | ||||
|             dump_path: tmp.path().into(), | ||||
|             // this should do nothing | ||||
|             db_path: tmp.path().into(), | ||||
|             update_file_store: file_store, | ||||
|             task_store, | ||||
|             uid: String::from("test"), | ||||
|             update_db_size: 4096 * 10, | ||||
|             index_db_size: 4096 * 10, | ||||
|             scheduler, | ||||
|         }; | ||||
|  | ||||
|         assert!(task.run().await.is_err()); | ||||
|   | ||||
| @@ -13,7 +13,7 @@ use futures::Stream; | ||||
| use futures::StreamExt; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use tokio::sync::mpsc; | ||||
| use tokio::sync::{mpsc, RwLock}; | ||||
| use tokio::task::spawn_blocking; | ||||
| use tokio::time::sleep; | ||||
| use uuid::Uuid; | ||||
| @@ -23,12 +23,11 @@ use crate::index::{ | ||||
|     Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, | ||||
| }; | ||||
| use crate::index_controller::dump_actor::{load_dump, DumpActor, DumpActorHandleImpl}; | ||||
| use crate::options::IndexerOpts; | ||||
| use crate::options::{IndexerOpts, SchedulerConfig}; | ||||
| use crate::snapshot::{load_snapshot, SnapshotService}; | ||||
| use crate::tasks::create_task_store; | ||||
| use crate::tasks::error::TaskError; | ||||
| use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; | ||||
| use crate::tasks::{TaskFilter, TaskStore}; | ||||
| use crate::tasks::{Scheduler, TaskFilter, TaskStore}; | ||||
| use error::Result; | ||||
|  | ||||
| use self::dump_actor::{DumpActorHandle, DumpInfo}; | ||||
| @@ -68,6 +67,7 @@ pub struct IndexSettings { | ||||
|  | ||||
| pub struct IndexController<U, I> { | ||||
|     index_resolver: Arc<IndexResolver<U, I>>, | ||||
|     scheduler: Arc<RwLock<Scheduler>>, | ||||
|     task_store: TaskStore, | ||||
|     dump_handle: dump_actor::DumpActorHandleImpl, | ||||
|     update_file_store: UpdateFileStore, | ||||
| @@ -78,9 +78,10 @@ impl<U, I> Clone for IndexController<U, I> { | ||||
|     fn clone(&self) -> Self { | ||||
|         Self { | ||||
|             index_resolver: self.index_resolver.clone(), | ||||
|             task_store: self.task_store.clone(), | ||||
|             scheduler: self.scheduler.clone(), | ||||
|             dump_handle: self.dump_handle.clone(), | ||||
|             update_file_store: self.update_file_store.clone(), | ||||
|             task_store: self.task_store.clone(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -160,6 +161,7 @@ impl IndexControllerBuilder { | ||||
|         self, | ||||
|         db_path: impl AsRef<Path>, | ||||
|         indexer_options: IndexerOpts, | ||||
|         scheduler_config: SchedulerConfig, | ||||
|     ) -> anyhow::Result<MeiliSearch> { | ||||
|         let index_size = self | ||||
|             .max_index_size | ||||
| @@ -217,8 +219,9 @@ impl IndexControllerBuilder { | ||||
|             update_file_store.clone(), | ||||
|         )?); | ||||
|  | ||||
|         let task_store = | ||||
|             create_task_store(meta_env, index_resolver.clone()).map_err(|e| anyhow::anyhow!(e))?; | ||||
|         let task_store = TaskStore::new(meta_env)?; | ||||
|         let scheduler = | ||||
|             Scheduler::new(task_store.clone(), index_resolver.clone(), scheduler_config)?; | ||||
|  | ||||
|         let dump_path = self | ||||
|             .dump_dst | ||||
| @@ -229,14 +232,14 @@ impl IndexControllerBuilder { | ||||
|             let actor = DumpActor::new( | ||||
|                 receiver, | ||||
|                 update_file_store.clone(), | ||||
|                 task_store.clone(), | ||||
|                 scheduler.clone(), | ||||
|                 dump_path, | ||||
|                 analytics_path, | ||||
|                 index_size, | ||||
|                 task_store_size, | ||||
|             ); | ||||
|  | ||||
|             tokio::task::spawn(actor.run()); | ||||
|             tokio::task::spawn_local(actor.run()); | ||||
|  | ||||
|             DumpActorHandleImpl { sender } | ||||
|         }; | ||||
| @@ -255,17 +258,18 @@ impl IndexControllerBuilder { | ||||
|                 snapshot_path, | ||||
|                 index_size, | ||||
|                 meta_env_size: task_store_size, | ||||
|                 task_store: task_store.clone(), | ||||
|                 scheduler: scheduler.clone(), | ||||
|             }; | ||||
|  | ||||
|             tokio::task::spawn(snapshot_service.run()); | ||||
|             tokio::task::spawn_local(snapshot_service.run()); | ||||
|         } | ||||
|  | ||||
|         Ok(IndexController { | ||||
|             index_resolver, | ||||
|             task_store, | ||||
|             scheduler, | ||||
|             dump_handle, | ||||
|             update_file_store, | ||||
|             task_store, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
| @@ -415,12 +419,13 @@ where | ||||
|         }; | ||||
|  | ||||
|         let task = self.task_store.register(uid, content).await?; | ||||
|         self.scheduler.read().await.notify(); | ||||
|  | ||||
|         Ok(task) | ||||
|     } | ||||
|  | ||||
|     pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> { | ||||
|         let task = self.task_store.get_task(id, filter).await?; | ||||
|         let task = self.scheduler.read().await.get_task(id, filter).await?; | ||||
|         Ok(task) | ||||
|     } | ||||
|  | ||||
| @@ -435,7 +440,12 @@ where | ||||
|  | ||||
|         let mut filter = TaskFilter::default(); | ||||
|         filter.filter_index(index_uid); | ||||
|         let task = self.task_store.get_task(task_id, Some(filter)).await?; | ||||
|         let task = self | ||||
|             .scheduler | ||||
|             .read() | ||||
|             .await | ||||
|             .get_task(task_id, Some(filter)) | ||||
|             .await?; | ||||
|  | ||||
|         Ok(task) | ||||
|     } | ||||
| @@ -446,7 +456,12 @@ where | ||||
|         limit: Option<usize>, | ||||
|         offset: Option<TaskId>, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         let tasks = self.task_store.list_tasks(offset, filter, limit).await?; | ||||
|         let tasks = self | ||||
|             .scheduler | ||||
|             .read() | ||||
|             .await | ||||
|             .list_tasks(offset, filter, limit) | ||||
|             .await?; | ||||
|  | ||||
|         Ok(tasks) | ||||
|     } | ||||
| @@ -466,7 +481,9 @@ where | ||||
|         filter.filter_index(index_uid); | ||||
|  | ||||
|         let tasks = self | ||||
|             .task_store | ||||
|             .scheduler | ||||
|             .read() | ||||
|             .await | ||||
|             .list_tasks( | ||||
|                 Some(offset.unwrap_or_default() + task_id), | ||||
|                 Some(filter), | ||||
| @@ -547,10 +564,11 @@ where | ||||
|     } | ||||
|  | ||||
|     pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> { | ||||
|         let last_task = self.task_store.get_processing_task().await?; | ||||
|         let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?; | ||||
|         // Check if the currently indexing update is from our index. | ||||
|         let is_indexing = last_task | ||||
|             .map(|task| task.index_uid.into_inner() == uid) | ||||
|         let is_indexing = processing_tasks | ||||
|             .first() | ||||
|             .map(|task| task.index_uid.as_str() == uid) | ||||
|             .unwrap_or_default(); | ||||
|  | ||||
|         let index = self.index_resolver.get_index(uid).await?; | ||||
| @@ -564,7 +582,7 @@ where | ||||
|         let mut last_task: Option<DateTime<_>> = None; | ||||
|         let mut indexes = BTreeMap::new(); | ||||
|         let mut database_size = 0; | ||||
|         let processing_task = self.task_store.get_processing_task().await?; | ||||
|         let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?; | ||||
|  | ||||
|         for (index_uid, index) in self.index_resolver.list().await? { | ||||
|             if !search_rules.is_index_authorized(&index_uid) { | ||||
| @@ -584,8 +602,8 @@ where | ||||
|             }); | ||||
|  | ||||
|             // Check if the currently indexing update is from our index. | ||||
|             stats.is_indexing = processing_task | ||||
|                 .as_ref() | ||||
|             stats.is_indexing = processing_tasks | ||||
|                 .first() | ||||
|                 .map(|p| p.index_uid.as_str() == index_uid) | ||||
|                 .or(Some(false)); | ||||
|  | ||||
| @@ -637,16 +655,18 @@ mod test { | ||||
|  | ||||
|     impl IndexController<MockIndexMetaStore, MockIndexStore> { | ||||
|         pub fn mock( | ||||
|             index_resolver: IndexResolver<MockIndexMetaStore, MockIndexStore>, | ||||
|             index_resolver: Arc<IndexResolver<MockIndexMetaStore, MockIndexStore>>, | ||||
|             task_store: TaskStore, | ||||
|             update_file_store: UpdateFileStore, | ||||
|             dump_handle: DumpActorHandleImpl, | ||||
|             scheduler: Arc<RwLock<Scheduler>>, | ||||
|         ) -> Self { | ||||
|             IndexController { | ||||
|                 index_resolver: Arc::new(index_resolver), | ||||
|                 index_resolver, | ||||
|                 task_store, | ||||
|                 dump_handle, | ||||
|                 update_file_store, | ||||
|                 scheduler, | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| @@ -719,13 +739,27 @@ mod test { | ||||
|         let task_store_mocker = nelson::Mocker::default(); | ||||
|         let mocker = Mocker::default(); | ||||
|         let update_file_store = UpdateFileStore::mock(mocker); | ||||
|         let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store.clone()); | ||||
|         let index_resolver = Arc::new(IndexResolver::new( | ||||
|             uuid_store, | ||||
|             index_store, | ||||
|             update_file_store.clone(), | ||||
|         )); | ||||
|         let task_store = TaskStore::mock(task_store_mocker); | ||||
|         // let dump_actor = MockDumpActorHandle::new(); | ||||
|         let scheduler = Scheduler::new( | ||||
|             task_store.clone(), | ||||
|             index_resolver.clone(), | ||||
|             SchedulerConfig::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         let (sender, _) = mpsc::channel(1); | ||||
|         let dump_handle = DumpActorHandleImpl { sender }; | ||||
|         let index_controller = | ||||
|             IndexController::mock(index_resolver, task_store, update_file_store, dump_handle); | ||||
|         let index_controller = IndexController::mock( | ||||
|             index_resolver, | ||||
|             task_store, | ||||
|             update_file_store, | ||||
|             dump_handle, | ||||
|             scheduler, | ||||
|         ); | ||||
|  | ||||
|         let r = index_controller | ||||
|             .search(index_uid.to_owned(), query.clone()) | ||||
|   | ||||
| @@ -1,14 +1,15 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::convert::TryFrom; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use milli::update::IndexerConfig; | ||||
| use tokio::fs; | ||||
| use tokio::sync::RwLock; | ||||
| use tokio::task::spawn_blocking; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::{IndexResolverError, Result}; | ||||
| use crate::index::update_handler::UpdateHandler; | ||||
| use crate::index::Index; | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| @@ -26,7 +27,7 @@ pub struct MapIndexStore { | ||||
|     index_store: AsyncMap<Uuid, Index>, | ||||
|     path: PathBuf, | ||||
|     index_size: usize, | ||||
|     update_handler: Arc<UpdateHandler>, | ||||
|     indexer_config: Arc<IndexerConfig>, | ||||
| } | ||||
|  | ||||
| impl MapIndexStore { | ||||
| @@ -35,14 +36,14 @@ impl MapIndexStore { | ||||
|         index_size: usize, | ||||
|         indexer_opts: &IndexerOpts, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?); | ||||
|         let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?); | ||||
|         let path = path.as_ref().join("indexes/"); | ||||
|         let index_store = Arc::new(RwLock::new(HashMap::new())); | ||||
|         Ok(Self { | ||||
|             index_store, | ||||
|             path, | ||||
|             index_size, | ||||
|             update_handler, | ||||
|             indexer_config, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
| @@ -63,7 +64,7 @@ impl IndexStore for MapIndexStore { | ||||
|         } | ||||
|  | ||||
|         let index_size = self.index_size; | ||||
|         let update_handler = self.update_handler.clone(); | ||||
|         let update_handler = self.indexer_config.clone(); | ||||
|         let index = spawn_blocking(move || -> Result<Index> { | ||||
|             let index = Index::open(path, index_size, uuid, update_handler)?; | ||||
|             Ok(index) | ||||
| @@ -88,7 +89,7 @@ impl IndexStore for MapIndexStore { | ||||
|                 } | ||||
|  | ||||
|                 let index_size = self.index_size; | ||||
|                 let update_handler = self.update_handler.clone(); | ||||
|                 let update_handler = self.indexer_config.clone(); | ||||
|                 let index = | ||||
|                     spawn_blocking(move || Index::open(path, index_size, uuid, update_handler)) | ||||
|                         .await??; | ||||
|   | ||||
| @@ -2,7 +2,7 @@ pub mod error; | ||||
| pub mod index_store; | ||||
| pub mod meta_store; | ||||
|  | ||||
| use std::convert::TryInto; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| @@ -12,16 +12,17 @@ use heed::Env; | ||||
| use index_store::{IndexStore, MapIndexStore}; | ||||
| use meilisearch_error::ResponseError; | ||||
| use meta_store::{HeedMetaStore, IndexMetaStore}; | ||||
| use milli::update::DocumentDeletionResult; | ||||
| use milli::update::{DocumentDeletionResult, IndexerConfig}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use tokio::sync::oneshot; | ||||
| use tokio::task::spawn_blocking; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index::{error::Result as IndexResult, update_handler::UpdateHandler, Index}; | ||||
| use crate::index::{error::Result as IndexResult, Index}; | ||||
| use crate::options::IndexerOpts; | ||||
| use crate::tasks::batch::Batch; | ||||
| use crate::tasks::task::{DocumentDeletion, Job, Task, TaskContent, TaskEvent, TaskId, TaskResult}; | ||||
| use crate::tasks::{Pending, TaskPerformer}; | ||||
| use crate::tasks::TaskPerformer; | ||||
| use crate::update_file_store::UpdateFileStore; | ||||
|  | ||||
| use self::meta_store::IndexMeta; | ||||
| @@ -96,14 +97,24 @@ where | ||||
|     U: IndexMetaStore + Send + Sync + 'static, | ||||
|     I: IndexStore + Send + Sync + 'static, | ||||
| { | ||||
|     type Error = ResponseError; | ||||
|     async fn process_batch(&self, mut batch: Batch) -> Batch { | ||||
|         // If a batch contains multiple tasks, then it must be a document addition batch | ||||
|         if let Some(Task { | ||||
|             content: TaskContent::DocumentAddition { .. }, | ||||
|             .. | ||||
|         }) = batch.tasks.first() | ||||
|         { | ||||
|             debug_assert!(batch.tasks.iter().all(|t| matches!( | ||||
|                 t, | ||||
|                 Task { | ||||
|                     content: TaskContent::DocumentAddition { .. }, | ||||
|                     .. | ||||
|                 } | ||||
|             ))); | ||||
|  | ||||
|     async fn process(&self, mut batch: Batch) -> Batch { | ||||
|         // Until batching is implemented, all batch should contain only one update. | ||||
|         debug_assert_eq!(batch.len(), 1); | ||||
|  | ||||
|         match batch.tasks.first_mut() { | ||||
|             Some(Pending::Task(task)) => { | ||||
|             self.process_document_addition_batch(batch).await | ||||
|         } else { | ||||
|             if let Some(task) = batch.tasks.first_mut() { | ||||
|                 task.events.push(TaskEvent::Processing(Utc::now())); | ||||
|  | ||||
|                 match self.process_task(task).await { | ||||
| @@ -119,15 +130,12 @@ where | ||||
|                     }), | ||||
|                 } | ||||
|             } | ||||
|             Some(Pending::Job(job)) => { | ||||
|                 let job = std::mem::take(job); | ||||
|                 self.process_job(job).await; | ||||
|             } | ||||
|  | ||||
|             None => (), | ||||
|             batch | ||||
|         } | ||||
|     } | ||||
|  | ||||
|         batch | ||||
|     async fn process_job(&self, job: Job) { | ||||
|         self.process_job(job).await; | ||||
|     } | ||||
|  | ||||
|     async fn finish(&self, batch: &Batch) { | ||||
| @@ -158,9 +166,9 @@ impl IndexResolver<HeedMetaStore, MapIndexStore> { | ||||
|         HeedMetaStore::load_dump(&src, env)?; | ||||
|         let indexes_path = src.as_ref().join("indexes"); | ||||
|         let indexes = indexes_path.read_dir()?; | ||||
|         let update_handler = UpdateHandler::new(indexer_opts)?; | ||||
|         let indexer_config = IndexerConfig::try_from(indexer_opts)?; | ||||
|         for index in indexes { | ||||
|             Index::load_dump(&index?.path(), &dst, index_db_size, &update_handler)?; | ||||
|             Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
| @@ -180,33 +188,100 @@ where | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     async fn process_task(&self, task: &Task) -> Result<TaskResult> { | ||||
|         let index_uid = task.index_uid.clone(); | ||||
|         match &task.content { | ||||
|             TaskContent::DocumentAddition { | ||||
|                 content_uuid, | ||||
|                 merge_strategy, | ||||
|                 primary_key, | ||||
|                 allow_index_creation, | ||||
|     async fn process_document_addition_batch(&self, mut batch: Batch) -> Batch { | ||||
|         fn get_content_uuid(task: &Task) -> Uuid { | ||||
|             match task { | ||||
|                 Task { | ||||
|                     content: TaskContent::DocumentAddition { content_uuid, .. }, | ||||
|                     .. | ||||
|                 } => *content_uuid, | ||||
|                 _ => panic!("unexpected task in the document addition batch"), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let content_uuids = batch.tasks.iter().map(get_content_uuid).collect::<Vec<_>>(); | ||||
|  | ||||
|         match batch.tasks.first() { | ||||
|             Some(Task { | ||||
|                 index_uid, | ||||
|                 id, | ||||
|                 content: | ||||
|                     TaskContent::DocumentAddition { | ||||
|                         merge_strategy, | ||||
|                         primary_key, | ||||
|                         allow_index_creation, | ||||
|                         .. | ||||
|                     }, | ||||
|                 .. | ||||
|             } => { | ||||
|             }) => { | ||||
|                 let primary_key = primary_key.clone(); | ||||
|                 let content_uuid = *content_uuid; | ||||
|                 let method = *merge_strategy; | ||||
|  | ||||
|                 let index = if *allow_index_creation { | ||||
|                     self.get_or_create_index(index_uid, task.id).await? | ||||
|                     self.get_or_create_index(index_uid.clone(), *id).await | ||||
|                 } else { | ||||
|                     self.get_index(index_uid.into_inner()).await? | ||||
|                     self.get_index(index_uid.as_str().to_string()).await | ||||
|                 }; | ||||
|  | ||||
|                 // If the index doesn't exist and we are not allowed to create it with the first | ||||
|                 // task, we must fail the whole batch. | ||||
|                 let now = Utc::now(); | ||||
|                 let index = match index { | ||||
|                     Ok(index) => index, | ||||
|                     Err(e) => { | ||||
|                         let error = ResponseError::from(e); | ||||
|                         for task in batch.tasks.iter_mut() { | ||||
|                             task.events.push(TaskEvent::Failed { | ||||
|                                 error: error.clone(), | ||||
|                                 timestamp: now, | ||||
|                             }); | ||||
|                         } | ||||
|                         return batch; | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 let file_store = self.file_store.clone(); | ||||
|                 let result = spawn_blocking(move || { | ||||
|                     index.update_documents(method, content_uuid, primary_key, file_store) | ||||
|                     index.update_documents( | ||||
|                         method, | ||||
|                         primary_key, | ||||
|                         file_store, | ||||
|                         content_uuids.into_iter(), | ||||
|                     ) | ||||
|                 }) | ||||
|                 .await??; | ||||
|                 .await; | ||||
|  | ||||
|                 Ok(result.into()) | ||||
|                 let event = match result { | ||||
|                     Ok(Ok(result)) => TaskEvent::Succeded { | ||||
|                         timestamp: Utc::now(), | ||||
|                         result: TaskResult::DocumentAddition { | ||||
|                             indexed_documents: result.indexed_documents, | ||||
|                         }, | ||||
|                     }, | ||||
|                     Ok(Err(e)) => TaskEvent::Failed { | ||||
|                         timestamp: Utc::now(), | ||||
|                         error: e.into(), | ||||
|                     }, | ||||
|                     Err(e) => TaskEvent::Failed { | ||||
|                         timestamp: Utc::now(), | ||||
|                         error: IndexResolverError::from(e).into(), | ||||
|                     }, | ||||
|                 }; | ||||
|  | ||||
|                 for task in batch.tasks.iter_mut() { | ||||
|                     task.events.push(event.clone()); | ||||
|                 } | ||||
|  | ||||
|                 batch | ||||
|             } | ||||
|             _ => panic!("invalid batch!"), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     async fn process_task(&self, task: &Task) -> Result<TaskResult> { | ||||
|         let index_uid = task.index_uid.clone(); | ||||
|         match &task.content { | ||||
|             TaskContent::DocumentAddition { .. } => panic!("updates should be handled by batch"), | ||||
|             TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) => { | ||||
|                 let ids = ids.clone(); | ||||
|                 let index = self.get_index(index_uid.into_inner()).await?; | ||||
| @@ -282,9 +357,13 @@ where | ||||
|             Job::Dump { ret, path } => { | ||||
|                 log::trace!("The Dump task is getting executed"); | ||||
|  | ||||
|                 if ret.send(self.dump(path).await).is_err() { | ||||
|                 let (sender, receiver) = oneshot::channel(); | ||||
|                 if ret.send(self.dump(path).await.map(|_| sender)).is_err() { | ||||
|                     log::error!("The dump actor died."); | ||||
|                 } | ||||
|  | ||||
|                 // wait until the dump has finished performing. | ||||
|                 let _ = receiver.await; | ||||
|             } | ||||
|             Job::Empty => log::error!("Tried to process an empty task."), | ||||
|             Job::Snapshot(job) => { | ||||
| @@ -404,7 +483,7 @@ where | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::BTreeMap; | ||||
|     use std::{collections::BTreeMap, vec::IntoIter}; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
| @@ -447,7 +526,7 @@ mod test { | ||||
|                             mocker.when::<String, IndexResult<IndexMeta>>("update_primary_key") | ||||
|                                 .then(move |_| Ok(IndexMeta{ created_at: Utc::now(), updated_at: Utc::now(), primary_key: None })); | ||||
|                         } | ||||
|                         mocker.when::<(IndexDocumentsMethod, Uuid, Option<String>, UpdateFileStore), IndexResult<DocumentAdditionResult>>("update_documents") | ||||
|                         mocker.when::<(IndexDocumentsMethod, Option<String>, UpdateFileStore, IntoIter<Uuid>), IndexResult<DocumentAdditionResult>>("update_documents") | ||||
|                                 .then(move |(_, _, _, _)| result()); | ||||
|                     } | ||||
|                     TaskContent::SettingsUpdate{..} => { | ||||
| @@ -462,13 +541,13 @@ mod test { | ||||
|                     } | ||||
|                     TaskContent::DocumentDeletion(DocumentDeletion::Ids(_ids)) => { | ||||
|                         let result = move || if !index_op_fails { | ||||
|                             Ok(any_int as u64) | ||||
|                             Ok(DocumentDeletionResult { deleted_documents: any_int as u64, remaining_documents: any_int as u64 }) | ||||
|                         } else { | ||||
|                             // return this error because it's easy to generate... | ||||
|                             Err(IndexError::DocumentNotFound("a doc".into())) | ||||
|                         }; | ||||
|  | ||||
|                         mocker.when::<&[String], IndexResult<u64>>("delete_documents") | ||||
|                         mocker.when::<&[String], IndexResult<DocumentDeletionResult>>("delete_documents") | ||||
|                                 .then(move |_| result()); | ||||
|                     }, | ||||
|                     TaskContent::DocumentDeletion(DocumentDeletion::Clear) => { | ||||
| @@ -561,7 +640,8 @@ mod test { | ||||
|                 let update_file_store = UpdateFileStore::mock(mocker); | ||||
|                 let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store); | ||||
|  | ||||
|                 let result = index_resolver.process_task(&task).await; | ||||
|                 let batch = Batch { id: 1, created_at: Utc::now(), tasks: vec![task.clone()] }; | ||||
|                 let result = index_resolver.process_batch(batch).await; | ||||
|  | ||||
|                 // Test for some expected output scenarios: | ||||
|                 // Index creation and deletion cannot fail because of a failed index op, since they | ||||
| @@ -575,9 +655,9 @@ mod test { | ||||
|                                                                 | TaskContent::DocumentAddition { allow_index_creation: false, ..} | ||||
|                                                                 | TaskContent::IndexUpdate { .. } )) | ||||
|                 { | ||||
|                     assert!(result.is_err(), "{:?}", result); | ||||
|                     assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Failed { .. }), "{:?}", result); | ||||
|                 } else { | ||||
|                     assert!(result.is_ok(), "{:?}", result); | ||||
|                     assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Succeded { .. }), "{:?}", result); | ||||
|                 } | ||||
|             }); | ||||
|         } | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| use core::fmt; | ||||
| use std::{ops::Deref, str::FromStr}; | ||||
| use std::{convert::TryFrom, ops::Deref, str::FromStr}; | ||||
|  | ||||
| use byte_unit::{Byte, ByteError}; | ||||
| use clap::Parser; | ||||
| use milli::CompressionType; | ||||
| use milli::{update::IndexerConfig, CompressionType}; | ||||
| use serde::Serialize; | ||||
| use sysinfo::{RefreshKind, System, SystemExt}; | ||||
|  | ||||
| #[derive(Debug, Clone, Parser)] | ||||
| @@ -43,6 +44,52 @@ pub struct IndexerOpts { | ||||
|     pub indexing_jobs: Option<usize>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Parser, Default, Serialize)] | ||||
| pub struct SchedulerConfig { | ||||
|     /// enable the autobatching experimental feature | ||||
|     #[clap(long, hide = true)] | ||||
|     pub enable_autobatching: bool, | ||||
|  | ||||
|     // The maximum number of updates of the same type that can be batched together. | ||||
|     // If unspecified, this is unlimited. A value of 0 is interpreted as 1. | ||||
|     #[clap(long, requires = "enable-autobatching", hide = true)] | ||||
|     pub max_batch_size: Option<usize>, | ||||
|  | ||||
|     // The maximum number of documents in a document batch. Since batches must contain at least one | ||||
|     // update for the scheduler to make progress, the number of documents in a batch will be at | ||||
|     // least the number of documents of its first update. | ||||
|     #[clap(long, requires = "enable-autobatching", hide = true)] | ||||
|     pub max_documents_per_batch: Option<usize>, | ||||
|  | ||||
|     /// Debounce duration in seconds | ||||
|     /// | ||||
|     /// When a new task is enqueued, the scheduler waits for `debounce_duration_sec` seconds for new updates before | ||||
|     /// starting to process a batch of updates. | ||||
|     #[clap(long, requires = "enable-autobatching", hide = true)] | ||||
|     pub debounce_duration_sec: Option<u64>, | ||||
| } | ||||
|  | ||||
| impl TryFrom<&IndexerOpts> for IndexerConfig { | ||||
|     type Error = anyhow::Error; | ||||
|  | ||||
|     fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> { | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new() | ||||
|             .num_threads(other.indexing_jobs.unwrap_or(num_cpus::get() / 2)) | ||||
|             .build()?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             log_every_n: Some(other.log_every_n), | ||||
|             max_nb_chunks: other.max_nb_chunks, | ||||
|             max_memory: (*other.max_memory).map(|b| b.get_bytes() as usize), | ||||
|             chunk_compression_type: other.chunk_compression_type, | ||||
|             chunk_compression_level: other.chunk_compression_level, | ||||
|             thread_pool: Some(thread_pool), | ||||
|             max_positions_per_attributes: None, | ||||
|             ..Default::default() | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Default for IndexerOpts { | ||||
|     fn default() -> Self { | ||||
|         Self { | ||||
|   | ||||
| @@ -1,17 +1,19 @@ | ||||
| use std::fs; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use anyhow::bail; | ||||
| use fs_extra::dir::{self, CopyOptions}; | ||||
| use log::{info, trace}; | ||||
| use tokio::sync::RwLock; | ||||
| use tokio::time::sleep; | ||||
| use walkdir::WalkDir; | ||||
|  | ||||
| use crate::compression::from_tar_gz; | ||||
| use crate::index_controller::versioning::VERSION_FILE_NAME; | ||||
| use crate::tasks::task::Job; | ||||
| use crate::tasks::TaskStore; | ||||
| use crate::tasks::Scheduler; | ||||
|  | ||||
| pub struct SnapshotService { | ||||
|     pub(crate) db_path: PathBuf, | ||||
| @@ -19,7 +21,7 @@ pub struct SnapshotService { | ||||
|     pub(crate) snapshot_path: PathBuf, | ||||
|     pub(crate) index_size: usize, | ||||
|     pub(crate) meta_env_size: usize, | ||||
|     pub(crate) task_store: TaskStore, | ||||
|     pub(crate) scheduler: Arc<RwLock<Scheduler>>, | ||||
| } | ||||
|  | ||||
| impl SnapshotService { | ||||
| @@ -36,7 +38,7 @@ impl SnapshotService { | ||||
|                 index_size: self.index_size, | ||||
|             }; | ||||
|             let job = Job::Snapshot(snapshot_job); | ||||
|             self.task_store.register_job(job).await; | ||||
|             self.scheduler.write().await.schedule_job(job).await; | ||||
|  | ||||
|             sleep(self.snapshot_period).await; | ||||
|         } | ||||
|   | ||||
| @@ -1,14 +1,14 @@ | ||||
| use chrono::{DateTime, Utc}; | ||||
|  | ||||
| use super::{task::Task, task_store::Pending}; | ||||
| use super::task::Task; | ||||
|  | ||||
| pub type BatchId = u32; | ||||
| pub type BatchId = u64; | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct Batch { | ||||
|     pub id: BatchId, | ||||
|     pub created_at: DateTime<Utc>, | ||||
|     pub tasks: Vec<Pending<Task>>, | ||||
|     pub tasks: Vec<Task>, | ||||
| } | ||||
|  | ||||
| impl Batch { | ||||
|   | ||||
| @@ -1,47 +1,38 @@ | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use async_trait::async_trait; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| pub use scheduler::Scheduler; | ||||
| pub use task_store::TaskFilter; | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub use task_store::test::MockTaskStore as TaskStore; | ||||
| #[cfg(not(test))] | ||||
| pub use task_store::TaskStore; | ||||
|  | ||||
| pub use task_store::{Pending, TaskFilter}; | ||||
|  | ||||
| use batch::Batch; | ||||
| use error::Result; | ||||
| use scheduler::Scheduler; | ||||
|  | ||||
| use self::task::Job; | ||||
|  | ||||
| pub mod batch; | ||||
| pub mod error; | ||||
| pub mod scheduler; | ||||
| mod scheduler; | ||||
| pub mod task; | ||||
| mod task_store; | ||||
| pub mod update_loop; | ||||
|  | ||||
| #[cfg_attr(test, mockall::automock(type Error=test::DebugError;))] | ||||
| #[async_trait] | ||||
| pub trait TaskPerformer: Sync + Send + 'static { | ||||
|     type Error: Serialize + for<'de> Deserialize<'de> + std::error::Error + Sync + Send + 'static; | ||||
|     /// Processes the `Task` batch returning the batch with the `Task` updated. | ||||
|     async fn process(&self, batch: Batch) -> Batch; | ||||
|     async fn process_batch(&self, batch: Batch) -> Batch; | ||||
|  | ||||
|     async fn process_job(&self, job: Job); | ||||
|  | ||||
|     /// `finish` is called when the result of `process` has been committed to the task store. This | ||||
|     /// method can be used to perform cleanup after the update has been completed for example. | ||||
|     async fn finish(&self, batch: &Batch); | ||||
| } | ||||
|  | ||||
| pub fn create_task_store<P>(env: Arc<heed::Env>, performer: Arc<P>) -> Result<TaskStore> | ||||
| where | ||||
|     P: TaskPerformer, | ||||
| { | ||||
|     let task_store = TaskStore::new(env)?; | ||||
|     let scheduler = Scheduler::new(task_store.clone(), performer, Duration::from_millis(1)); | ||||
|     tokio::task::spawn_local(scheduler.run()); | ||||
|     Ok(task_store) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use serde::{Deserialize, Serialize}; | ||||
|   | ||||
| @@ -1,253 +1,526 @@ | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque}; | ||||
| use std::ops::{Deref, DerefMut}; | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use atomic_refcell::AtomicRefCell; | ||||
| use chrono::Utc; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use tokio::sync::{watch, RwLock}; | ||||
|  | ||||
| use crate::options::SchedulerConfig; | ||||
| use crate::update_file_store::UpdateFileStore; | ||||
|  | ||||
| use super::batch::Batch; | ||||
| use super::error::Result; | ||||
| #[cfg(test)] | ||||
| use super::task_store::test::MockTaskStore as TaskStore; | ||||
| use super::task_store::Pending; | ||||
| #[cfg(not(test))] | ||||
| use super::task_store::TaskStore; | ||||
| use super::TaskPerformer; | ||||
| use crate::tasks::task::TaskEvent; | ||||
| use super::task::{Job, Task, TaskContent, TaskEvent, TaskId}; | ||||
| use super::update_loop::UpdateLoop; | ||||
| use super::{TaskFilter, TaskPerformer, TaskStore}; | ||||
|  | ||||
| /// The scheduler roles is to perform batches of tasks one at a time. It will monitor the TaskStore | ||||
| /// for new tasks, put them in a batch, and process the batch as soon as possible. | ||||
| /// | ||||
| /// When a batch is currently processing, the scheduler is just waiting. | ||||
| pub struct Scheduler<P: TaskPerformer> { | ||||
|     store: TaskStore, | ||||
|     performer: Arc<P>, | ||||
|  | ||||
|     /// The interval at which the the `TaskStore` should be checked for new updates | ||||
|     task_store_check_interval: Duration, | ||||
| #[derive(Eq, Debug, Clone, Copy)] | ||||
| enum TaskType { | ||||
|     DocumentAddition { number: usize }, | ||||
|     DocumentUpdate { number: usize }, | ||||
|     Other, | ||||
| } | ||||
|  | ||||
| impl<P> Scheduler<P> | ||||
| where | ||||
|     P: TaskPerformer + Send + Sync + 'static, | ||||
|     P::Error: Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static, | ||||
| { | ||||
|     pub fn new(store: TaskStore, performer: Arc<P>, task_store_check_interval: Duration) -> Self { | ||||
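| /// Two task types are equal only if both are `DocumentAddition` or both are `DocumentUpdate` (the | ||||
| /// document count is ignored). `Other` never compares equal, not even to another `Other`. | ||||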
| impl PartialEq for TaskType { | ||||
|     fn eq(&self, other: &Self) -> bool { | ||||
|         matches!( | ||||
|             (self, other), | ||||
|             (Self::DocumentAddition { .. }, Self::DocumentAddition { .. }) | ||||
|                 | (Self::DocumentUpdate { .. }, Self::DocumentUpdate { .. }) | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Eq, Debug, Clone, Copy)] | ||||
| struct PendingTask { | ||||
|     kind: TaskType, | ||||
|     id: TaskId, | ||||
| } | ||||
|  | ||||
| impl PartialEq for PendingTask { | ||||
|     fn eq(&self, other: &Self) -> bool { | ||||
|         self.id.eq(&other.id) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl PartialOrd for PendingTask { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         Some(self.cmp(other)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Ord for PendingTask { | ||||
|     fn cmp(&self, other: &Self) -> Ordering { | ||||
|         self.id.cmp(&other.id).reverse() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| struct TaskList { | ||||
|     index: String, | ||||
|     tasks: BinaryHeap<PendingTask>, | ||||
| } | ||||
|  | ||||
| impl Deref for TaskList { | ||||
|     type Target = BinaryHeap<PendingTask>; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.tasks | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DerefMut for TaskList { | ||||
|     fn deref_mut(&mut self) -> &mut Self::Target { | ||||
|         &mut self.tasks | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TaskList { | ||||
|     fn new(index: String) -> Self { | ||||
|         Self { | ||||
|             store, | ||||
|             performer, | ||||
|             task_store_check_interval, | ||||
|             index, | ||||
|             tasks: Default::default(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
|     pub async fn run(self) { | ||||
|         loop { | ||||
|             if let Err(e) = self.process_next_batch().await { | ||||
|                 log::error!("an error occured while processing an update batch: {}", e); | ||||
| impl PartialEq for TaskList { | ||||
|     fn eq(&self, other: &Self) -> bool { | ||||
|         self.index == other.index | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Eq for TaskList {} | ||||
|  | ||||
| impl Ord for TaskList { | ||||
|     fn cmp(&self, other: &Self) -> Ordering { | ||||
|         match (self.peek(), other.peek()) { | ||||
|             (None, None) => Ordering::Equal, | ||||
|             (None, Some(_)) => Ordering::Less, | ||||
|             (Some(_), None) => Ordering::Greater, | ||||
|             (Some(lhs), Some(rhs)) => lhs.cmp(rhs), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl PartialOrd for TaskList { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         Some(self.cmp(other)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Default)] | ||||
| struct TaskQueue { | ||||
|     /// Maps index uids to their TaskList, for quick access | ||||
|     index_tasks: HashMap<String, Arc<AtomicRefCell<TaskList>>>, | ||||
|     /// A queue that orders TaskList by the priority of their first update | ||||
|     queue: BinaryHeap<Arc<AtomicRefCell<TaskList>>>, | ||||
| } | ||||
|  | ||||
| impl TaskQueue { | ||||
|     fn insert(&mut self, task: Task) { | ||||
|         let uid = task.index_uid.into_inner(); | ||||
|         let id = task.id; | ||||
|         let kind = match task.content { | ||||
|             TaskContent::DocumentAddition { | ||||
|                 documents_count, | ||||
|                 merge_strategy: IndexDocumentsMethod::ReplaceDocuments, | ||||
|                 .. | ||||
|             } => TaskType::DocumentAddition { | ||||
|                 number: documents_count, | ||||
|             }, | ||||
|             TaskContent::DocumentAddition { | ||||
|                 documents_count, | ||||
|                 merge_strategy: IndexDocumentsMethod::UpdateDocuments, | ||||
|                 .. | ||||
|             } => TaskType::DocumentUpdate { | ||||
|                 number: documents_count, | ||||
|             }, | ||||
|             _ => TaskType::Other, | ||||
|         }; | ||||
|         let task = PendingTask { kind, id }; | ||||
|  | ||||
|         match self.index_tasks.entry(uid) { | ||||
|             Entry::Occupied(entry) => { | ||||
|                 // A task list already exists for this index; all we have to do is push the new | ||||
|                 // update to the end of the list. This won't change the order since ids are | ||||
|                 // monotonically increasing. | ||||
|                 let mut list = entry.get().borrow_mut(); | ||||
|  | ||||
|                 // We only need the first element to be lower than the one we want to | ||||
|                 // insert to preserve the order in the queue. | ||||
|                 assert!(list.peek().map(|old_id| id >= old_id.id).unwrap_or(true)); | ||||
|  | ||||
|                 list.push(task); | ||||
|             } | ||||
|             Entry::Vacant(entry) => { | ||||
|                 let mut task_list = TaskList::new(entry.key().to_owned()); | ||||
|                 task_list.push(task); | ||||
|                 let task_list = Arc::new(AtomicRefCell::new(task_list)); | ||||
|                 entry.insert(task_list.clone()); | ||||
|                 self.queue.push(task_list); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     async fn process_next_batch(&self) -> Result<()> { | ||||
|         match self.prepare_batch().await? { | ||||
|             Some(mut batch) => { | ||||
|                 for task in &mut batch.tasks { | ||||
|                     match task { | ||||
|                         Pending::Task(task) => task.events.push(TaskEvent::Processing(Utc::now())), | ||||
|                         Pending::Job(_) => (), | ||||
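|     /// Calls `f` with mutable access to the task list of the next index to schedule. It is | ||||
|     /// guaranteed that the first id from the task list will be the lowest pending task id. | ||||
|     /// Once `f` returns, the list is pushed back onto the queue if it still holds tasks, and is | ||||
|     /// removed from `index_tasks` otherwise. Returns `None` when the queue is empty. | ||||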
|     fn head_mut<R>(&mut self, mut f: impl FnMut(&mut TaskList) -> R) -> Option<R> { | ||||
|         let head = self.queue.pop()?; | ||||
|         let result = { | ||||
|             let mut ref_head = head.borrow_mut(); | ||||
|             f(&mut *ref_head) | ||||
|         }; | ||||
|         if !head.borrow().tasks.is_empty() { | ||||
|             // After being mutated, the head is reinserted to the correct position. | ||||
|             self.queue.push(head); | ||||
|         } else { | ||||
|             self.index_tasks.remove(&head.borrow().index); | ||||
|         } | ||||
|  | ||||
|         Some(result) | ||||
|     } | ||||
|  | ||||
|     pub fn is_empty(&self) -> bool { | ||||
|         self.queue.is_empty() && self.index_tasks.is_empty() | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct Scheduler { | ||||
|     jobs: VecDeque<Job>, | ||||
|     tasks: TaskQueue, | ||||
|  | ||||
|     store: TaskStore, | ||||
|     processing: Vec<TaskId>, | ||||
|     next_fetched_task_id: TaskId, | ||||
|     config: SchedulerConfig, | ||||
|     /// Notifies the update loop that a new task was received | ||||
|     notifier: watch::Sender<()>, | ||||
| } | ||||
|  | ||||
| impl Scheduler { | ||||
|     pub fn new<P>( | ||||
|         store: TaskStore, | ||||
|         performer: Arc<P>, | ||||
|         mut config: SchedulerConfig, | ||||
|     ) -> Result<Arc<RwLock<Self>>> | ||||
|     where | ||||
|         P: TaskPerformer, | ||||
|     { | ||||
|         let (notifier, rcv) = watch::channel(()); | ||||
|  | ||||
|         let debounce_time = config.debounce_duration_sec; | ||||
|  | ||||
|         // Disable autobatching | ||||
|         if !config.enable_autobatching { | ||||
|             config.max_batch_size = Some(1); | ||||
|         } | ||||
|  | ||||
|         let this = Self { | ||||
|             jobs: VecDeque::new(), | ||||
|             tasks: TaskQueue::default(), | ||||
|  | ||||
|             store, | ||||
|             processing: Vec::new(), | ||||
|             next_fetched_task_id: 0, | ||||
|             config, | ||||
|             notifier, | ||||
|         }; | ||||
|  | ||||
|         // Notify update loop to start processing pending updates immediately after startup. | ||||
|         this.notify(); | ||||
|  | ||||
|         let this = Arc::new(RwLock::new(this)); | ||||
|  | ||||
|         let update_loop = UpdateLoop::new( | ||||
|             this.clone(), | ||||
|             performer, | ||||
|             debounce_time.filter(|&v| v > 0).map(Duration::from_secs), | ||||
|             rcv, | ||||
|         ); | ||||
|  | ||||
|         tokio::task::spawn_local(update_loop.run()); | ||||
|  | ||||
|         Ok(this) | ||||
|     } | ||||
|  | ||||
|     pub async fn dump(&self, path: &Path, file_store: UpdateFileStore) -> Result<()> { | ||||
|         self.store.dump(path, file_store).await | ||||
|     } | ||||
|  | ||||
|     fn register_task(&mut self, task: Task) { | ||||
|         assert!(!task.is_finished()); | ||||
|         self.tasks.insert(task); | ||||
|     } | ||||
|  | ||||
|     /// Clears the processing list. This method should be called when the processing of a batch is finished. | ||||
|     pub fn finish(&mut self) { | ||||
|         self.processing.clear(); | ||||
|     } | ||||
|  | ||||
|     pub fn notify(&self) { | ||||
|         let _ = self.notifier.send(()); | ||||
|     } | ||||
|  | ||||
|     fn notify_if_not_empty(&self) { | ||||
|         if !self.jobs.is_empty() || !self.tasks.is_empty() { | ||||
|             self.notify(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> { | ||||
|         self.store.update_tasks(tasks).await | ||||
|     } | ||||
|  | ||||
|     pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> { | ||||
|         self.store.get_task(id, filter).await | ||||
|     } | ||||
|  | ||||
|     pub async fn list_tasks( | ||||
|         &self, | ||||
|         offset: Option<TaskId>, | ||||
|         filter: Option<TaskFilter>, | ||||
|         limit: Option<usize>, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         self.store.list_tasks(offset, filter, limit).await | ||||
|     } | ||||
|  | ||||
|     pub async fn get_processing_tasks(&self) -> Result<Vec<Task>> { | ||||
|         let mut tasks = Vec::new(); | ||||
|  | ||||
|         for id in self.processing.iter() { | ||||
|             let task = self.store.get_task(*id, None).await?; | ||||
|             tasks.push(task); | ||||
|         } | ||||
|  | ||||
|         Ok(tasks) | ||||
|     } | ||||
|  | ||||
|     pub async fn schedule_job(&mut self, job: Job) { | ||||
|         self.jobs.push_back(job); | ||||
|         self.notify(); | ||||
|     } | ||||
|  | ||||
|     async fn fetch_pending_tasks(&mut self) -> Result<()> { | ||||
|         // We must NEVER re-enqueue an already processed task! Its content uuid would point to a nonexistent file. | ||||
|         // | ||||
|         // TODO(marin): This may create some latency when the first batch lazy loads the pending updates. | ||||
|         let mut filter = TaskFilter::default(); | ||||
|         filter.filter_fn(|task| !task.is_finished()); | ||||
|  | ||||
|         self.store | ||||
|             .list_tasks(Some(self.next_fetched_task_id), Some(filter), None) | ||||
|             .await? | ||||
|             .into_iter() | ||||
|             // The tasks arrive in reverse order, and we need to insert them in order. | ||||
|             .rev() | ||||
|             .for_each(|t| { | ||||
|                 self.next_fetched_task_id = t.id + 1; | ||||
|                 self.register_task(t); | ||||
|             }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Prepare the next batch, and set `processing` to the ids in that batch. | ||||
|     pub async fn prepare(&mut self) -> Result<Pending> { | ||||
|         // If there is a job to process, do it first. | ||||
|         if let Some(job) = self.jobs.pop_front() { | ||||
|             // There is more work to do, notify the update loop | ||||
|             self.notify_if_not_empty(); | ||||
|             return Ok(Pending::Job(job)); | ||||
|         } | ||||
|         // Try to fill the queue with pending tasks. | ||||
|         self.fetch_pending_tasks().await?; | ||||
|  | ||||
|         make_batch(&mut self.tasks, &mut self.processing, &self.config); | ||||
|  | ||||
|         log::debug!("prepared batch with {} tasks", self.processing.len()); | ||||
|  | ||||
|         if !self.processing.is_empty() { | ||||
|             let ids = std::mem::take(&mut self.processing); | ||||
|  | ||||
|             let (ids, mut tasks) = self.store.get_pending_tasks(ids).await?; | ||||
|  | ||||
|             // The batch id is the id of the first update it contains | ||||
|             let id = match tasks.first() { | ||||
|                 Some(Task { id, .. }) => *id, | ||||
|                 _ => panic!("invalid batch"), | ||||
|             }; | ||||
|  | ||||
|             tasks.iter_mut().for_each(|t| { | ||||
|                 t.events.push(TaskEvent::Batched { | ||||
|                     batch_id: id, | ||||
|                     timestamp: Utc::now(), | ||||
|                 }) | ||||
|             }); | ||||
|  | ||||
|             self.processing = ids; | ||||
|  | ||||
|             let batch = Batch { | ||||
|                 id, | ||||
|                 created_at: Utc::now(), | ||||
|                 tasks, | ||||
|             }; | ||||
|  | ||||
|             // There is more work to do, notify the update loop | ||||
|             self.notify_if_not_empty(); | ||||
|  | ||||
|             Ok(Pending::Batch(batch)) | ||||
|         } else { | ||||
|             Ok(Pending::Nothing) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum Pending { | ||||
|     Batch(Batch), | ||||
|     Job(Job), | ||||
|     Nothing, | ||||
| } | ||||
|  | ||||
| fn make_batch(tasks: &mut TaskQueue, processing: &mut Vec<TaskId>, config: &SchedulerConfig) { | ||||
|     processing.clear(); | ||||
|  | ||||
|     let mut doc_count = 0; | ||||
|     tasks.head_mut(|list| match list.peek().copied() { | ||||
|         Some(PendingTask { | ||||
|             kind: TaskType::Other, | ||||
|             id, | ||||
|         }) => { | ||||
|             processing.push(id); | ||||
|             list.pop(); | ||||
|         } | ||||
|         Some(PendingTask { kind, .. }) => loop { | ||||
|             match list.peek() { | ||||
|                 Some(pending) if pending.kind == kind => { | ||||
|                     // We always need to process at least one task for the scheduler to make progress. | ||||
|                     if processing.len() >= config.max_batch_size.unwrap_or(usize::MAX).max(1) { | ||||
|                         break; | ||||
|                     } | ||||
|                     let pending = list.pop().unwrap(); | ||||
|                     processing.push(pending.id); | ||||
|  | ||||
|                     // We add the number of documents to the count if we are scheduling document additions and | ||||
|                     // stop adding if we already have enough. | ||||
|                     // | ||||
|                     // We check that bound only after adding the current task to the batch, so that a batch contains at least one task. | ||||
|                     match pending.kind { | ||||
|                         TaskType::DocumentUpdate { number } | ||||
|                         | TaskType::DocumentAddition { number } => { | ||||
|                             doc_count += number; | ||||
|  | ||||
|                             if doc_count >= config.max_documents_per_batch.unwrap_or(usize::MAX) { | ||||
|                                 break; | ||||
|                             } | ||||
|                         } | ||||
|                         _ => (), | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // the jobs are ignored | ||||
|                 batch.tasks = self.store.update_tasks(batch.tasks).await?; | ||||
|  | ||||
|                 let performer = self.performer.clone(); | ||||
|                 let batch_result = performer.process(batch).await; | ||||
|                 self.handle_batch_result(batch_result).await?; | ||||
|                 _ => break, | ||||
|             } | ||||
|             None => { | ||||
|                 // No update found to create a batch we wait a bit before we retry. | ||||
|                 tokio::time::sleep(self.task_store_check_interval).await; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Checks for pending tasks and groups them in a batch. If there are no pending update, | ||||
|     /// return Ok(None) | ||||
|     /// | ||||
|     /// Until batching is properly implemented, the batches contain only one task. | ||||
|     async fn prepare_batch(&self) -> Result<Option<Batch>> { | ||||
|         match self.store.peek_pending_task().await { | ||||
|             Some(Pending::Task(next_task_id)) => { | ||||
|                 let mut task = self.store.get_task(next_task_id, None).await?; | ||||
|  | ||||
|                 task.events.push(TaskEvent::Batched { | ||||
|                     timestamp: Utc::now(), | ||||
|                     batch_id: 0, | ||||
|                 }); | ||||
|  | ||||
|                 let batch = Batch { | ||||
|                     id: 0, | ||||
|                     // index_uid: task.index_uid.clone(), | ||||
|                     created_at: Utc::now(), | ||||
|                     tasks: vec![Pending::Task(task)], | ||||
|                 }; | ||||
|                 Ok(Some(batch)) | ||||
|             } | ||||
|             Some(Pending::Job(job)) => Ok(Some(Batch { | ||||
|                 id: 0, | ||||
|                 created_at: Utc::now(), | ||||
|                 tasks: vec![Pending::Job(job)], | ||||
|             })), | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Handles the result from a batch processing. | ||||
|     /// | ||||
|     /// When a task is processed, the result of the processing is pushed to its event list. The | ||||
|     /// handle batch result make sure that the new state is save into its store. | ||||
|     /// The tasks are then removed from the processing queue. | ||||
|     async fn handle_batch_result(&self, mut batch: Batch) -> Result<()> { | ||||
|         let tasks = self.store.update_tasks(batch.tasks).await?; | ||||
|         batch.tasks = tasks; | ||||
|         self.store.delete_pending(&batch.tasks[0]).await; | ||||
|         self.performer.finish(&batch).await; | ||||
|         Ok(()) | ||||
|     } | ||||
|         }, | ||||
|         None => (), | ||||
|     }); | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use nelson::Mocker; | ||||
|     use milli::update::IndexDocumentsMethod; | ||||
|     use uuid::Uuid; | ||||
|  | ||||
|     use crate::index_resolver::IndexUid; | ||||
|     use crate::tasks::task::Task; | ||||
|     use crate::tasks::task_store::TaskFilter; | ||||
|     use crate::{index_resolver::IndexUid, tasks::task::TaskContent}; | ||||
|  | ||||
|     use super::super::task::{TaskContent, TaskEvent, TaskId, TaskResult}; | ||||
|     use super::super::MockTaskPerformer; | ||||
|     use super::*; | ||||
|  | ||||
|     #[tokio::test] | ||||
|     async fn test_prepare_batch_full() { | ||||
|         let mocker = Mocker::default(); | ||||
|  | ||||
|         mocker | ||||
|             .when::<(TaskId, Option<TaskFilter>), Result<Option<Task>>>("get_task") | ||||
|             .once() | ||||
|             .then(|(id, _filter)| { | ||||
|                 let task = Task { | ||||
|                     id, | ||||
|                     index_uid: IndexUid::new("Test".to_string()).unwrap(), | ||||
|                     content: TaskContent::IndexDeletion, | ||||
|                     events: vec![TaskEvent::Created(Utc::now())], | ||||
|                 }; | ||||
|                 Ok(Some(task)) | ||||
|             }); | ||||
|  | ||||
|         mocker | ||||
|             .when::<(), Option<Pending<TaskId>>>("peek_pending_task") | ||||
|             .then(|()| Some(Pending::Task(1))); | ||||
|  | ||||
|         let store = TaskStore::mock(mocker); | ||||
|         let performer = Arc::new(MockTaskPerformer::new()); | ||||
|  | ||||
|         let scheduler = Scheduler { | ||||
|             store, | ||||
|             performer, | ||||
|             task_store_check_interval: Duration::from_millis(1), | ||||
|         }; | ||||
|  | ||||
|         let batch = scheduler.prepare_batch().await.unwrap().unwrap(); | ||||
|  | ||||
|         assert_eq!(batch.tasks.len(), 1); | ||||
|         assert!( | ||||
|             matches!(batch.tasks[0], Pending::Task(Task { id: 1, .. })), | ||||
|             "{:?}", | ||||
|             batch.tasks[0] | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[tokio::test] | ||||
|     async fn test_prepare_batch_empty() { | ||||
|         let mocker = Mocker::default(); | ||||
|         mocker | ||||
|             .when::<(), Option<Pending<TaskId>>>("peek_pending_task") | ||||
|             .then(|()| None); | ||||
|  | ||||
|         let store = TaskStore::mock(mocker); | ||||
|         let performer = Arc::new(MockTaskPerformer::new()); | ||||
|  | ||||
|         let scheduler = Scheduler { | ||||
|             store, | ||||
|             performer, | ||||
|             task_store_check_interval: Duration::from_millis(1), | ||||
|         }; | ||||
|  | ||||
|         assert!(scheduler.prepare_batch().await.unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[tokio::test] | ||||
|     async fn test_loop_run_normal() { | ||||
|         let mocker = Mocker::default(); | ||||
|         let mut id = Some(1); | ||||
|         mocker | ||||
|             .when::<(), Option<Pending<TaskId>>>("peek_pending_task") | ||||
|             .then(move |()| id.take().map(Pending::Task)); | ||||
|         mocker | ||||
|             .when::<(TaskId, Option<TaskFilter>), Result<Task>>("get_task") | ||||
|             .once() | ||||
|             .then(|(id, _)| { | ||||
|                 let task = Task { | ||||
|                     id, | ||||
|                     index_uid: IndexUid::new("Test".to_string()).unwrap(), | ||||
|                     content: TaskContent::IndexDeletion, | ||||
|                     events: vec![TaskEvent::Created(Utc::now())], | ||||
|                 }; | ||||
|                 Ok(task) | ||||
|             }); | ||||
|  | ||||
|         mocker | ||||
|             .when::<Vec<Pending<Task>>, Result<Vec<Pending<Task>>>>("update_tasks") | ||||
|             .times(2) | ||||
|             .then(|tasks| { | ||||
|                 assert_eq!(tasks.len(), 1); | ||||
|                 Ok(tasks) | ||||
|             }); | ||||
|  | ||||
|         mocker.when::<(), ()>("delete_pending").once().then(|_| ()); | ||||
|  | ||||
|         let store = TaskStore::mock(mocker); | ||||
|  | ||||
|         let mut performer = MockTaskPerformer::new(); | ||||
|         performer.expect_process().once().returning(|mut batch| { | ||||
|             batch.tasks.iter_mut().for_each(|t| match t { | ||||
|                 Pending::Task(Task { ref mut events, .. }) => events.push(TaskEvent::Succeded { | ||||
|                     result: TaskResult::Other, | ||||
|                     timestamp: Utc::now(), | ||||
|                 }), | ||||
|                 _ => panic!("expected a task, found a job"), | ||||
|             }); | ||||
|  | ||||
|             batch | ||||
|         }); | ||||
|  | ||||
|         performer.expect_finish().once().returning(|_| ()); | ||||
|  | ||||
|         let performer = Arc::new(performer); | ||||
|  | ||||
|         let scheduler = Scheduler { | ||||
|             store, | ||||
|             performer, | ||||
|             task_store_check_interval: Duration::from_millis(1), | ||||
|         }; | ||||
|  | ||||
|         let handle = tokio::spawn(scheduler.run()); | ||||
|  | ||||
|         if let Ok(r) = tokio::time::timeout(Duration::from_millis(100), handle).await { | ||||
|             r.unwrap(); | ||||
|     fn gen_task(id: TaskId, index_uid: &str, content: TaskContent) -> Task { | ||||
|         Task { | ||||
|             id, | ||||
|             index_uid: IndexUid::new_unchecked(index_uid.to_owned()), | ||||
|             content, | ||||
|             events: vec![], | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn register_updates_multiples_indexes() { | ||||
|         let mut queue = TaskQueue::default(); | ||||
|         queue.insert(gen_task(0, "test1", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(1, "test2", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(2, "test2", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(3, "test2", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(4, "test1", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(5, "test1", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(6, "test2", TaskContent::IndexDeletion)); | ||||
|  | ||||
|         let test1_tasks = queue | ||||
|             .head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>()) | ||||
|             .unwrap(); | ||||
|  | ||||
|         assert_eq!(test1_tasks, &[0, 4, 5]); | ||||
|  | ||||
|         let test2_tasks = queue | ||||
|             .head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>()) | ||||
|             .unwrap(); | ||||
|  | ||||
|         assert_eq!(test2_tasks, &[1, 2, 3, 6]); | ||||
|  | ||||
|         assert!(queue.index_tasks.is_empty()); | ||||
|         assert!(queue.queue.is_empty()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_make_batch() { | ||||
|         let mut queue = TaskQueue::default(); | ||||
|         let content = TaskContent::DocumentAddition { | ||||
|             content_uuid: Uuid::new_v4(), | ||||
|             merge_strategy: IndexDocumentsMethod::ReplaceDocuments, | ||||
|             primary_key: Some("test".to_string()), | ||||
|             documents_count: 0, | ||||
|             allow_index_creation: true, | ||||
|         }; | ||||
|         queue.insert(gen_task(0, "test1", content.clone())); | ||||
|         queue.insert(gen_task(1, "test2", content.clone())); | ||||
|         queue.insert(gen_task(2, "test2", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(3, "test2", content.clone())); | ||||
|         queue.insert(gen_task(4, "test1", content.clone())); | ||||
|         queue.insert(gen_task(5, "test1", TaskContent::IndexDeletion)); | ||||
|         queue.insert(gen_task(6, "test2", content.clone())); | ||||
|         queue.insert(gen_task(7, "test1", content)); | ||||
|  | ||||
|         let mut batch = Vec::new(); | ||||
|  | ||||
|         let config = SchedulerConfig::default(); | ||||
|         make_batch(&mut queue, &mut batch, &config); | ||||
|         assert_eq!(batch, &[0, 4]); | ||||
|  | ||||
|         batch.clear(); | ||||
|         make_batch(&mut queue, &mut batch, &config); | ||||
|         assert_eq!(batch, &[1]); | ||||
|  | ||||
|         batch.clear(); | ||||
|         make_batch(&mut queue, &mut batch, &config); | ||||
|         assert_eq!(batch, &[2]); | ||||
|  | ||||
|         batch.clear(); | ||||
|         make_batch(&mut queue, &mut batch, &config); | ||||
|         assert_eq!(batch, &[3, 6]); | ||||
|  | ||||
|         batch.clear(); | ||||
|         make_batch(&mut queue, &mut batch, &config); | ||||
|         assert_eq!(batch, &[5]); | ||||
|  | ||||
|         batch.clear(); | ||||
|         make_batch(&mut queue, &mut batch, &config); | ||||
|         assert_eq!(batch, &[7]); | ||||
|  | ||||
|         assert!(queue.is_empty()); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -97,7 +97,7 @@ impl Task { | ||||
| pub enum Job { | ||||
|     Dump { | ||||
|         #[derivative(PartialEq = "ignore")] | ||||
|         ret: oneshot::Sender<Result<(), IndexResolverError>>, | ||||
|         ret: oneshot::Sender<Result<oneshot::Sender<()>, IndexResolverError>>, | ||||
|         path: PathBuf, | ||||
|     }, | ||||
|     Snapshot(#[derivative(PartialEq = "ignore")] SnapshotJob), | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| mod store; | ||||
|  | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::{BinaryHeap, HashSet}; | ||||
| use std::collections::HashSet; | ||||
| use std::io::{BufWriter, Write}; | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
| @@ -9,11 +8,9 @@ use std::sync::Arc; | ||||
| use chrono::Utc; | ||||
| use heed::{Env, RwTxn}; | ||||
| use log::debug; | ||||
| use tokio::sync::RwLock; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::TaskError; | ||||
| use super::task::{Job, Task, TaskContent, TaskId}; | ||||
| use super::task::{Task, TaskContent, TaskId}; | ||||
| use super::Result; | ||||
| use crate::index_resolver::IndexUid; | ||||
| use crate::tasks::task::TaskEvent; | ||||
| @@ -25,9 +22,10 @@ pub use store::test::MockStore as Store; | ||||
| pub use store::Store; | ||||
|  | ||||
| /// Defines constraints to be applied when querying for Tasks from the store. | ||||
| #[derive(Default, Debug)] | ||||
| #[derive(Default)] | ||||
| pub struct TaskFilter { | ||||
|     indexes: Option<HashSet<String>>, | ||||
|     filter_fn: Option<Box<dyn Fn(&Task) -> bool + Sync + Send + 'static>>, | ||||
| } | ||||
|  | ||||
| impl TaskFilter { | ||||
| @@ -44,85 +42,28 @@ impl TaskFilter { | ||||
|             .get_or_insert_with(Default::default) | ||||
|             .insert(index); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// You can't clone a job because of its volatile nature. | ||||
| /// If you need to take the `Job` with you, though, you can call the method | ||||
| /// `Pending::take`. It returns the `Pending` as-is and, for a job, leaves `Empty` in the original. | ||||
| #[derive(Debug, PartialEq)] | ||||
| pub enum Pending<T> { | ||||
|     /// A task stored on disk that must be processed. | ||||
|     Task(T), | ||||
|     /// Jobs always have a higher priority than normal tasks and are not stored on disk. | ||||
|     /// They can be referred to as `Volatile job`s. | ||||
|     Job(Job), | ||||
| } | ||||
|  | ||||
| impl Pending<TaskId> { | ||||
|     /// Makes a copy of the task or take the content of the volatile job. | ||||
|     pub(crate) fn take(&mut self) -> Self { | ||||
|         match self { | ||||
|             Self::Task(id) => Self::Task(*id), | ||||
|             Self::Job(job) => Self::Job(job.take()), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Eq for Pending<TaskId> {} | ||||
|  | ||||
| impl PartialOrd for Pending<TaskId> { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         match (self, other) { | ||||
|             // in case of two tasks we want to return the lowest taskId first. | ||||
|             (Pending::Task(lhs), Pending::Task(rhs)) => Some(lhs.cmp(rhs).reverse()), | ||||
|             // A job is always better than a task. | ||||
|             (Pending::Task(_), Pending::Job(_)) => Some(Ordering::Less), | ||||
|             (Pending::Job(_), Pending::Task(_)) => Some(Ordering::Greater), | ||||
|             // When there are two jobs, we consider them equal. | ||||
|             (Pending::Job(_), Pending::Job(_)) => Some(Ordering::Equal), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Pending<Task> { | ||||
|     pub fn get_content_uuid(&self) -> Option<Uuid> { | ||||
|         match self { | ||||
|             Pending::Task(task) => task.get_content_uuid(), | ||||
|             _ => None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Ord for Pending<TaskId> { | ||||
|     fn cmp(&self, other: &Self) -> Ordering { | ||||
|         self.partial_cmp(other).unwrap() | ||||
|     pub fn filter_fn(&mut self, f: impl Fn(&Task) -> bool + Sync + Send + 'static) { | ||||
|         self.filter_fn.replace(Box::new(f)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct TaskStore { | ||||
|     store: Arc<Store>, | ||||
|     pending_queue: Arc<RwLock<BinaryHeap<Pending<TaskId>>>>, | ||||
| } | ||||
|  | ||||
| impl Clone for TaskStore { | ||||
|     fn clone(&self) -> Self { | ||||
|         Self { | ||||
|             store: self.store.clone(), | ||||
|             pending_queue: self.pending_queue.clone(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TaskStore { | ||||
|     pub fn new(env: Arc<heed::Env>) -> Result<Self> { | ||||
|         let mut store = Store::new(env)?; | ||||
|         let unfinished_tasks = store.reset_and_return_unfinished_tasks()?; | ||||
|         let store = Arc::new(store); | ||||
|  | ||||
|         Ok(Self { | ||||
|             store, | ||||
|             pending_queue: Arc::new(RwLock::new(unfinished_tasks)), | ||||
|         }) | ||||
|         let store = Arc::new(Store::new(env)?); | ||||
|         Ok(Self { store }) | ||||
|     } | ||||
|  | ||||
|     pub async fn register(&self, index_uid: IndexUid, content: TaskContent) -> Result<Task> { | ||||
| @@ -146,11 +87,6 @@ impl TaskStore { | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         self.pending_queue | ||||
|             .write() | ||||
|             .await | ||||
|             .push(Pending::Task(task.id)); | ||||
|  | ||||
|         Ok(task) | ||||
|     } | ||||
|  | ||||
| @@ -159,35 +95,6 @@ impl TaskStore { | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Register an update that applies on multiple indexes. | ||||
|     /// Currently the update is considered as a priority. | ||||
|     pub async fn register_job(&self, content: Job) { | ||||
|         debug!("registering a job: {:?}", content); | ||||
|         self.pending_queue.write().await.push(Pending::Job(content)); | ||||
|     } | ||||
|  | ||||
|     /// Returns the next task to process. | ||||
|     pub async fn peek_pending_task(&self) -> Option<Pending<TaskId>> { | ||||
|         let mut pending_queue = self.pending_queue.write().await; | ||||
|         loop { | ||||
|             match pending_queue.peek()? { | ||||
|                 Pending::Job(Job::Empty) => drop(pending_queue.pop()), | ||||
|                 _ => return Some(pending_queue.peek_mut()?.take()), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the next task to process if there is one. | ||||
|     pub async fn get_processing_task(&self) -> Result<Option<Task>> { | ||||
|         match self.peek_pending_task().await { | ||||
|             Some(Pending::Task(tid)) => { | ||||
|                 let task = self.get_task(tid, None).await?; | ||||
|                 Ok(matches!(task.events.last(), Some(TaskEvent::Processing(_))).then(|| task)) | ||||
|             } | ||||
|             _ => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> { | ||||
|         let store = self.store.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || -> Result<_> { | ||||
| @@ -207,17 +114,33 @@ impl TaskStore { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub async fn update_tasks(&self, tasks: Vec<Pending<Task>>) -> Result<Vec<Pending<Task>>> { | ||||
|     pub async fn get_pending_tasks(&self, ids: Vec<TaskId>) -> Result<(Vec<TaskId>, Vec<Task>)> { | ||||
|         let store = self.store.clone(); | ||||
|         let tasks = tokio::task::spawn_blocking(move || -> Result<_> { | ||||
|             let mut tasks = Vec::new(); | ||||
|             let txn = store.rtxn()?; | ||||
|  | ||||
|             for id in ids.iter() { | ||||
|                 let task = store | ||||
|                     .get(&txn, *id)? | ||||
|                     .ok_or(TaskError::UnexistingTask(*id))?; | ||||
|                 tasks.push(task); | ||||
|             } | ||||
|             Ok((ids, tasks)) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(tasks) | ||||
|     } | ||||
|  | ||||
|     pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> { | ||||
|         let store = self.store.clone(); | ||||
|  | ||||
|         let tasks = tokio::task::spawn_blocking(move || -> Result<_> { | ||||
|             let mut txn = store.wtxn()?; | ||||
|  | ||||
|             for task in &tasks { | ||||
|                 match task { | ||||
|                     Pending::Task(task) => store.put(&mut txn, task)?, | ||||
|                     Pending::Job(_) => (), | ||||
|                 } | ||||
|                 store.put(&mut txn, task)?; | ||||
|             } | ||||
|  | ||||
|             txn.commit()?; | ||||
| @@ -229,21 +152,6 @@ impl TaskStore { | ||||
|         Ok(tasks) | ||||
|     } | ||||
|  | ||||
|     /// Delete one task from the queue and remove all `Empty` jobs. | ||||
|     pub async fn delete_pending(&self, to_delete: &Pending<Task>) { | ||||
|         if let Pending::Task(Task { id: pending_id, .. }) = to_delete { | ||||
|             let mut pending_queue = self.pending_queue.write().await; | ||||
|             *pending_queue = std::mem::take(&mut *pending_queue) | ||||
|                 .into_iter() | ||||
|                 .filter(|pending| match pending { | ||||
|                     Pending::Job(Job::Empty) => false, | ||||
|                     Pending::Task(id) => pending_id != id, | ||||
|                     _ => true, | ||||
|                 }) | ||||
|                 .collect::<BinaryHeap<Pending<TaskId>>>(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub async fn list_tasks( | ||||
|         &self, | ||||
|         offset: Option<TaskId>, | ||||
| @@ -348,23 +256,15 @@ pub mod test { | ||||
|             Self::Mock(Arc::new(mocker)) | ||||
|         } | ||||
|  | ||||
|         pub async fn update_tasks(&self, tasks: Vec<Pending<Task>>) -> Result<Vec<Pending<Task>>> { | ||||
|         pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.update_tasks(tasks).await, | ||||
|                 Self::Mock(m) => unsafe { | ||||
|                     m.get::<_, Result<Vec<Pending<Task>>>>("update_tasks") | ||||
|                         .call(tasks) | ||||
|                     m.get::<_, Result<Vec<Task>>>("update_tasks").call(tasks) | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn delete_pending(&self, to_delete: &Pending<Task>) { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.delete_pending(to_delete).await, | ||||
|                 Self::Mock(m) => unsafe { m.get("delete_pending").call(to_delete) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.get_task(id, filter).await, | ||||
| @@ -372,23 +272,13 @@ pub mod test { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn get_processing_task(&self) -> Result<Option<Task>> { | ||||
|         pub async fn get_pending_tasks( | ||||
|             &self, | ||||
|             tasks: Vec<TaskId>, | ||||
|         ) -> Result<(Vec<TaskId>, Vec<Task>)> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.get_processing_task().await, | ||||
|                 Self::Mock(m) => unsafe { | ||||
|                     m.get::<_, Result<Option<Task>>>("get_pending_task") | ||||
|                         .call(()) | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn peek_pending_task(&self) -> Option<Pending<TaskId>> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.peek_pending_task().await, | ||||
|                 Self::Mock(m) => unsafe { | ||||
|                     m.get::<_, Option<Pending<TaskId>>>("peek_pending_task") | ||||
|                         .call(()) | ||||
|                 }, | ||||
|                 Self::Real(s) => s.get_pending_tasks(tasks).await, | ||||
|                 Self::Mock(m) => unsafe { m.get("get_pending_task").call(tasks) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
| @@ -400,14 +290,18 @@ pub mod test { | ||||
|         ) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.list_tasks(from, filter, limit).await, | ||||
|                 Self::Mock(_m) => todo!(), | ||||
|                 Self::Mock(m) => unsafe { m.get("list_tasks").call((from, filter, limit)) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn dump(&self, path: &Path, update_file_store: UpdateFileStore) -> Result<()> { | ||||
|         pub async fn dump( | ||||
|             &self, | ||||
|             path: impl AsRef<Path>, | ||||
|             update_file_store: UpdateFileStore, | ||||
|         ) -> Result<()> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.dump(path, update_file_store).await, | ||||
|                 Self::Mock(_m) => todo!(), | ||||
|                 Self::Mock(m) => unsafe { m.get("dump").call((path, update_file_store)) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
| @@ -425,13 +319,6 @@ pub mod test { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn register_job(&self, content: Job) { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.register_job(content).await, | ||||
|                 Self::Mock(_m) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn load_dump(path: impl AsRef<Path>, env: Arc<Env>) -> anyhow::Result<()> { | ||||
|             TaskStore::load_dump(path, env) | ||||
|         } | ||||
|   | ||||
| @@ -19,7 +19,7 @@ use crate::tasks::task::{Task, TaskId}; | ||||
|  | ||||
| use super::super::Result; | ||||
|  | ||||
| use super::{Pending, TaskFilter}; | ||||
| use super::TaskFilter; | ||||
|  | ||||
| enum IndexUidTaskIdCodec {} | ||||
|  | ||||
| @@ -84,41 +84,6 @@ impl Store { | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// This function should be called *right after* creating the store. | ||||
|     /// It puts all unfinished updates back in the `Created` state. This | ||||
|     /// allows us to re-enqueue an update that didn't have the time to finish | ||||
|     /// when Meilisearch closed. | ||||
|     pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> { | ||||
|         let mut unfinished_tasks: BinaryHeap<Pending<TaskId>> = BinaryHeap::new(); | ||||
|  | ||||
|         let mut wtxn = self.wtxn()?; | ||||
|         let mut iter = self.tasks.rev_iter_mut(&mut wtxn)?; | ||||
|  | ||||
|         while let Some(entry) = iter.next() { | ||||
|             let entry = entry?; | ||||
|             let (id, mut task): (BEU64, Task) = entry; | ||||
|  | ||||
|             // Since all tasks are ordered and we iterate in reverse, we can stop iterating when we encounter our first finished task. | ||||
|             if task.is_finished() { | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             // we only keep the first state. It’s supposed to be a `Created` state. | ||||
|             task.events.drain(1..); | ||||
|             unfinished_tasks.push(Pending::Task(id.get())); | ||||
|  | ||||
|             // Since we own the id and the task this is a safe operation. | ||||
|             unsafe { | ||||
|                 iter.put_current(&id, &task)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         drop(iter); | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(unfinished_tasks) | ||||
|     } | ||||
|  | ||||
|     pub fn wtxn(&self) -> Result<RwTxn> { | ||||
|         Ok(self.env.write_txn()?) | ||||
|     } | ||||
| @@ -166,7 +131,11 @@ impl Store { | ||||
|             .map(|limit| (limit as u64).saturating_add(from)) | ||||
|             .unwrap_or(u64::MAX); | ||||
|         let iter: Box<dyn Iterator<Item = StdResult<_, heed::Error>>> = match filter { | ||||
|             Some(filter) => { | ||||
|             Some( | ||||
|                 ref filter @ TaskFilter { | ||||
|                     indexes: Some(_), .. | ||||
|                 }, | ||||
|             ) => { | ||||
|                 let iter = self | ||||
|                     .compute_candidates(txn, filter, range)? | ||||
|                     .into_iter() | ||||
| @@ -174,15 +143,24 @@ impl Store { | ||||
|  | ||||
|                 Box::new(iter) | ||||
|             } | ||||
|             None => Box::new( | ||||
|             _ => Box::new( | ||||
|                 self.tasks | ||||
|                     .rev_range(txn, &(BEU64::new(range.start)..BEU64::new(range.end)))? | ||||
|                     .map(|r| r.map(|(_, t)| t)), | ||||
|             ), | ||||
|         }; | ||||
|  | ||||
|         let apply_fitler = |task: &StdResult<_, heed::Error>| match task { | ||||
|             Ok(ref t) => filter | ||||
|                 .as_ref() | ||||
|                 .and_then(|filter| filter.filter_fn.as_ref()) | ||||
|                 .map(|f| f(t)) | ||||
|                 .unwrap_or(true), | ||||
|             Err(_) => true, | ||||
|         }; | ||||
|         // Collect 'limit' task if it exists or all of them. | ||||
|         let tasks = iter | ||||
|             .filter(apply_fitler) | ||||
|             .take(limit.unwrap_or(usize::MAX)) | ||||
|             .try_fold::<_, _, StdResult<_, heed::Error>>(Vec::new(), |mut v, task| { | ||||
|                 v.push(task?); | ||||
| @@ -195,11 +173,11 @@ impl Store { | ||||
|     fn compute_candidates( | ||||
|         &self, | ||||
|         txn: &heed::RoTxn, | ||||
|         filter: TaskFilter, | ||||
|         filter: &TaskFilter, | ||||
|         range: Range<TaskId>, | ||||
|     ) -> Result<BinaryHeap<TaskId>> { | ||||
|         let mut candidates = BinaryHeap::new(); | ||||
|         if let Some(indexes) = filter.indexes { | ||||
|         if let Some(ref indexes) = filter.indexes { | ||||
|             for index in indexes { | ||||
|                 // We need to prefix search the null terminated string to make sure that we only | ||||
|                 // get exact matches for the index, and not other uids that would share the same | ||||
| @@ -290,13 +268,6 @@ pub mod test { | ||||
|             Ok(Self::Real(Store::new(env)?)) | ||||
|         } | ||||
|  | ||||
|         pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.reset_and_return_unfinished_tasks(), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn wtxn(&self) -> Result<RwTxn> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.wtxn(), | ||||
|   | ||||
							
								
								
									
										107
									
								
								meilisearch-lib/src/tasks/update_loop.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								meilisearch-lib/src/tasks/update_loop.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use chrono::Utc; | ||||
| use tokio::sync::{watch, RwLock}; | ||||
| use tokio::time::interval_at; | ||||
|  | ||||
| use super::batch::Batch; | ||||
| use super::error::Result; | ||||
| use super::scheduler::Pending; | ||||
| use super::{Scheduler, TaskPerformer}; | ||||
| use crate::tasks::task::TaskEvent; | ||||
|  | ||||
| /// The update loop sequentially performs batches of updates: each time `notifier` changes, it asks the scheduler | ||||
| /// for pending work (after waiting `debounce_duration`, when one is set) and hands it to the `TaskPerformer`. | ||||
| pub struct UpdateLoop<P: TaskPerformer> { | ||||
|     scheduler: Arc<RwLock<Scheduler>>, | ||||
|     performer: Arc<P>, | ||||
|  | ||||
|     notifier: Option<watch::Receiver<()>>, | ||||
|     debounce_duration: Option<Duration>, | ||||
| } | ||||
|  | ||||
| impl<P> UpdateLoop<P> | ||||
| where | ||||
|     P: TaskPerformer + Send + Sync + 'static, | ||||
| { | ||||
|     /// Builds an update loop over the given scheduler and performer. | ||||
|     /// | ||||
|     /// `debuf_duration` is the optional debounce delay observed after each notification, | ||||
|     /// before the scheduler is asked for work. `notifier` is the watch channel whose changes | ||||
|     /// wake the loop up. | ||||
|     pub fn new( | ||||
|         scheduler: Arc<RwLock<Scheduler>>, | ||||
|         performer: Arc<P>, | ||||
|         debuf_duration: Option<Duration>, | ||||
|         notifier: watch::Receiver<()>, | ||||
|     ) -> Self { | ||||
|         Self { | ||||
|             scheduler, | ||||
|             performer, | ||||
|             debounce_duration: debuf_duration, | ||||
|             notifier: Some(notifier), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Runs the loop until waiting on the notifier fails. | ||||
|     /// | ||||
|     /// Each iteration waits for a notification, then waits for the debounce duration (when a | ||||
|     /// non-zero one is configured), and finally processes whatever the scheduler hands out. | ||||
|     /// Processing errors are logged and do not stop the loop. | ||||
|     pub async fn run(mut self) { | ||||
|         let mut notifier = self | ||||
|             .notifier | ||||
|             .take() | ||||
|             .expect("the notifier is always set by `new` and only taken here"); | ||||
|  | ||||
|         loop { | ||||
|             // An error here means no further notification can arrive, so the loop ends. | ||||
|             if notifier.changed().await.is_err() { | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             // `interval_at` panics when given a zero period, so a zero debounce duration is | ||||
|             // treated as "no debounce" instead of bringing the whole update loop down. | ||||
|             if let Some(t) = self.debounce_duration.filter(|t| !t.is_zero()) { | ||||
|                 let mut interval = interval_at(tokio::time::Instant::now() + t, t); | ||||
|                 interval.tick().await; | ||||
|             }; | ||||
|  | ||||
|             if let Err(e) = self.process_next_batch().await { | ||||
|                 log::error!("an error occured while processing an update batch: {}", e); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Asks the scheduler for pending work and processes it. | ||||
|     /// | ||||
|     /// - `Pending::Batch`: a `Processing` event is pushed to every task, the tasks are saved | ||||
|     ///   through the scheduler, the batch is given to the performer and its result is passed | ||||
|     ///   to `handle_batch_result`. | ||||
|     /// - `Pending::Job`: the job is given to the performer. | ||||
|     /// - `Pending::Nothing`: nothing is done. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the error of the scheduler's `prepare` or `update_tasks`, or of | ||||
|     /// `handle_batch_result`. | ||||
|     async fn process_next_batch(&self) -> Result<()> { | ||||
|         // The write lock is only held for the duration of this statement. | ||||
|         let pending = { self.scheduler.write().await.prepare().await? }; | ||||
|         match pending { | ||||
|             Pending::Batch(mut batch) => { | ||||
|                 for task in &mut batch.tasks { | ||||
|                     task.events.push(TaskEvent::Processing(Utc::now())); | ||||
|                 } | ||||
|  | ||||
|                 // Save the `Processing` state before the batch is actually performed. | ||||
|                 batch.tasks = { | ||||
|                     self.scheduler | ||||
|                         .read() | ||||
|                         .await | ||||
|                         .update_tasks(batch.tasks) | ||||
|                         .await? | ||||
|                 }; | ||||
|  | ||||
|                 let performer = self.performer.clone(); | ||||
|  | ||||
|                 let batch = performer.process_batch(batch).await; | ||||
|  | ||||
|                 self.handle_batch_result(batch).await?; | ||||
|             } | ||||
|             Pending::Job(job) => { | ||||
|                 let performer = self.performer.clone(); | ||||
|                 performer.process_job(job).await; | ||||
|             } | ||||
|             Pending::Nothing => (), | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Handles the result from a processed batch. | ||||
|     /// | ||||
|     /// When a task is processed, the result of the process is pushed to its event list. | ||||
|     /// `handle_batch_result` makes sure that the new state is saved through the scheduler, | ||||
|     /// then tells the scheduler that the batch is finished and calls the performer's `finish` | ||||
|     /// with the updated batch. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the error of the scheduler's `update_tasks`; in that case neither `finish` | ||||
|     /// call is made. | ||||
|     async fn handle_batch_result(&self, mut batch: Batch) -> Result<()> { | ||||
|         let mut scheduler = self.scheduler.write().await; | ||||
|         let tasks = scheduler.update_tasks(batch.tasks).await?; | ||||
|         scheduler.finish(); | ||||
|         // Release the scheduler lock before calling into the performer. | ||||
|         drop(scheduler); | ||||
|         batch.tasks = tasks; | ||||
|         self.performer.finish(&batch).await; | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user