mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	split meilisearch-http and meilisearch-lib
This commit is contained in:
		
							
								
								
									
										62
									
								
								meilisearch-lib/src/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								meilisearch-lib/src/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| use std::error::Error; | ||||
| use std::fmt; | ||||
|  | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
| use milli::UserError; | ||||
|  | ||||
| macro_rules! internal_error { | ||||
|     ($target:ty : $($other:path), *) => { | ||||
|         $( | ||||
|             impl From<$other> for $target { | ||||
|                 fn from(other: $other) -> Self { | ||||
|                     Self::Internal(Box::new(other)) | ||||
|                 } | ||||
|             } | ||||
|         )* | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct MilliError<'a>(pub &'a milli::Error); | ||||
|  | ||||
| impl Error for MilliError<'_> {} | ||||
|  | ||||
| impl fmt::Display for MilliError<'_> { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         self.0.fmt(f) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl ErrorCode for MilliError<'_> { | ||||
|     /// Maps the wrapped milli error to the [`Code`] reported for it. | ||||
|     fn error_code(&self) -> Code { | ||||
|         // Only user errors carry a dedicated code; the other kinds are internal. | ||||
|         let user_error = match self.0 { | ||||
|             milli::Error::UserError(user_error) => user_error, | ||||
|             milli::Error::InternalError(_) | milli::Error::IoError(_) => { | ||||
|                 return Code::Internal; | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         match user_error { | ||||
|             UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, | ||||
|             UserError::InvalidFilter(_) | UserError::InvalidFilterAttribute(_) => Code::Filter, | ||||
|             UserError::InvalidSortName { .. } | ||||
|             | UserError::SortRankingRuleMissing | ||||
|             | UserError::InvalidSortableAttribute { .. } => Code::Sort, | ||||
|             UserError::MissingDocumentId { .. } => Code::MissingDocumentId, | ||||
|             UserError::MissingPrimaryKey => Code::MissingPrimaryKey, | ||||
|             UserError::PrimaryKeyCannotBeChanged | UserError::PrimaryKeyCannotBeReset => { | ||||
|                 Code::PrimaryKeyAlreadyPresent | ||||
|             } | ||||
|             UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, | ||||
|             UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, | ||||
|             // TODO: wait for spec for new error codes. | ||||
|             UserError::SerdeJson(_) | ||||
|             | UserError::MaxDatabaseSizeReached | ||||
|             | UserError::InvalidCriterionName { .. } | ||||
|             | UserError::InvalidDocumentId { .. } | ||||
|             | UserError::InvalidStoreFile | ||||
|             | UserError::NoSpaceLeftOnDevice | ||||
|             | UserError::InvalidAscDescSyntax { .. } | ||||
|             | UserError::DocumentLimitReached => Code::Internal, | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										166
									
								
								meilisearch-lib/src/index/dump.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								meilisearch-lib/src/index/dump.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,166 @@ | ||||
| use std::fs::File; | ||||
| use std::io::Write; | ||||
| use std::path::Path; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| use indexmap::IndexMap; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::{Index, Settings, Unchecked}; | ||||
|  | ||||
| /// Index metadata written to the dump's meta file: the index settings and its | ||||
| /// primary key, if one is set. | ||||
| #[derive(Serialize, Deserialize)] | ||||
| struct DumpMeta { | ||||
|     settings: Settings<Unchecked>, | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Name, inside the dump directory, of the file holding the serialized [`DumpMeta`]. | ||||
| const META_FILE_NAME: &str = "meta.json"; | ||||
| /// Name, inside the dump directory, of the file holding one JSON document per line. | ||||
| const DATA_FILE_NAME: &str = "documents.jsonl"; | ||||
|  | ||||
| impl Index { | ||||
|     /// Dumps the index into the directory `path`: the documents are written to | ||||
|     /// `documents.jsonl` and the settings and primary key to `meta.json`. | ||||
|     /// | ||||
|     /// The directory is not created here, so it has to exist already. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails if the transaction cannot be opened, if the index cannot be read, or | ||||
|     /// if one of the files cannot be created, serialized or written. | ||||
|     pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> { | ||||
|         // Acquire a write txn to make sure any ongoing write is finished before we start. | ||||
|         let txn = self.env.write_txn()?; | ||||
|  | ||||
|         self.dump_documents(&txn, &path)?; | ||||
|         self.dump_meta(&txn, &path)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Writes every document of the index to `documents.jsonl` in `path`, as one | ||||
|     /// JSON object per line. | ||||
|     /// | ||||
|     /// Fields whose id has no name in the fields ids map are left out. | ||||
|     fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> { | ||||
|         let document_file_path = path.as_ref().join(DATA_FILE_NAME); | ||||
|         // Buffer the output: serializing straight into a `File` turns every small | ||||
|         // piece of JSON into its own write call. | ||||
|         let mut document_file = std::io::BufWriter::new(File::create(&document_file_path)?); | ||||
|  | ||||
|         let documents = self.all_documents(txn)?; | ||||
|         let fields_ids_map = self.fields_ids_map(txn)?; | ||||
|  | ||||
|         // dump documents; the map is cleared and reused for each document. | ||||
|         let mut json_map = IndexMap::new(); | ||||
|         for document in documents { | ||||
|             let (_, reader) = document?; | ||||
|  | ||||
|             for (fid, bytes) in reader.iter() { | ||||
|                 if let Some(name) = fields_ids_map.name(fid) { | ||||
|                     json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             serde_json::to_writer(&mut document_file, &json_map)?; | ||||
|             document_file.write_all(b"\n")?; | ||||
|  | ||||
|             json_map.clear(); | ||||
|         } | ||||
|  | ||||
|         // Flush explicitly: an error while flushing on drop would be silently lost. | ||||
|         document_file.flush()?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Writes the index settings and primary key to `meta.json` in `path`. | ||||
|     fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> { | ||||
|         let meta_file_path = path.as_ref().join(META_FILE_NAME); | ||||
|         // Buffered for the same reason as the documents file. | ||||
|         let mut meta_file = std::io::BufWriter::new(File::create(&meta_file_path)?); | ||||
|  | ||||
|         let settings = self.settings_txn(txn)?.into_unchecked(); | ||||
|         let primary_key = self.primary_key(txn)?.map(String::from); | ||||
|         let meta = DumpMeta { | ||||
|             settings, | ||||
|             primary_key, | ||||
|         }; | ||||
|  | ||||
|         serde_json::to_writer(&mut meta_file, &meta)?; | ||||
|         // Flush explicitly: an error while flushing on drop would be silently lost. | ||||
|         meta_file.flush()?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Loads an index dump. Not implemented at the moment: the previous | ||||
|     /// implementation is kept below as comments and none of the arguments is used. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Always panics, through `todo!`. | ||||
|     pub fn load_dump( | ||||
|         _src: impl AsRef<Path>, | ||||
|         _dst: impl AsRef<Path>, | ||||
|         _size: usize, | ||||
|         _indexing_options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         //let dir_name = src | ||||
|             //.as_ref() | ||||
|             //.file_name() | ||||
|             //.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; | ||||
|  | ||||
|         //let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); | ||||
|         //create_dir_all(&dst_dir_path)?; | ||||
|  | ||||
|         //let meta_path = src.as_ref().join(META_FILE_NAME); | ||||
|         //let mut meta_file = File::open(meta_path)?; | ||||
|  | ||||
|         //// We first deserialize the dump meta into a serde_json::Value and change | ||||
|         //// the custom ranking rules settings from the old format to the new format. | ||||
|         //let mut meta: Value = serde_json::from_reader(&mut meta_file)?; | ||||
|         //if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { | ||||
|             //convert_custom_ranking_rules(ranking_rules); | ||||
|         //} | ||||
|  | ||||
|         //// Then we serialize it back into a vec to deserialize it | ||||
|         //// into a `DumpMeta` struct with the newly patched `rankingRules` format. | ||||
|         //let patched_meta = serde_json::to_vec(&meta)?; | ||||
|  | ||||
|         //let DumpMeta { | ||||
|             //settings, | ||||
|             //primary_key, | ||||
|         //} = serde_json::from_slice(&patched_meta)?; | ||||
|         //let settings = settings.check(); | ||||
|         //let index = Self::open(&dst_dir_path, size)?; | ||||
|         //let mut txn = index.write_txn()?; | ||||
|  | ||||
|         //let handler = UpdateHandler::new(indexing_options)?; | ||||
|  | ||||
|         //index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; | ||||
|  | ||||
|         //let document_file_path = src.as_ref().join(DATA_FILE_NAME); | ||||
|         //let reader = File::open(&document_file_path)?; | ||||
|         //let mut reader = BufReader::new(reader); | ||||
|         //reader.fill_buf()?; | ||||
|         // If the document file is empty, we don't perform the document addition, to prevent | ||||
|         // a primary key error to be thrown. | ||||
|  | ||||
|         todo!("fix obk document dumps") | ||||
|         //if !reader.buffer().is_empty() { | ||||
|             //index.update_documents_txn( | ||||
|                 //&mut txn, | ||||
|                 //IndexDocumentsMethod::UpdateDocuments, | ||||
|                 //Some(reader), | ||||
|                 //handler.update_builder(0), | ||||
|                 //primary_key.as_deref(), | ||||
|             //)?; | ||||
|         //} | ||||
|  | ||||
|         //txn.commit()?; | ||||
|  | ||||
|         //match Arc::try_unwrap(index.0) { | ||||
|             //Ok(inner) => inner.prepare_for_closing().wait(), | ||||
|             //Err(_) => bail!("Could not close index properly."), | ||||
|         //} | ||||
|  | ||||
|         //Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. | ||||
| // /// | ||||
| // /// This is done for compatibility reasons, and to avoid a new dump version, | ||||
| // /// since the new syntax was introduced soon after the new dump version. | ||||
| //fn convert_custom_ranking_rules(ranking_rules: &mut Value) { | ||||
|     //*ranking_rules = match ranking_rules.take() { | ||||
|         //Value::Array(values) => values | ||||
|             //.into_iter() | ||||
|             //.filter_map(|value| match value { | ||||
|                 //Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s) | ||||
|                     //.map(|f| format!("{}:asc", f)) | ||||
|                     //.map(Value::String), | ||||
|                 //Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s) | ||||
|                     //.map(|f| format!("{}:desc", f)) | ||||
|                     //.map(Value::String), | ||||
|                 //otherwise => Some(otherwise), | ||||
|             //}) | ||||
|             //.collect(), | ||||
|         //otherwise => otherwise, | ||||
|     //} | ||||
| //} | ||||
							
								
								
									
										52
									
								
								meilisearch-lib/src/index/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								meilisearch-lib/src/index/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| use std::error::Error; | ||||
|  | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use crate::error::MilliError; | ||||
|  | ||||
| /// Result alias whose error type is [`IndexError`]. | ||||
| pub type Result<T> = std::result::Result<T, IndexError>; | ||||
|  | ||||
| /// Errors returned by operations on an index. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| pub enum IndexError { | ||||
|     /// Any boxed error, reported with the `Internal` code. | ||||
|     #[error("Internal error: {0}")] | ||||
|     Internal(Box<dyn Error + Send + Sync + 'static>), | ||||
|     /// No document matches the given id; the payload is that id. | ||||
|     #[error("Document with id {0} not found.")] | ||||
|     DocumentNotFound(String), | ||||
|     /// A facet error, displayed as is. | ||||
|     #[error("{0}")] | ||||
|     Facet(#[from] FacetError), | ||||
|     /// An error coming from milli, displayed as is. | ||||
|     #[error("{0}")] | ||||
|     Milli(#[from] milli::Error), | ||||
| } | ||||
|  | ||||
| // Convert I/O, heed, fst and serde_json errors into `IndexError::Internal`. | ||||
| internal_error!( | ||||
|     IndexError: std::io::Error, | ||||
|     heed::Error, | ||||
|     fst::Error, | ||||
|     serde_json::Error | ||||
| ); | ||||
|  | ||||
| impl ErrorCode for IndexError { | ||||
|     /// Returns the code of this error; wrapped facet and milli errors provide | ||||
|     /// their own code. | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             Self::Facet(facet_error) => facet_error.error_code(), | ||||
|             Self::Milli(milli_error) => MilliError(milli_error).error_code(), | ||||
|             Self::DocumentNotFound(_) => Code::DocumentNotFound, | ||||
|             Self::Internal(_) => Code::Internal, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Errors raised while handling facet expressions. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| pub enum FacetError { | ||||
|     /// The expression is invalid: the first field lists what was expected (joined | ||||
|     /// with `", "` in the message), the second is the value that was found. | ||||
|     #[error("Invalid facet expression, expected {}, found: {1}", .0.join(", "))] | ||||
|     InvalidExpression(&'static [&'static str], Value), | ||||
| } | ||||
|  | ||||
| impl ErrorCode for FacetError { | ||||
|     /// Every facet error is reported with the `Facet` code. | ||||
|     fn error_code(&self) -> Code { | ||||
|         // Irrefutable while the enum has a single variant: adding another variant | ||||
|         // stops this from compiling, like the exhaustive `match` it replaces. | ||||
|         let Self::InvalidExpression(..) = self; | ||||
|         Code::Facet | ||||
|     } | ||||
| } | ||||
							
								
								
									
										202
									
								
								meilisearch-lib/src/index/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								meilisearch-lib/src/index/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | ||||
| use std::collections::{BTreeSet, HashSet}; | ||||
| use std::fs::create_dir_all; | ||||
| use std::marker::PhantomData; | ||||
| use std::ops::Deref; | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use heed::{EnvOpenOptions, RoTxn}; | ||||
| use milli::update::Setting; | ||||
| use milli::{obkv_to_json, FieldId}; | ||||
| use serde_json::{Map, Value}; | ||||
|  | ||||
| use error::Result; | ||||
| pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; | ||||
| pub use updates::{Checked, Facets, Settings, Unchecked}; | ||||
|  | ||||
| use crate::EnvSizer; | ||||
| use crate::index_controller::update_file_store::UpdateFileStore; | ||||
|  | ||||
| use self::error::IndexError; | ||||
|  | ||||
| pub mod error; | ||||
| pub mod update_handler; | ||||
|  | ||||
| mod dump; | ||||
| mod search; | ||||
| mod updates; | ||||
|  | ||||
| /// A document, represented as a JSON object. | ||||
| pub type Document = Map<String, Value>; | ||||
|  | ||||
| /// Handle on a milli index together with the update file store it uses. | ||||
| /// | ||||
| /// Both fields are behind an `Arc`, so cloning the handle shares them. | ||||
| #[derive(Clone)] | ||||
| pub struct Index { | ||||
|     pub inner: Arc<milli::Index>, | ||||
|     update_file_store: Arc<UpdateFileStore>, | ||||
| } | ||||
|  | ||||
| impl Deref for Index { | ||||
|     type Target = milli::Index; | ||||
|  | ||||
|     /// Gives direct access to the methods of the underlying milli index. | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.inner | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Index { | ||||
|     /// Opens the milli index stored at `path`, creating the directory (and its | ||||
|     /// parents) first. `size` is used as the map size of the environment. | ||||
|     pub fn open(path: impl AsRef<Path>, size: usize, update_file_store: Arc<UpdateFileStore>) -> Result<Self> { | ||||
|         create_dir_all(&path)?; | ||||
|  | ||||
|         let mut env_options = EnvOpenOptions::new(); | ||||
|         env_options.map_size(size); | ||||
|         let index = milli::Index::new(env_options, &path)?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             inner: Arc::new(index), | ||||
|             update_file_store, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Returns the current settings of the index, read in a fresh read transaction. | ||||
|     pub fn settings(&self) -> Result<Settings<Checked>> { | ||||
|         let rtxn = self.read_txn()?; | ||||
|         self.settings_txn(&rtxn) | ||||
|     } | ||||
|  | ||||
|     /// Returns the current settings of the index, read through `txn`. | ||||
|     /// | ||||
|     /// Displayed attributes, searchable attributes and the distinct attribute are | ||||
|     /// reported as `Reset` when the index has none; every other setting is `Set`. | ||||
|     pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> { | ||||
|         let displayed_attributes = match self.displayed_fields(txn)? { | ||||
|             Some(fields) => Setting::Set(fields.into_iter().map(String::from).collect()), | ||||
|             None => Setting::Reset, | ||||
|         }; | ||||
|  | ||||
|         let searchable_attributes = match self.searchable_fields(txn)? { | ||||
|             Some(fields) => Setting::Set(fields.into_iter().map(String::from).collect()), | ||||
|             None => Setting::Reset, | ||||
|         }; | ||||
|  | ||||
|         let filterable_attributes = Setting::Set(self.filterable_fields(txn)?.into_iter().collect()); | ||||
|         let sortable_attributes = Setting::Set(self.sortable_fields(txn)?.into_iter().collect()); | ||||
|  | ||||
|         let ranking_rules = Setting::Set( | ||||
|             self.criteria(txn)? | ||||
|                 .into_iter() | ||||
|                 .map(|criterion| criterion.to_string()) | ||||
|                 .collect(), | ||||
|         ); | ||||
|  | ||||
|         // No stop words stored means an empty set. | ||||
|         let stop_words: BTreeSet<String> = match self.stop_words(txn)? { | ||||
|             Some(words) => words.stream().into_strs()?.into_iter().collect(), | ||||
|             None => BTreeSet::new(), | ||||
|         }; | ||||
|  | ||||
|         let distinct_attribute = match self.distinct_field(txn)? { | ||||
|             Some(field) => Setting::Set(String::from(field)), | ||||
|             None => Setting::Reset, | ||||
|         }; | ||||
|  | ||||
|         // In milli, each entry of the synonyms map was split on its separators. That | ||||
|         // information is lost, so the words are joined back with single spaces. | ||||
|         let stored_synonyms = self.synonyms(txn)?; | ||||
|         let synonyms = stored_synonyms | ||||
|             .iter() | ||||
|             .map(|(words, alternatives)| { | ||||
|                 let joined_alternatives = alternatives.iter().map(|alt| alt.join(" ")).collect(); | ||||
|                 (words.join(" "), joined_alternatives) | ||||
|             }) | ||||
|             .collect(); | ||||
|  | ||||
|         Ok(Settings { | ||||
|             displayed_attributes, | ||||
|             searchable_attributes, | ||||
|             filterable_attributes, | ||||
|             sortable_attributes, | ||||
|             ranking_rules, | ||||
|             stop_words: Setting::Set(stop_words), | ||||
|             distinct_attribute, | ||||
|             synonyms: Setting::Set(synonyms), | ||||
|             _kind: PhantomData, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Returns at most `limit` documents, skipping the first `offset` ones, each | ||||
|     /// restricted to the fields selected by `fields_to_display`. | ||||
|     pub fn retrieve_documents<S: AsRef<str>>( | ||||
|         &self, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<S>>, | ||||
|     ) -> Result<Vec<Map<String, Value>>> { | ||||
|         let rtxn = self.read_txn()?; | ||||
|  | ||||
|         let fields_ids_map = self.fields_ids_map(&rtxn)?; | ||||
|         let displayed = self.fields_to_display(&rtxn, &attributes_to_retrieve, &fields_ids_map)?; | ||||
|  | ||||
|         // Collecting into a `Result` stops at the first failing entry. | ||||
|         let documents = self | ||||
|             .documents | ||||
|             .range(&rtxn, &(..))? | ||||
|             .skip(offset) | ||||
|             .take(limit) | ||||
|             .map(|entry| -> Result<Map<String, Value>> { | ||||
|                 let (_id, obkv) = entry?; | ||||
|                 Ok(obkv_to_json(&displayed, &fields_ids_map, obkv)?) | ||||
|             }) | ||||
|             .collect::<Result<Vec<_>>>()?; | ||||
|  | ||||
|         Ok(documents) | ||||
|     } | ||||
|  | ||||
|     /// Returns the document whose external id is `doc_id`, restricted to the | ||||
|     /// fields selected by `fields_to_display`. | ||||
|     /// | ||||
|     /// Fails with `IndexError::DocumentNotFound` when the id is unknown or when no | ||||
|     /// document is stored for it. | ||||
|     pub fn retrieve_document<S: AsRef<str>>( | ||||
|         &self, | ||||
|         doc_id: String, | ||||
|         attributes_to_retrieve: Option<Vec<S>>, | ||||
|     ) -> Result<Map<String, Value>> { | ||||
|         let rtxn = self.read_txn()?; | ||||
|  | ||||
|         let fields_ids_map = self.fields_ids_map(&rtxn)?; | ||||
|         let displayed = self.fields_to_display(&rtxn, &attributes_to_retrieve, &fields_ids_map)?; | ||||
|  | ||||
|         let internal_id = match self.external_documents_ids(&rtxn)?.get(doc_id.as_bytes()) { | ||||
|             Some(id) => id, | ||||
|             None => return Err(IndexError::DocumentNotFound(doc_id)), | ||||
|         }; | ||||
|  | ||||
|         let first_match = self | ||||
|             .documents(&rtxn, std::iter::once(internal_id))? | ||||
|             .into_iter() | ||||
|             .next(); | ||||
|         let obkv = match first_match { | ||||
|             Some((_, obkv)) => obkv, | ||||
|             None => return Err(IndexError::DocumentNotFound(doc_id)), | ||||
|         }; | ||||
|  | ||||
|         let document = obkv_to_json(&displayed, &fields_ids_map, obkv)?; | ||||
|  | ||||
|         Ok(document) | ||||
|     } | ||||
|  | ||||
|     /// Returns the size reported by the index environment. | ||||
|     pub fn size(&self) -> u64 { | ||||
|         self.env.size() | ||||
|     } | ||||
|  | ||||
|     /// Computes the ids of the fields to return: the displayed fields of the index | ||||
|     /// (every known field when none is configured), keeping only those requested | ||||
|     /// in `attributes_to_retrieve` (every known field when it is `None`). | ||||
|     /// | ||||
|     /// Requested attributes that are unknown to `fields_ids_map` are ignored. | ||||
|     fn fields_to_display<S: AsRef<str>>( | ||||
|         &self, | ||||
|         txn: &heed::RoTxn, | ||||
|         attributes_to_retrieve: &Option<Vec<S>>, | ||||
|         fields_ids_map: &milli::FieldsIdsMap, | ||||
|     ) -> Result<Vec<FieldId>> { | ||||
|         let displayed: Vec<FieldId> = match self.displayed_fields_ids(txn)? { | ||||
|             Some(ids) => ids.into_iter().collect(), | ||||
|             None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|         }; | ||||
|  | ||||
|         let requested: HashSet<FieldId> = match attributes_to_retrieve { | ||||
|             Some(attrs) => attrs | ||||
|                 .iter() | ||||
|                 .filter_map(|attr| fields_ids_map.id(attr.as_ref())) | ||||
|                 .collect(), | ||||
|             None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|         }; | ||||
|  | ||||
|         // Filtering keeps the order of the displayed fields. | ||||
|         let visible: Vec<FieldId> = displayed | ||||
|             .into_iter() | ||||
|             .filter(|fid| requested.contains(fid)) | ||||
|             .collect(); | ||||
|  | ||||
|         Ok(visible) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										1335
									
								
								meilisearch-lib/src/index/search.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1335
									
								
								meilisearch-lib/src/index/search.rs
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										72
									
								
								meilisearch-lib/src/index/update_handler.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								meilisearch-lib/src/index/update_handler.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| use crate::index::Index; | ||||
| use milli::update::UpdateBuilder; | ||||
| use milli::CompressionType; | ||||
| use rayon::ThreadPool; | ||||
|  | ||||
| use crate::index_controller::update_actor::RegisterUpdate; | ||||
| use crate::index_controller::{Failed, Processed, Processing}; | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| /// Indexing configuration, plus the thread pool, handed to every | ||||
| /// `UpdateBuilder` created by this handler. | ||||
| pub struct UpdateHandler { | ||||
|     max_nb_chunks: Option<usize>, | ||||
|     chunk_compression_level: Option<u32>, | ||||
|     thread_pool: ThreadPool, | ||||
|     log_frequency: usize, | ||||
|     max_memory: Option<usize>, | ||||
|     chunk_compression_type: CompressionType, | ||||
| } | ||||
|  | ||||
| impl UpdateHandler { | ||||
|     /// Creates a handler from the indexer options, building its rayon thread pool. | ||||
|     /// | ||||
|     /// When `indexing_jobs` is not set, the pool is asked for half the number of | ||||
|     /// CPUs reported by `num_cpus`. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails if the thread pool cannot be built. | ||||
|     pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> { | ||||
|         // Only query the CPU count when no job count was configured. | ||||
|         let num_threads = opt.indexing_jobs.unwrap_or_else(|| num_cpus::get() / 2); | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new() | ||||
|             .num_threads(num_threads) | ||||
|             .build()?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             max_nb_chunks: opt.max_nb_chunks, | ||||
|             chunk_compression_level: opt.chunk_compression_level, | ||||
|             thread_pool, | ||||
|             log_frequency: opt.log_every_n, | ||||
|             max_memory: opt.max_memory.map(|m| m.get_bytes() as usize), | ||||
|             chunk_compression_type: opt.chunk_compression_type, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Creates an `UpdateBuilder` for `update_id`, configured from this handler. | ||||
|     /// | ||||
|     /// Optional settings are only applied when they are set. | ||||
|     pub fn update_builder(&self, update_id: u64) -> UpdateBuilder { | ||||
|         // We prepare the update by using the update builder. | ||||
|         let mut update_builder = UpdateBuilder::new(update_id); | ||||
|         if let Some(max_nb_chunks) = self.max_nb_chunks { | ||||
|             update_builder.max_nb_chunks(max_nb_chunks); | ||||
|         } | ||||
|         if let Some(chunk_compression_level) = self.chunk_compression_level { | ||||
|             update_builder.chunk_compression_level(chunk_compression_level); | ||||
|         } | ||||
|         update_builder.thread_pool(&self.thread_pool); | ||||
|         update_builder.log_every_n(self.log_frequency); | ||||
|         if let Some(max_memory) = self.max_memory { | ||||
|             update_builder.max_memory(max_memory); | ||||
|         } | ||||
|         update_builder.chunk_compression_type(self.chunk_compression_type); | ||||
|         update_builder | ||||
|     } | ||||
|  | ||||
|     /// Applies the update described by `meta` to `index`. | ||||
|     /// | ||||
|     /// Returns the update marked as processed, with its result, on success, and | ||||
|     /// marked as failed, with the error, otherwise. | ||||
|     pub fn handle_update( | ||||
|         &self, | ||||
|         index: Index, | ||||
|         meta: Processing, | ||||
|     ) -> Result<Processed, Failed> { | ||||
|         let update_id = meta.id(); | ||||
|         let update_builder = self.update_builder(update_id); | ||||
|  | ||||
|         let result = match meta.meta() { | ||||
|             RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => { | ||||
|                 index.update_documents(*method, *content_uuid, update_builder, primary_key.as_deref()) | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         // `meta` is consumed by whichever outcome applies. | ||||
|         match result { | ||||
|             Ok(result) => Ok(meta.process(result)), | ||||
|             Err(e) => Err(meta.fail(e)), | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										366
									
								
								meilisearch-lib/src/index/updates.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										366
									
								
								meilisearch-lib/src/index/updates.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,366 @@ | ||||
| use std::collections::{BTreeMap, BTreeSet}; | ||||
| use std::marker::PhantomData; | ||||
| use std::num::NonZeroUsize; | ||||
|  | ||||
| use log::{debug, info, trace}; | ||||
| use milli::documents::DocumentBatchReader; | ||||
| use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; | ||||
| use serde::{Deserialize, Serialize, Serializer}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index_controller::UpdateResult; | ||||
|  | ||||
| use super::Index; | ||||
| use super::error::Result; | ||||
|  | ||||
| /// Serializes a list setting, writing `Setting::Reset` as the wildcard list | ||||
| /// `["*"]`. | ||||
| /// | ||||
| /// `Setting::Set` is written as its list and `Setting::NotSet` as an absent | ||||
| /// optional value. | ||||
| fn serialize_with_wildcard<S>( | ||||
|     field: &Setting<Vec<String>>, | ||||
|     s: S, | ||||
| ) -> std::result::Result<S::Ok, S::Error> | ||||
| where | ||||
|     S: Serializer, | ||||
| { | ||||
|     match field { | ||||
|         Setting::Set(value) => s.serialize_some(value), | ||||
|         // A slice serializes as a sequence, exactly like a `Vec`, and needs no | ||||
|         // allocation. | ||||
|         Setting::Reset => s.serialize_some(&["*"][..]), | ||||
|         Setting::NotSet => s.serialize_none(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Marker for settings that went through `Settings::check`. It is not | ||||
| /// deserializable. | ||||
| #[derive(Clone, Default, Debug, Serialize)] | ||||
| pub struct Checked; | ||||
|  | ||||
| /// Marker for settings that have not been checked yet. | ||||
| #[derive(Clone, Default, Debug, Serialize, Deserialize)] | ||||
| pub struct Unchecked; | ||||
|  | ||||
| /// Settings of an index; `T` is a marker type (`Checked` or `Unchecked`). | ||||
| /// | ||||
| /// Field names are camelCase once serialized and unknown fields are rejected | ||||
| /// when deserializing. A field that is `NotSet` is skipped when serializing, | ||||
| /// and a field missing from the input takes its default value. | ||||
| #[derive(Debug, Clone, Default, Serialize, Deserialize)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] | ||||
| pub struct Settings<T> { | ||||
|     // Serialized through `serialize_with_wildcard`. | ||||
|     #[serde( | ||||
|         default, | ||||
|         serialize_with = "serialize_with_wildcard", | ||||
|         skip_serializing_if = "Setting::is_not_set" | ||||
|     )] | ||||
|     pub displayed_attributes: Setting<Vec<String>>, | ||||
|  | ||||
|     // Serialized through `serialize_with_wildcard`. | ||||
|     #[serde( | ||||
|         default, | ||||
|         serialize_with = "serialize_with_wildcard", | ||||
|         skip_serializing_if = "Setting::is_not_set" | ||||
|     )] | ||||
|     pub searchable_attributes: Setting<Vec<String>>, | ||||
|  | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     pub filterable_attributes: Setting<BTreeSet<String>>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     pub sortable_attributes: Setting<BTreeSet<String>>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     pub ranking_rules: Setting<Vec<String>>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     pub stop_words: Setting<BTreeSet<String>>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     pub synonyms: Setting<BTreeMap<String, Vec<String>>>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     pub distinct_attribute: Setting<String>, | ||||
|  | ||||
|     // Carries the marker type only; never serialized. | ||||
|     #[serde(skip)] | ||||
|     pub _kind: PhantomData<T>, | ||||
| } | ||||
|  | ||||
| impl Settings<Checked> { | ||||
|     /// Returns settings in which every field is `Setting::Reset`. | ||||
|     pub fn cleared() -> Settings<Checked> { | ||||
|         Self { | ||||
|             displayed_attributes: Setting::Reset, | ||||
|             searchable_attributes: Setting::Reset, | ||||
|             filterable_attributes: Setting::Reset, | ||||
|             sortable_attributes: Setting::Reset, | ||||
|             ranking_rules: Setting::Reset, | ||||
|             stop_words: Setting::Reset, | ||||
|             synonyms: Setting::Reset, | ||||
|             distinct_attribute: Setting::Reset, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Converts the settings back to their unchecked form; every value is moved | ||||
|     /// over unchanged and only the marker type differs. | ||||
|     pub fn into_unchecked(self) -> Settings<Unchecked> { | ||||
|         Settings { | ||||
|             displayed_attributes: self.displayed_attributes, | ||||
|             searchable_attributes: self.searchable_attributes, | ||||
|             filterable_attributes: self.filterable_attributes, | ||||
|             sortable_attributes: self.sortable_attributes, | ||||
|             ranking_rules: self.ranking_rules, | ||||
|             stop_words: self.stop_words, | ||||
|             synonyms: self.synonyms, | ||||
|             distinct_attribute: self.distinct_attribute, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Settings<Unchecked> { | ||||
|     /// Checks the settings: displayed or searchable attributes that contain the | ||||
|     /// wildcard `"*"` become `Setting::Reset`; everything else is kept as is. | ||||
|     pub fn check(self) -> Settings<Checked> { | ||||
|         /// Turns a list containing the wildcard into `Setting::Reset`. | ||||
|         fn reset_on_wildcard(setting: Setting<Vec<String>>) -> Setting<Vec<String>> { | ||||
|             match setting { | ||||
|                 Setting::Set(fields) if fields.iter().any(|f| f == "*") => Setting::Reset, | ||||
|                 unchanged => unchanged, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Settings { | ||||
|             displayed_attributes: reset_on_wildcard(self.displayed_attributes), | ||||
|             searchable_attributes: reset_on_wildcard(self.searchable_attributes), | ||||
|             filterable_attributes: self.filterable_attributes, | ||||
|             sortable_attributes: self.sortable_attributes, | ||||
|             ranking_rules: self.ranking_rules, | ||||
|             stop_words: self.stop_words, | ||||
|             synonyms: self.synonyms, | ||||
|             distinct_attribute: self.distinct_attribute, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Facet options, (de)serialized with camelCase keys; unknown keys are rejected. | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Facets { | ||||
|     /// Optional, non-zero level group size. | ||||
|     pub level_group_size: Option<NonZeroUsize>, | ||||
|     /// Optional, non-zero minimum level size. | ||||
|     pub min_level_size: Option<NonZeroUsize>, | ||||
| } | ||||
|  | ||||
| impl Index { | ||||
|     /// Indexes the documents stored under `content_uuid` inside a fresh write | ||||
|     /// transaction, committing it only if the indexing succeeded. | ||||
|     /// | ||||
|     /// This is a thin wrapper around `update_documents_txn`; any error returned | ||||
|     /// before the commit leaves the transaction uncommitted. | ||||
|     pub fn update_documents( | ||||
|         &self, | ||||
|         method: IndexDocumentsMethod, | ||||
|         content_uuid: Uuid, | ||||
|         update_builder: UpdateBuilder, | ||||
|         primary_key: Option<&str>, | ||||
|     ) -> Result<UpdateResult> { | ||||
|         let mut wtxn = self.write_txn()?; | ||||
|         let outcome = self.update_documents_txn( | ||||
|             &mut wtxn, | ||||
|             method, | ||||
|             content_uuid, | ||||
|             update_builder, | ||||
|             primary_key, | ||||
|         )?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(outcome) | ||||
|     } | ||||
|  | ||||
|     /// Indexes the documents stored under `content_uuid` using the caller's | ||||
|     /// write transaction `txn`; the transaction is not committed here. | ||||
|     /// | ||||
|     /// If the index has no primary key yet and `primary_key` is provided, the | ||||
|     /// primary key is set first through a separate settings update on the same | ||||
|     /// transaction. The update file is then read as a document batch and indexed | ||||
|     /// with the given `method`. | ||||
|     /// | ||||
|     /// Returns `UpdateResult::DocumentsAddition` wrapping the indexing result. | ||||
|     pub fn update_documents_txn<'a, 'b>( | ||||
|         &'a self, | ||||
|         txn: &mut heed::RwTxn<'a, 'b>, | ||||
|         method: IndexDocumentsMethod, | ||||
|         content_uuid: Uuid, | ||||
|         update_builder: UpdateBuilder, | ||||
|         primary_key: Option<&str>, | ||||
|     ) -> Result<UpdateResult> { | ||||
|         trace!("performing document addition"); | ||||
|  | ||||
|         // Set the primary key if not set already, ignore if already set. | ||||
|         if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) { | ||||
|             let mut builder = UpdateBuilder::new(0).settings(txn, self); | ||||
|             builder.set_primary_key(primary_key.to_string()); | ||||
|             builder.execute(|_, _| ())?; | ||||
|         } | ||||
|  | ||||
|         // Progress callback: only logs each indexing step at debug level. | ||||
|         let indexing_callback = | ||||
|             |indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step); | ||||
|  | ||||
|         // NOTE(review): both `unwrap`s panic instead of returning an error when the | ||||
|         // update file cannot be fetched or cannot be read as a document batch; | ||||
|         // consider propagating these failures through `Result`. | ||||
|         let content_file = self.update_file_store.get_update(content_uuid).unwrap(); | ||||
|         let reader = DocumentBatchReader::from_reader(content_file).unwrap(); | ||||
|  | ||||
|         let mut builder = update_builder.index_documents(txn, self); | ||||
|         builder.index_documents_method(method); | ||||
|         let addition = builder.execute(reader, indexing_callback)?; | ||||
|  | ||||
|         info!("document addition done: {:?}", addition); | ||||
|  | ||||
|         Ok(UpdateResult::DocumentsAddition(addition)) | ||||
|     } | ||||
|  | ||||
|     //pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> { | ||||
|         //// We must use the write transaction of the update here. | ||||
|         //let mut wtxn = self.write_txn()?; | ||||
|         //let builder = update_builder.clear_documents(&mut wtxn, self); | ||||
|  | ||||
|         //let _count = builder.execute()?; | ||||
|  | ||||
|         //wtxn.commit() | ||||
|             //.and(Ok(UpdateResult::Other)) | ||||
|             //.map_err(Into::into) | ||||
|     //} | ||||
|  | ||||
|     //pub fn update_settings_txn<'a, 'b>( | ||||
|         //&'a self, | ||||
|         //txn: &mut heed::RwTxn<'a, 'b>, | ||||
|         //settings: &Settings<Checked>, | ||||
|         //update_builder: UpdateBuilder, | ||||
|     //) -> Result<UpdateResult> { | ||||
|         //// We must use the write transaction of the update here. | ||||
|         //let mut builder = update_builder.settings(txn, self); | ||||
|  | ||||
|         //match settings.searchable_attributes { | ||||
|             //Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), | ||||
|             //Setting::Reset => builder.reset_searchable_fields(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.displayed_attributes { | ||||
|             //Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), | ||||
|             //Setting::Reset => builder.reset_displayed_fields(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.filterable_attributes { | ||||
|             //Setting::Set(ref facets) => { | ||||
|                 //builder.set_filterable_fields(facets.clone().into_iter().collect()) | ||||
|             //} | ||||
|             //Setting::Reset => builder.reset_filterable_fields(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.sortable_attributes { | ||||
|             //Setting::Set(ref fields) => { | ||||
|                 //builder.set_sortable_fields(fields.iter().cloned().collect()) | ||||
|             //} | ||||
|             //Setting::Reset => builder.reset_sortable_fields(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.ranking_rules { | ||||
|             //Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), | ||||
|             //Setting::Reset => builder.reset_criteria(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.stop_words { | ||||
|             //Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), | ||||
|             //Setting::Reset => builder.reset_stop_words(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.synonyms { | ||||
|             //Setting::Set(ref synonyms) => { | ||||
|                 //builder.set_synonyms(synonyms.clone().into_iter().collect()) | ||||
|             //} | ||||
|             //Setting::Reset => builder.reset_synonyms(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //match settings.distinct_attribute { | ||||
|             //Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), | ||||
|             //Setting::Reset => builder.reset_distinct_field(), | ||||
|             //Setting::NotSet => (), | ||||
|         //} | ||||
|  | ||||
|         //builder.execute(|indexing_step, update_id| { | ||||
|             //debug!("update {}: {:?}", update_id, indexing_step) | ||||
|         //})?; | ||||
|  | ||||
|         //Ok(UpdateResult::Other) | ||||
|     //} | ||||
|  | ||||
|     //pub fn update_settings( | ||||
|         //&self, | ||||
|         //settings: &Settings<Checked>, | ||||
|         //update_builder: UpdateBuilder, | ||||
|     //) -> Result<UpdateResult> { | ||||
|         //let mut txn = self.write_txn()?; | ||||
|         //let result = self.update_settings_txn(&mut txn, settings, update_builder)?; | ||||
|         //txn.commit()?; | ||||
|         //Ok(result) | ||||
|     //} | ||||
|  | ||||
|     //pub fn delete_documents( | ||||
|         //&self, | ||||
|         //document_ids: &[String], | ||||
|         //update_builder: UpdateBuilder, | ||||
|     //) -> Result<UpdateResult> { | ||||
|         //let mut txn = self.write_txn()?; | ||||
|         //let mut builder = update_builder.delete_documents(&mut txn, self)?; | ||||
|  | ||||
|         //// We ignore unexisting document ids | ||||
|         //document_ids.iter().for_each(|id| { | ||||
|             //builder.delete_external_id(id); | ||||
|         //}); | ||||
|  | ||||
|         //let deleted = builder.execute()?; | ||||
|         //txn.commit() | ||||
|             //.and(Ok(UpdateResult::DocumentDeletion { deleted })) | ||||
|             //.map_err(Into::into) | ||||
|     //} | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|  | ||||
|     /// `check` must leave plain attribute lists untouched and turn any list | ||||
|     /// containing the `*` wildcard into `Setting::Reset`. | ||||
|     #[test] | ||||
|     fn test_setting_check() { | ||||
|         // test no changes | ||||
|         let settings = Settings { | ||||
|             displayed_attributes: Setting::Set(vec![String::from("hello")]), | ||||
|             searchable_attributes: Setting::Set(vec![String::from("hello")]), | ||||
|             filterable_attributes: Setting::NotSet, | ||||
|             sortable_attributes: Setting::NotSet, | ||||
|             ranking_rules: Setting::NotSet, | ||||
|             stop_words: Setting::NotSet, | ||||
|             synonyms: Setting::NotSet, | ||||
|             distinct_attribute: Setting::NotSet, | ||||
|             _kind: PhantomData::<Unchecked>, | ||||
|         }; | ||||
|  | ||||
|         let checked = settings.clone().check(); | ||||
|         assert_eq!(settings.displayed_attributes, checked.displayed_attributes); | ||||
|         assert_eq!( | ||||
|             settings.searchable_attributes, | ||||
|             checked.searchable_attributes | ||||
|         ); | ||||
|  | ||||
|         // test wildcard: a `*` alone or mixed with other names resets the setting | ||||
|         let settings = Settings { | ||||
|             displayed_attributes: Setting::Set(vec![String::from("*")]), | ||||
|             searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]), | ||||
|             filterable_attributes: Setting::NotSet, | ||||
|             sortable_attributes: Setting::NotSet, | ||||
|             ranking_rules: Setting::NotSet, | ||||
|             stop_words: Setting::NotSet, | ||||
|             synonyms: Setting::NotSet, | ||||
|             distinct_attribute: Setting::NotSet, | ||||
|             _kind: PhantomData::<Unchecked>, | ||||
|         }; | ||||
|  | ||||
|         let checked = settings.check(); | ||||
|         assert_eq!(checked.displayed_attributes, Setting::Reset); | ||||
|         assert_eq!(checked.searchable_attributes, Setting::Reset); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										157
									
								
								meilisearch-lib/src/index_controller/dump_actor/actor.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										157
									
								
								meilisearch-lib/src/index_controller/dump_actor/actor.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,157 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use async_stream::stream; | ||||
| use chrono::Utc; | ||||
| use futures::{lock::Mutex, stream::StreamExt}; | ||||
| use log::{error, trace}; | ||||
| use tokio::sync::{mpsc, oneshot, RwLock}; | ||||
| use update_actor::UpdateActorHandle; | ||||
| use uuid_resolver::UuidResolverHandle; | ||||
|  | ||||
| use super::error::{DumpActorError, Result}; | ||||
| use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask}; | ||||
| use crate::index_controller::{update_actor, uuid_resolver}; | ||||
|  | ||||
| /// Upper bound on the number of dump messages handled concurrently by the dump actor. | ||||
| pub const CONCURRENT_DUMP_MSG: usize = 10; | ||||
|  | ||||
| /// Actor that receives `DumpMsg` requests, creates dumps and tracks their status. | ||||
| pub struct DumpActor<UuidResolver, Update> { | ||||
|     /// Incoming message channel; taken out (left as `None`) once `run` starts. | ||||
|     inbox: Option<mpsc::Receiver<DumpMsg>>, | ||||
|     /// Handle cloned into every dump task. | ||||
|     uuid_resolver: UuidResolver, | ||||
|     /// Handle cloned into every dump task. | ||||
|     update: Update, | ||||
|     /// Path handed to every dump task. | ||||
|     dump_path: PathBuf, | ||||
|     /// Held for the whole duration of a dump so that only one runs at a time. | ||||
|     lock: Arc<Mutex<()>>, | ||||
|     /// Status of every dump requested so far, keyed by dump uid. | ||||
|     dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>, | ||||
|     /// Forwarded as is to every dump task. | ||||
|     update_db_size: usize, | ||||
|     /// Forwarded as is to every dump task. | ||||
|     index_db_size: usize, | ||||
| } | ||||
|  | ||||
| /// Generate uid from creation date | ||||
| /// | ||||
| /// The uid is the current UTC time formatted with `%Y%m%d-%H%M%S%3f` | ||||
| /// (date, a dash, then time followed by chrono's `%3f` fractional seconds). | ||||
| fn generate_uid() -> String { | ||||
|     Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() | ||||
| } | ||||
|  | ||||
| impl<UuidResolver, Update> DumpActor<UuidResolver, Update> | ||||
| where | ||||
|     UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static, | ||||
|     Update: UpdateActorHandle + Send + Sync + Clone + 'static, | ||||
| { | ||||
|     /// Builds a dump actor reading its messages from `inbox`. | ||||
|     /// | ||||
|     /// The actor starts with no known dump and a free dump lock; nothing runs | ||||
|     /// until `run` is called. | ||||
|     pub fn new( | ||||
|         inbox: mpsc::Receiver<DumpMsg>, | ||||
|         uuid_resolver: UuidResolver, | ||||
|         update: Update, | ||||
|         dump_path: impl AsRef<Path>, | ||||
|         index_db_size: usize, | ||||
|         update_db_size: usize, | ||||
|     ) -> Self { | ||||
|         Self { | ||||
|             inbox: Some(inbox), | ||||
|             uuid_resolver, | ||||
|             update, | ||||
|             dump_path: dump_path.as_ref().to_path_buf(), | ||||
|             lock: Arc::new(Mutex::new(())), | ||||
|             dump_infos: Arc::new(RwLock::new(HashMap::new())), | ||||
|             update_db_size, | ||||
|             index_db_size, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Runs the actor until its inbox is closed. | ||||
|     /// | ||||
|     /// Messages are pulled from the inbox and handled concurrently, at most | ||||
|     /// `CONCURRENT_DUMP_MSG` at a time. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the inbox has already been taken. | ||||
|     pub async fn run(mut self) { | ||||
|         trace!("Started dump actor."); | ||||
|  | ||||
|         let mut inbox = self | ||||
|             .inbox | ||||
|             .take() | ||||
|             .expect("Dump Actor must have a inbox at this point."); | ||||
|  | ||||
|         // Turn the receiver into a stream that ends when `recv` returns `None`. | ||||
|         let stream = stream! { | ||||
|             loop { | ||||
|                 match inbox.recv().await { | ||||
|                     Some(msg) => yield msg, | ||||
|                     None => break, | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         stream | ||||
|             .for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg)) | ||||
|             .await; | ||||
|  | ||||
|         // Reached only once the stream above has ended. | ||||
|         error!("Dump actor stopped."); | ||||
|     } | ||||
|  | ||||
|     /// Dispatches one message to the matching handler. | ||||
|     /// | ||||
|     /// For `DumpInfo`, a failure to send the answer back is ignored. | ||||
|     async fn handle_message(&self, msg: DumpMsg) { | ||||
|         match msg { | ||||
|             DumpMsg::CreateDump { ret } => self.handle_create_dump(ret).await, | ||||
|             DumpMsg::DumpInfo { ret, uid } => { | ||||
|                 let answer = self.handle_dump_info(uid).await; | ||||
|                 let _ = ret.send(answer); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Handles a `CreateDump` request. | ||||
|     /// | ||||
|     /// If another dump holds the lock, `DumpAlreadyRunning` is sent back and | ||||
|     /// nothing else happens. Otherwise the new dump is registered as in progress, | ||||
|     /// its `DumpInfo` is sent back right away, and the dump task is spawned and | ||||
|     /// awaited; the registered entry is then marked as done or failed. | ||||
|     /// | ||||
|     /// A failed send on `ret` only means the requester stopped waiting for the | ||||
|     /// answer; it is ignored so that it cannot panic the actor. | ||||
|     async fn handle_create_dump(&self, ret: oneshot::Sender<Result<DumpInfo>>) { | ||||
|         let uid = generate_uid(); | ||||
|         let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); | ||||
|  | ||||
|         // The guard is kept until the end of the function: one dump at a time. | ||||
|         let _lock = match self.lock.try_lock() { | ||||
|             Some(lock) => lock, | ||||
|             None => { | ||||
|                 let _ = ret.send(Err(DumpActorError::DumpAlreadyRunning)); | ||||
|                 return; | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         self.dump_infos | ||||
|             .write() | ||||
|             .await | ||||
|             .insert(uid.clone(), info.clone()); | ||||
|  | ||||
|         // Answer before the dump runs; the dump proceeds even if nobody listens. | ||||
|         let _ = ret.send(Ok(info)); | ||||
|  | ||||
|         let task = DumpTask { | ||||
|             path: self.dump_path.clone(), | ||||
|             uuid_resolver: self.uuid_resolver.clone(), | ||||
|             update_handle: self.update.clone(), | ||||
|             uid: uid.clone(), | ||||
|             update_db_size: self.update_db_size, | ||||
|             index_db_size: self.index_db_size, | ||||
|         }; | ||||
|  | ||||
|         // The outer `Result` is the join result of the spawned task, the inner | ||||
|         // one is the result of the dump itself. | ||||
|         let task_result = tokio::task::spawn(task.run()).await; | ||||
|  | ||||
|         let mut dump_infos = self.dump_infos.write().await; | ||||
|         let dump_infos = dump_infos | ||||
|             .get_mut(&uid) | ||||
|             .expect("dump entry deleted while lock was acquired"); | ||||
|  | ||||
|         match task_result { | ||||
|             Ok(Ok(())) => { | ||||
|                 dump_infos.done(); | ||||
|                 trace!("Dump succeed"); | ||||
|             } | ||||
|             Ok(Err(e)) => { | ||||
|                 dump_infos.with_error(e.to_string()); | ||||
|                 error!("Dump failed: {}", e); | ||||
|             } | ||||
|             Err(_) => { | ||||
|                 dump_infos.with_error("Unexpected error while performing dump.".to_string()); | ||||
|                 error!("Dump panicked. Dump status set to failed"); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     /// Returns a copy of the `DumpInfo` registered under `uid`, or | ||||
|     /// `DumpDoesNotExist` carrying that uid when there is none. | ||||
|     async fn handle_dump_info(&self, uid: String) -> Result<DumpInfo> { | ||||
|         let known_dumps = self.dump_infos.read().await; | ||||
|         let found = known_dumps.get(&uid).cloned(); | ||||
|         found.ok_or_else(|| DumpActorError::DumpDoesNotExist(uid)) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										52
									
								
								meilisearch-lib/src/index_controller/dump_actor/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								meilisearch-lib/src/index_controller/dump_actor/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
|  | ||||
| use crate::index_controller::update_actor::error::UpdateActorError; | ||||
| use crate::index_controller::uuid_resolver::error::UuidResolverError; | ||||
|  | ||||
| pub type Result<T> = std::result::Result<T, DumpActorError>; | ||||
|  | ||||
| /// Errors returned by the dump actor. | ||||
| #[derive(thiserror::Error, Debug)] | ||||
| pub enum DumpActorError { | ||||
|     /// A dump was requested while another one was still in progress. | ||||
|     #[error("Another dump is already in progress")] | ||||
|     DumpAlreadyRunning, | ||||
|     /// No dump is known under the given uid. | ||||
|     #[error("Dump `{0}` not found")] | ||||
|     DumpDoesNotExist(String), | ||||
|     /// Any other boxed error; see the `internal_error!` conversions below. | ||||
|     #[error("Internal error: {0}")] | ||||
|     Internal(Box<dyn std::error::Error + Send + Sync + 'static>), | ||||
|     /// Error coming from the uuid resolver, displayed as is. | ||||
|     #[error("{0}")] | ||||
|     UuidResolver(#[from] UuidResolverError), | ||||
|     /// Error coming from the update actor, displayed as is. | ||||
|     #[error("{0}")] | ||||
|     UpdateActor(#[from] UpdateActorError), | ||||
| } | ||||
|  | ||||
| /// Implements `From<$other> for DumpActorError` for each listed error type by | ||||
| /// boxing the error into `DumpActorError::Internal`. | ||||
| macro_rules! internal_error { | ||||
|     ($($other:path), *) => { | ||||
|         $( | ||||
|             impl From<$other> for DumpActorError { | ||||
|                 fn from(other: $other) -> Self { | ||||
|                     Self::Internal(Box::new(other)) | ||||
|                 } | ||||
|             } | ||||
|         )* | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Error types that `?` converts into `DumpActorError::Internal`. | ||||
| internal_error!( | ||||
|     heed::Error, | ||||
|     std::io::Error, | ||||
|     tokio::task::JoinError, | ||||
|     serde_json::error::Error, | ||||
|     tempfile::PersistError | ||||
| ); | ||||
|  | ||||
| impl ErrorCode for DumpActorError { | ||||
|     /// Maps each variant to its error code; wrapped resolver and update actor | ||||
|     /// errors report their own code. | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             Self::DumpAlreadyRunning => Code::DumpAlreadyInProgress, | ||||
|             Self::DumpDoesNotExist(_) => Code::NotFound, | ||||
|             Self::Internal(_) => Code::Internal, | ||||
|             Self::UuidResolver(inner) => inner.error_code(), | ||||
|             Self::UpdateActor(inner) => inner.error_code(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,52 @@ | ||||
| use std::path::Path; | ||||
|  | ||||
| use tokio::sync::{mpsc, oneshot}; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg}; | ||||
|  | ||||
| /// Cloneable handle that talks to a spawned `DumpActor` over an mpsc channel. | ||||
| #[derive(Clone)] | ||||
| pub struct DumpActorHandleImpl { | ||||
|     /// Sending half of the actor's inbox. | ||||
|     sender: mpsc::Sender<DumpMsg>, | ||||
| } | ||||
|  | ||||
| #[async_trait::async_trait] | ||||
| impl DumpActorHandle for DumpActorHandleImpl { | ||||
|     /// Asks the dump actor to create a new dump and waits for its answer. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the actor's error as is, or `DumpActorError::Internal` when the | ||||
|     /// actor dropped the reply channel without answering (e.g. it has stopped). | ||||
|     async fn create_dump(&self) -> Result<DumpInfo> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = DumpMsg::CreateDump { ret }; | ||||
|         // A failed send drops `ret`, which surfaces below as a receive error. | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         receiver | ||||
|             .await | ||||
|             .unwrap_or_else(|e| Err(super::error::DumpActorError::Internal(Box::new(e)))) | ||||
|     } | ||||
|  | ||||
|     /// Asks the dump actor for the status of the dump identified by `uid`. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the actor's error as is, or `DumpActorError::Internal` when the | ||||
|     /// actor dropped the reply channel without answering (e.g. it has stopped). | ||||
|     async fn dump_info(&self, uid: String) -> Result<DumpInfo> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = DumpMsg::DumpInfo { ret, uid }; | ||||
|         // A failed send drops `ret`, which surfaces below as a receive error. | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         receiver | ||||
|             .await | ||||
|             .unwrap_or_else(|e| Err(super::error::DumpActorError::Internal(Box::new(e)))) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DumpActorHandleImpl { | ||||
|     /// Creates the dump actor, spawns it on the tokio runtime and returns a | ||||
|     /// handle connected to it through a channel of capacity 10. | ||||
|     pub fn new( | ||||
|         path: impl AsRef<Path>, | ||||
|         uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, | ||||
|         update: crate::index_controller::update_actor::UpdateActorHandleImpl, | ||||
|         index_db_size: usize, | ||||
|         update_db_size: usize, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let (sender, inbox) = mpsc::channel(10); | ||||
|  | ||||
|         // The join handle is dropped: the actor runs detached. | ||||
|         tokio::task::spawn( | ||||
|             DumpActor::new( | ||||
|                 inbox, | ||||
|                 uuid_resolver, | ||||
|                 update, | ||||
|                 path, | ||||
|                 index_db_size, | ||||
|                 update_db_size, | ||||
|             ) | ||||
|             .run(), | ||||
|         ); | ||||
|  | ||||
|         Ok(Self { sender }) | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,2 @@ | ||||
| pub mod v1; | ||||
| pub mod v2; | ||||
							
								
								
									
										224
									
								
								meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										224
									
								
								meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,224 @@ | ||||
| use std::collections::{BTreeMap, BTreeSet}; | ||||
| use std::marker::PhantomData; | ||||
| use std::path::Path; | ||||
|  | ||||
| use log::{error, info, warn}; | ||||
| use milli::update::Setting; | ||||
| use serde::{Deserialize, Deserializer, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; | ||||
| use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; | ||||
| use crate::{ | ||||
|     index::Unchecked, | ||||
|     options::IndexerOpts, | ||||
| }; | ||||
|  | ||||
| /// Metadata of a V1 dump, (de)serialized with camelCase keys. | ||||
| #[derive(Serialize, Deserialize, Debug)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct MetadataV1 { | ||||
|     /// Database version recorded in the dump; only logged when loading. | ||||
|     db_version: String, | ||||
|     /// Metadata of every index contained in the dump. | ||||
|     indexes: Vec<IndexMetadata>, | ||||
| } | ||||
|  | ||||
| impl MetadataV1 { | ||||
|     /// Loads every index listed in this metadata from `src` into `dst`. | ||||
|     /// | ||||
|     /// Each index gets a freshly generated uuid, registered in a uuid store | ||||
|     /// opened at `dst`, and is then loaded from the `src/<index uid>` directory. | ||||
|     /// The first error aborts the whole load. | ||||
|     pub fn load_dump( | ||||
|         self, | ||||
|         src: impl AsRef<Path>, | ||||
|         dst: impl AsRef<Path>, | ||||
|         size: usize, | ||||
|         indexer_options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         info!( | ||||
|             "Loading dump, dump database version: {}, dump version: V1", | ||||
|             self.db_version | ||||
|         ); | ||||
|  | ||||
|         let uuid_store = HeedUuidStore::new(&dst)?; | ||||
|         for entry in self.indexes { | ||||
|             let uuid = Uuid::new_v4(); | ||||
|             uuid_store.insert(entry.uid.clone(), uuid)?; | ||||
|  | ||||
|             let index_src = src.as_ref().join(entry.uid); | ||||
|             let primary_key = entry.meta.primary_key.as_deref(); | ||||
|             load_index(&index_src, &dst, uuid, primary_key, size, indexer_options)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Deserializes a `T` and wraps it in `Some`. | ||||
| /// | ||||
| /// Meant for `#[serde(deserialize_with = ...)]` on `Option<Option<_>>` fields, | ||||
| /// so that a field present in the input always yields an outer `Some`. | ||||
| pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error> | ||||
| where | ||||
|     T: Deserialize<'de>, | ||||
|     D: Deserializer<'de>, | ||||
| { | ||||
|     let value = T::deserialize(deserializer)?; | ||||
|     Ok(Some(value)) | ||||
| } | ||||
|  | ||||
| // These are the settings used in legacy meilisearch (<v0.21.0). | ||||
| // | ||||
| // Every field is an `Option<Option<_>>`: the outer `None` comes from | ||||
| // `#[serde(default)]` when the key is absent, while a present key is always | ||||
| // wrapped in `Some` by `deserialize_some` (so an explicit `null` gives `Some(None)`). | ||||
| #[derive(Default, Clone, Serialize, Deserialize, Debug)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| struct Settings { | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub ranking_rules: Option<Option<Vec<String>>>, | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub distinct_attribute: Option<Option<String>>, | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub searchable_attributes: Option<Option<Vec<String>>>, | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub displayed_attributes: Option<Option<BTreeSet<String>>>, | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub stop_words: Option<Option<BTreeSet<String>>>, | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>, | ||||
|     #[serde(default, deserialize_with = "deserialize_some")] | ||||
|     pub attributes_for_faceting: Option<Option<Vec<String>>>, | ||||
| } | ||||
|  | ||||
| /// Loads a single index of a V1 dump. | ||||
| /// | ||||
| /// Currently unimplemented: the body is a `todo!` and every call panics. The | ||||
| /// previous implementation is kept below, commented out, for reference. | ||||
| fn load_index( | ||||
|     _src: impl AsRef<Path>, | ||||
|     _dst: impl AsRef<Path>, | ||||
|     _uuid: Uuid, | ||||
|     _primary_key: Option<&str>, | ||||
|     _size: usize, | ||||
|     _indexer_options: &IndexerOpts, | ||||
| ) -> anyhow::Result<()> { | ||||
|     todo!("fix dump obkv documents") | ||||
|     //let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); | ||||
|  | ||||
|     //create_dir_all(&index_path)?; | ||||
|     //let mut options = EnvOpenOptions::new(); | ||||
|     //options.map_size(size); | ||||
|     //let index = milli::Index::new(options, index_path)?; | ||||
|     //let index = Index(Arc::new(index)); | ||||
|  | ||||
|     //// extract `settings.json` file and import content | ||||
|     //let settings = import_settings(&src)?; | ||||
|     //let settings: index_controller::Settings<Unchecked> = settings.into(); | ||||
|  | ||||
|     //let mut txn = index.write_txn()?; | ||||
|  | ||||
|     //let handler = UpdateHandler::new(indexer_options)?; | ||||
|  | ||||
|     //index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; | ||||
|  | ||||
|     //let file = File::open(&src.as_ref().join("documents.jsonl"))?; | ||||
|     //let mut reader = std::io::BufReader::new(file); | ||||
|     //reader.fill_buf()?; | ||||
|     //if !reader.buffer().is_empty() { | ||||
|         //index.update_documents_txn( | ||||
|             //&mut txn, | ||||
|             //IndexDocumentsMethod::ReplaceDocuments, | ||||
|             //Some(reader), | ||||
|             //handler.update_builder(0), | ||||
|             //primary_key, | ||||
|         //)?; | ||||
|     //} | ||||
|  | ||||
|     //txn.commit()?; | ||||
|  | ||||
|     //// Finally, we extract the original milli::Index and close it | ||||
|     //Arc::try_unwrap(index.0) | ||||
|         //.map_err(|_e| "Couldn't close the index properly") | ||||
|         //.unwrap() | ||||
|         //.prepare_for_closing() | ||||
|         //.wait(); | ||||
|  | ||||
|     //// Updates are ignored in dumps V1. | ||||
|  | ||||
|     //Ok(()) | ||||
| } | ||||
|  | ||||
| /// We need to **always** be able to convert the old settings to the settings currently being used. | ||||
| /// | ||||
| /// For every field, `Some(Some(v))` becomes `Setting::Set`, `Some(None)` becomes | ||||
| /// `Setting::Reset` and `None` becomes `Setting::NotSet`. | ||||
| impl From<Settings> for index_controller::Settings<Unchecked> { | ||||
|     fn from(settings: Settings) -> Self { | ||||
|         Self { | ||||
|             distinct_attribute: match settings.distinct_attribute { | ||||
|                 Some(Some(attr)) => Setting::Set(attr), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             // the legacy `BTreeSet<String>` is re-collected into the collection type | ||||
|             // used by the current settings | ||||
|             displayed_attributes: match settings.displayed_attributes { | ||||
|                 Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             searchable_attributes: match settings.searchable_attributes { | ||||
|                 Some(Some(attrs)) => Setting::Set(attrs), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             // the legacy `attributesForFaceting` setting maps to `filterable_attributes` | ||||
|             filterable_attributes: match settings.attributes_for_faceting { | ||||
|                 Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             // the legacy settings have no equivalent of sortable attributes | ||||
|             sortable_attributes: Setting::NotSet, | ||||
|             // known criteria are kept, `asc(..)`/`desc(..)` rules are rewritten to the | ||||
|             // `field:asc` / `field:desc` form, and anything else is dropped with a log | ||||
|             ranking_rules: match settings.ranking_rules { | ||||
|                 Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| { | ||||
|                     match criterion.as_str() { | ||||
|                         "words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion), | ||||
|                         s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)), | ||||
|                         s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)), | ||||
|                         "wordsPosition" => { | ||||
|                             warn!("The criteria `attribute` and `wordsPosition` have been merged \ | ||||
|                                 into a single criterion `attribute` so `wordsPositon` will be \ | ||||
|                                 ignored"); | ||||
|                             None | ||||
|                         } | ||||
|                         s => { | ||||
|                             error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); | ||||
|                             None | ||||
|                         } | ||||
|                     } | ||||
|                 }).collect()), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             // the legacy `BTreeSet<String>` is re-collected into the collection type | ||||
|             // used by the current settings | ||||
|             stop_words: match settings.stop_words { | ||||
|                 Some(Some(stop_words)) => Setting::Set(stop_words.into_iter().collect()), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             // the legacy `BTreeMap<String, Vec<String>>` is re-collected into the map | ||||
|             // type used by the current settings | ||||
|             synonyms: match settings.synonyms { | ||||
|                 Some(Some(synonyms)) => Setting::Set(synonyms.into_iter().collect()), | ||||
|                 Some(None) => Setting::Reset, | ||||
|                 None => Setting::NotSet | ||||
|             }, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // /// Extract Settings from `settings.json` file present at provided `dir_path` | ||||
| //fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> { | ||||
|     //let path = dir_path.as_ref().join("settings.json"); | ||||
|     //let file = File::open(path)?; | ||||
|     //let reader = std::io::BufReader::new(file); | ||||
|     //let metadata = serde_json::from_reader(reader)?; | ||||
|  | ||||
|     //Ok(metadata) | ||||
| //} | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|  | ||||
|     /// Guards the JSON shape of default legacy settings: every key is emitted | ||||
|     /// with a `null` value, in declaration order. | ||||
|     #[test] | ||||
|     fn settings_format_regression() { | ||||
|         let expected = r##"{"rankingRules":null,"distinctAttribute":null,"searchableAttributes":null,"displayedAttributes":null,"stopWords":null,"synonyms":null,"attributesForFaceting":null}"##; | ||||
|         let serialized = serde_json::to_string(&Settings::default()).unwrap(); | ||||
|  | ||||
|         assert_eq!(expected, serialized); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,59 @@ | ||||
| use std::path::Path; | ||||
|  | ||||
| use chrono::{DateTime, Utc}; | ||||
| use log::info; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::index::Index; | ||||
| use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}; | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| /// Metadata describing a V2 dump; (de)serialized with camelCase keys. | ||||
| #[derive(Serialize, Deserialize, Debug)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct MetadataV2 { | ||||
|     /// Version of the crate that created the metadata (`CARGO_PKG_VERSION` at build time). | ||||
|     db_version: String, | ||||
|     /// Index database size recorded at creation (unit is not visible here; TODO confirm). | ||||
|     index_db_size: usize, | ||||
|     /// Update database size recorded at creation (unit is not visible here; TODO confirm). | ||||
|     update_db_size: usize, | ||||
|     /// Timestamp taken when the metadata was created. | ||||
|     dump_date: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl MetadataV2 { | ||||
|     /// Builds the metadata for a dump created now, stamped with the current crate version | ||||
|     /// and the current UTC time. | ||||
|     pub fn new(index_db_size: usize, update_db_size: usize) -> Self { | ||||
|         Self { | ||||
|             db_version: env!("CARGO_PKG_VERSION").to_string(), | ||||
|             index_db_size, | ||||
|             update_db_size, | ||||
|             dump_date: Utc::now(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Loads an extracted V2 dump located at `src` into the database directory `dst`. | ||||
|     /// | ||||
|     /// The uuid store is loaded first, then the updates, then every entry found in | ||||
|     /// `src/indexes`. The first failing step aborts the load and its error is returned. | ||||
|     /// | ||||
|     /// Note that the sizes stored in `self` are not consulted: the `index_db_size` and | ||||
|     /// `update_db_size` arguments are the ones passed down to the loaders. | ||||
|     pub fn load_dump( | ||||
|         self, | ||||
|         src: impl AsRef<Path>, | ||||
|         dst: impl AsRef<Path>, | ||||
|         index_db_size: usize, | ||||
|         update_db_size: usize, | ||||
|         indexing_options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         info!( | ||||
|             "Loading dump from {}, dump database version: {}, dump version: V2", | ||||
|             self.dump_date, self.db_version | ||||
|         ); | ||||
|  | ||||
|         info!("Loading index database."); | ||||
|         HeedUuidStore::load_dump(src.as_ref(), &dst)?; | ||||
|  | ||||
|         info!("Loading updates."); | ||||
|         UpdateStore::load_dump(&src, &dst, update_db_size)?; | ||||
|  | ||||
|         info!("Loading indexes."); | ||||
|         let indexes_path = src.as_ref().join("indexes"); | ||||
|         let indexes = indexes_path.read_dir()?; | ||||
|         // Every directory entry is handed to `Index::load_dump`; no filtering is done here. | ||||
|         for index in indexes { | ||||
|             let index = index?; | ||||
|             Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										14
									
								
								meilisearch-lib/src/index_controller/dump_actor/message.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								meilisearch-lib/src/index_controller/dump_actor/message.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| use tokio::sync::oneshot; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::DumpInfo; | ||||
|  | ||||
| /// Requests of the dump actor module. Each variant carries a oneshot sender (`ret`) on which | ||||
| /// the resulting `DumpInfo` (or error) is sent back. | ||||
| pub enum DumpMsg { | ||||
|     /// Ask for a new dump to be created. | ||||
|     CreateDump { | ||||
|         ret: oneshot::Sender<Result<DumpInfo>>, | ||||
|     }, | ||||
|     /// Ask for the information about the dump identified by `uid`. | ||||
|     DumpInfo { | ||||
|         uid: String, | ||||
|         ret: oneshot::Sender<Result<DumpInfo>>, | ||||
|     }, | ||||
| } | ||||
							
								
								
									
										204
									
								
								meilisearch-lib/src/index_controller/dump_actor/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										204
									
								
								meilisearch-lib/src/index_controller/dump_actor/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,204 @@ | ||||
| use std::fs::File; | ||||
| use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use log::{info, trace, warn}; | ||||
| #[cfg(test)] | ||||
| use mockall::automock; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use tokio::fs::create_dir_all; | ||||
|  | ||||
| use loaders::v1::MetadataV1; | ||||
| use loaders::v2::MetadataV2; | ||||
|  | ||||
| pub use actor::DumpActor; | ||||
| pub use handle_impl::*; | ||||
| pub use message::DumpMsg; | ||||
|  | ||||
| use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; | ||||
| use crate::index_controller::dump_actor::error::DumpActorError; | ||||
| use crate::options::IndexerOpts; | ||||
| use error::Result; | ||||
|  | ||||
| mod actor; | ||||
| pub mod error; | ||||
| mod handle_impl; | ||||
| mod loaders; | ||||
| mod message; | ||||
|  | ||||
| const META_FILE_NAME: &str = "metadata.json"; | ||||
|  | ||||
| /// Asynchronous interface for requesting dumps and querying their status. | ||||
| /// | ||||
| /// In test builds `automock` additionally generates a mock implementation of this trait. | ||||
| #[async_trait::async_trait] | ||||
| #[cfg_attr(test, automock)] | ||||
| pub trait DumpActorHandle { | ||||
|     /// Start the creation of a dump | ||||
|     /// Implementation: [handle_impl::DumpActorHandleImpl::create_dump] | ||||
|     async fn create_dump(&self) -> Result<DumpInfo>; | ||||
|  | ||||
|     /// Return the status of an already created dump | ||||
|     /// Implementation: [handle_impl::DumpActorHandleImpl::dump_info] | ||||
|     async fn dump_info(&self, uid: String) -> Result<DumpInfo>; | ||||
| } | ||||
|  | ||||
| /// Versioned dump metadata. The serialized form is internally tagged: the `dumpVersion` | ||||
| /// key holds the variant name (`V1` or `V2`) next to the variant's own fields. | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| #[serde(tag = "dumpVersion")] | ||||
| pub enum Metadata { | ||||
|     V1(MetadataV1), | ||||
|     V2(MetadataV2), | ||||
| } | ||||
|  | ||||
| impl Metadata { | ||||
|     /// Creates `Metadata::V2` wrapping a freshly built `MetadataV2` with the given sizes. | ||||
|     pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self { | ||||
|         let meta = MetadataV2::new(index_db_size, update_db_size); | ||||
|         Self::V2(meta) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// State of a dump. Serialized in snake_case (`done`, `in_progress`, `failed`). | ||||
| #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] | ||||
| #[serde(rename_all = "snake_case")] | ||||
| pub enum DumpStatus { | ||||
|     Done, | ||||
|     InProgress, | ||||
|     Failed, | ||||
| } | ||||
|  | ||||
| /// Status report for a dump. Serialized with camelCase keys; `error` and `finishedAt` are | ||||
| /// left out of the output while they are `None`. | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct DumpInfo { | ||||
|     /// Identifier of the dump. | ||||
|     pub uid: String, | ||||
|     /// Current state of the dump. | ||||
|     pub status: DumpStatus, | ||||
|     /// Error message, set by `with_error`. | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub error: Option<String>, | ||||
|     /// Time at which this `DumpInfo` was created. | ||||
|     started_at: DateTime<Utc>, | ||||
|     /// Time at which the dump was marked done or failed. | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     finished_at: Option<DateTime<Utc>>, | ||||
| } | ||||
|  | ||||
| impl DumpInfo { | ||||
|     /// Creates a report for dump `uid` in the given `status`, started now, with no error | ||||
|     /// and no finish time. | ||||
|     pub fn new(uid: String, status: DumpStatus) -> Self { | ||||
|         Self { | ||||
|             uid, | ||||
|             status, | ||||
|             error: None, | ||||
|             started_at: Utc::now(), | ||||
|             finished_at: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Marks the dump as failed: sets the status to `Failed`, records `error` and stamps | ||||
|     /// the finish time. Despite the `with_` prefix this mutates `self` in place. | ||||
|     pub fn with_error(&mut self, error: String) { | ||||
|         self.status = DumpStatus::Failed; | ||||
|         self.finished_at = Some(Utc::now()); | ||||
|         self.error = Some(error); | ||||
|     } | ||||
|  | ||||
|     /// Marks the dump as successfully finished and stamps the finish time. | ||||
|     pub fn done(&mut self) { | ||||
|         self.finished_at = Some(Utc::now()); | ||||
|         self.status = DumpStatus::Done; | ||||
|     } | ||||
|  | ||||
|     /// Returns `true` while the status is `InProgress`. | ||||
|     pub fn dump_already_in_progress(&self) -> bool { | ||||
|         self.status == DumpStatus::InProgress | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Restores a database at `dst_path` from the dump archive at `src_path`. | ||||
| /// | ||||
| /// Beware of the argument order: the destination comes first, the source second. | ||||
| /// | ||||
| /// The archive is extracted into a temporary directory created in the current working | ||||
| /// directory, its `metadata.json` is read to pick the loader (V1 or V2), and the database is | ||||
| /// built in a temporary directory next to `dst_path` before being moved into place. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Fails if the archive cannot be extracted, the metadata cannot be read or parsed, | ||||
| /// `dst_path` has no parent directory, the loader fails, or the final removal/rename fails. | ||||
| pub fn load_dump( | ||||
|     dst_path: impl AsRef<Path>, | ||||
|     src_path: impl AsRef<Path>, | ||||
|     index_db_size: usize, | ||||
|     update_db_size: usize, | ||||
|     indexer_opts: &IndexerOpts, | ||||
| ) -> anyhow::Result<()> { | ||||
|     let tmp_src = tempfile::tempdir_in(".")?; | ||||
|     let tmp_src_path = tmp_src.path(); | ||||
|  | ||||
|     crate::from_tar_gz(&src_path, tmp_src_path)?; | ||||
|  | ||||
|     let meta_path = tmp_src_path.join(META_FILE_NAME); | ||||
|     let mut meta_file = File::open(&meta_path)?; | ||||
|     let meta: Metadata = serde_json::from_reader(&mut meta_file)?; | ||||
|  | ||||
|     // The new database is built beside the destination, presumably so that the final | ||||
|     // rename stays on a single filesystem. | ||||
|     let dst_dir = dst_path | ||||
|         .as_ref() | ||||
|         .parent() | ||||
|         .with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?; | ||||
|  | ||||
|     let tmp_dst = tempfile::tempdir_in(dst_dir)?; | ||||
|  | ||||
|     match meta { | ||||
|         Metadata::V1(meta) => { | ||||
|             meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)? | ||||
|         } | ||||
|         Metadata::V2(meta) => meta.load_dump( | ||||
|             &tmp_src_path, | ||||
|             tmp_dst.path(), | ||||
|             index_db_size, | ||||
|             update_db_size, | ||||
|             indexer_opts, | ||||
|         )?, | ||||
|     } | ||||
|     // Keep the temporary directory on disk (it would otherwise be deleted on drop) and move | ||||
|     // it into place. An existing database is removed first, so the swap is not atomic then. | ||||
|     let persisted_dump = tmp_dst.into_path(); | ||||
|     if dst_path.as_ref().exists() { | ||||
|         warn!("Overwriting database at {}", dst_path.as_ref().display()); | ||||
|         std::fs::remove_dir_all(&dst_path)?; | ||||
|     } | ||||
|  | ||||
|     std::fs::rename(&persisted_dump, &dst_path)?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Everything needed to create one dump; consumed by `run`. | ||||
| struct DumpTask<U, P> { | ||||
|     /// Directory in which the final `<uid>.dump` file is written. | ||||
|     path: PathBuf, | ||||
|     /// Handle asked to dump the uuids. | ||||
|     uuid_resolver: U, | ||||
|     /// Handle asked to dump the updates for those uuids. | ||||
|     update_handle: P, | ||||
|     /// Identifier of the dump; used as the file stem of the archive. | ||||
|     uid: String, | ||||
|     /// Recorded in the dump metadata. | ||||
|     update_db_size: usize, | ||||
|     /// Recorded in the dump metadata. | ||||
|     index_db_size: usize, | ||||
| } | ||||
|  | ||||
| impl<U, P> DumpTask<U, P> | ||||
| where | ||||
|     U: UuidResolverHandle + Send + Sync + Clone + 'static, | ||||
|     P: UpdateActorHandle + Send + Sync + Clone + 'static, | ||||
|  | ||||
| { | ||||
|     /// Creates the dump archive `<path>/<uid>.dump`. | ||||
|     /// | ||||
|     /// The dump content is first assembled in a temporary directory inside `path`: the | ||||
|     /// V2 metadata file is written, then the uuid resolver and the update handle are asked to | ||||
|     /// dump into it. The directory is finally archived into a temporary file which is | ||||
|     /// persisted under its final name. | ||||
|     async fn run(self) -> Result<()> { | ||||
|         trace!("Performing dump."); | ||||
|  | ||||
|         create_dir_all(&self.path).await?; | ||||
|  | ||||
|         let path_clone = self.path.clone(); | ||||
|         let temp_dump_dir = | ||||
|             tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; | ||||
|         let temp_dump_path = temp_dump_dir.path().to_owned(); | ||||
|  | ||||
|         let meta = Metadata::new_v2(self.index_db_size, self.update_db_size); | ||||
|         let meta_path = temp_dump_path.join(META_FILE_NAME); | ||||
|         // NOTE(review): synchronous file creation and write inside an async fn. | ||||
|         let mut meta_file = File::create(&meta_path)?; | ||||
|         serde_json::to_writer(&mut meta_file, &meta)?; | ||||
|  | ||||
|         let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?; | ||||
|  | ||||
|         self.update_handle | ||||
|             .dump(uuids, temp_dump_path.clone()) | ||||
|             .await?; | ||||
|  | ||||
|         // Archiving is done on the blocking thread pool. | ||||
|         let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> { | ||||
|             let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; | ||||
|             crate::to_tar_gz(temp_dump_path, temp_dump_file.path()) | ||||
|                 .map_err(|e| DumpActorError::Internal(e.into()))?; | ||||
|  | ||||
|             let dump_path = self.path.join(self.uid).with_extension("dump"); | ||||
|             temp_dump_file.persist(&dump_path)?; | ||||
|  | ||||
|             Ok(dump_path) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         info!("Created dump in {:?}.", dump_path); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										40
									
								
								meilisearch-lib/src/index_controller/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								meilisearch-lib/src/index_controller/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| use meilisearch_error::Code; | ||||
| use meilisearch_error::ErrorCode; | ||||
|  | ||||
| use crate::index::error::IndexError; | ||||
|  | ||||
| use super::dump_actor::error::DumpActorError; | ||||
| use super::index_actor::error::IndexActorError; | ||||
| use super::update_actor::error::UpdateActorError; | ||||
| use super::uuid_resolver::error::UuidResolverError; | ||||
|  | ||||
| pub type Result<T> = std::result::Result<T, IndexControllerError>; | ||||
|  | ||||
| /// Errors returned by the index controller. Apart from `MissingUid`, every variant wraps | ||||
| /// the error of a sub-component, is displayed as that inner error, and can be created from | ||||
| /// it with `?` thanks to `#[from]`. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| pub enum IndexControllerError { | ||||
|     #[error("Index creation must have an uid")] | ||||
|     MissingUid, | ||||
|     #[error("{0}")] | ||||
|     Uuid(#[from] UuidResolverError), | ||||
|     #[error("{0}")] | ||||
|     IndexActor(#[from] IndexActorError), | ||||
|     #[error("{0}")] | ||||
|     UpdateActor(#[from] UpdateActorError), | ||||
|     #[error("{0}")] | ||||
|     DumpActor(#[from] DumpActorError), | ||||
|     #[error("{0}")] | ||||
|     IndexError(#[from] IndexError), | ||||
| } | ||||
|  | ||||
| impl ErrorCode for IndexControllerError { | ||||
|     /// `MissingUid` maps to `Code::BadRequest`; every wrapping variant forwards to the | ||||
|     /// error code of the error it wraps. | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             IndexControllerError::MissingUid => Code::BadRequest, | ||||
|             IndexControllerError::Uuid(e) => e.error_code(), | ||||
|             IndexControllerError::IndexActor(e) => e.error_code(), | ||||
|             IndexControllerError::UpdateActor(e) => e.error_code(), | ||||
|             IndexControllerError::DumpActor(e) => e.error_code(), | ||||
|             IndexControllerError::IndexError(e) => e.error_code(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										351
									
								
								meilisearch-lib/src/index_controller/index_actor/actor.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										351
									
								
								meilisearch-lib/src/index_controller/index_actor/actor.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,351 @@ | ||||
| use std::path::PathBuf; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use async_stream::stream; | ||||
| use futures::stream::StreamExt; | ||||
| use heed::CompactionOption; | ||||
| use log::debug; | ||||
| use milli::update::UpdateBuilder; | ||||
| use tokio::task::spawn_blocking; | ||||
| use tokio::{fs, sync::mpsc}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index::{ | ||||
|     update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings, | ||||
| }; | ||||
| use crate::index_controller::{ | ||||
|     get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, | ||||
| }; | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| use super::error::{IndexActorError, Result}; | ||||
| use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore}; | ||||
|  | ||||
| /// Upper bound on the number of messages `IndexActor::run` handles concurrently. | ||||
| pub const CONCURRENT_INDEX_MSG: usize = 10; | ||||
|  | ||||
| /// Actor that receives `IndexMsg` requests and serves them using the indexes held in `store`. | ||||
| pub struct IndexActor<S> { | ||||
|     /// Inbox of the actor; `Some` until `run` takes it. | ||||
|     receiver: Option<mpsc::Receiver<IndexMsg>>, | ||||
|     /// Shared handler used to apply updates to an index. | ||||
|     update_handler: Arc<UpdateHandler>, | ||||
|     /// Store used to get, create and delete indexes by uuid. | ||||
|     store: S, | ||||
| } | ||||
|  | ||||
| impl<S> IndexActor<S> | ||||
| where S: IndexStore + Sync + Send, | ||||
| { | ||||
|     /// Creates an actor reading from `receiver` and backed by `store`. | ||||
|     /// | ||||
|     /// Fails if the `UpdateHandler` cannot be built from `options`. | ||||
|     pub fn new( | ||||
|         receiver: mpsc::Receiver<IndexMsg>, | ||||
|         store: S, | ||||
|         options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let update_handler = UpdateHandler::new(options)?; | ||||
|         let update_handler = Arc::new(update_handler); | ||||
|         let receiver = Some(receiver); | ||||
|  | ||||
|         Ok(Self { | ||||
|             receiver, | ||||
|             update_handler, | ||||
|             store, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Drains the inbox until the channel is closed, handing every received message to | ||||
|     /// `handle_message`. There is a single inbox; at most `CONCURRENT_INDEX_MSG` messages | ||||
|     /// are handled concurrently. | ||||
|     pub async fn run(mut self) { | ||||
|         // The receiver is moved out of `self` so that the stream below can own it while | ||||
|         // the `for_each_concurrent` closure borrows `self`. | ||||
|         let mut receiver = self | ||||
|             .receiver | ||||
|             .take() | ||||
|             .expect("Index Actor must have a inbox at this point."); | ||||
|  | ||||
|         // Turn the receiver into a stream that ends when the channel is closed. | ||||
|         let stream = stream! { | ||||
|             loop { | ||||
|                 match receiver.recv().await { | ||||
|                     Some(msg) => yield msg, | ||||
|                     None => break, | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         stream | ||||
|             .for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg)) | ||||
|             .await; | ||||
|     } | ||||
|  | ||||
|     /// Dispatches `msg` to the matching `handle_*` method and sends the outcome back on the | ||||
|     /// message's `ret` channel. A failed send (the requester dropped its receiver) is | ||||
|     /// deliberately ignored. | ||||
|     async fn handle_message(&self, msg: IndexMsg) { | ||||
|         use IndexMsg::*; | ||||
|         match msg { | ||||
|             CreateIndex { | ||||
|                 uuid, | ||||
|                 primary_key, | ||||
|                 ret, | ||||
|             } => { | ||||
|                 let _ = ret.send(self.handle_create_index(uuid, primary_key).await); | ||||
|             } | ||||
|             Update { | ||||
|                 ret, | ||||
|                 meta, | ||||
|                 uuid, | ||||
|             } => { | ||||
|                 let _ = ret.send(self.handle_update(uuid, meta).await); | ||||
|             } | ||||
|             Search { ret, query, uuid } => { | ||||
|                 let _ = ret.send(self.handle_search(uuid, query).await); | ||||
|             } | ||||
|             Settings { ret, uuid } => { | ||||
|                 let _ = ret.send(self.handle_settings(uuid).await); | ||||
|             } | ||||
|             Documents { | ||||
|                 ret, | ||||
|                 uuid, | ||||
|                 attributes_to_retrieve, | ||||
|                 offset, | ||||
|                 limit, | ||||
|             } => { | ||||
|                 let _ = ret.send( | ||||
|                     self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve) | ||||
|                         .await, | ||||
|                 ); | ||||
|             } | ||||
|             Document { | ||||
|                 uuid, | ||||
|                 attributes_to_retrieve, | ||||
|                 doc_id, | ||||
|                 ret, | ||||
|             } => { | ||||
|                 let _ = ret.send( | ||||
|                     self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve) | ||||
|                         .await, | ||||
|                 ); | ||||
|             } | ||||
|             Delete { uuid, ret } => { | ||||
|                 let _ = ret.send(self.handle_delete(uuid).await); | ||||
|             } | ||||
|             GetMeta { uuid, ret } => { | ||||
|                 let _ = ret.send(self.handle_get_meta(uuid).await); | ||||
|             } | ||||
|             UpdateIndex { | ||||
|                 uuid, | ||||
|                 index_settings, | ||||
|                 ret, | ||||
|             } => { | ||||
|                 let _ = ret.send(self.handle_update_index(uuid, index_settings).await); | ||||
|             } | ||||
|             Snapshot { uuid, path, ret } => { | ||||
|                 let _ = ret.send(self.handle_snapshot(uuid, path).await); | ||||
|             } | ||||
|             Dump { uuid, path, ret } => { | ||||
|                 let _ = ret.send(self.handle_dump(uuid, path).await); | ||||
|             } | ||||
|             GetStats { uuid, ret } => { | ||||
|                 let _ = ret.send(self.handle_get_stats(uuid).await); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Runs `query` against the index `uuid` on the blocking thread pool. | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|         let result = spawn_blocking(move || index.perform_search(query)).await??; | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     /// Creates the index `uuid` in the store with the optional `primary_key` and returns | ||||
|     /// its metadata, read on the blocking thread pool. | ||||
|     async fn handle_create_index( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         primary_key: Option<String>, | ||||
|     ) -> Result<IndexMeta> { | ||||
|         let index = self.store.create(uuid, primary_key).await?; | ||||
|         let meta = spawn_blocking(move || IndexMeta::new(&index)).await??; | ||||
|         Ok(meta) | ||||
|     } | ||||
|  | ||||
|     /// Applies the update `meta` to the index `uuid` on the blocking thread pool. | ||||
|     /// | ||||
|     /// If the index does not exist yet it is created without a primary key. The outer | ||||
|     /// `Result` reports actor-level failures (store access, task join); the inner one is the | ||||
|     /// outcome of the update itself (`Processed` or `Failed`). | ||||
|     async fn handle_update( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         meta: Processing, | ||||
|     ) -> Result<std::result::Result<Processed, Failed>> { | ||||
|         debug!("Processing update {}", meta.id()); | ||||
|         let update_handler = self.update_handler.clone(); | ||||
|         let index = match self.store.get(uuid).await? { | ||||
|             Some(index) => index, | ||||
|             None => self.store.create(uuid, None).await?, | ||||
|         }; | ||||
|  | ||||
|         Ok(spawn_blocking(move || update_handler.handle_update(index, meta)).await?) | ||||
|     } | ||||
|  | ||||
|     /// Reads the settings of the index `uuid` on the blocking thread pool. | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_settings(&self, uuid: Uuid) -> Result<Settings<Checked>> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|         let result = spawn_blocking(move || index.settings()).await??; | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     /// Retrieves documents from the index `uuid` on the blocking thread pool, forwarding | ||||
|     /// `offset`, `limit` and `attributes_to_retrieve` to `Index::retrieve_documents`. | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_fetch_documents( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Vec<Document>> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|         let result = | ||||
|             spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)) | ||||
|                 .await??; | ||||
|  | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     /// Retrieves the document `doc_id` from the index `uuid` on the blocking thread pool, | ||||
|     /// forwarding `attributes_to_retrieve` to `Index::retrieve_document`. | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_fetch_document( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         doc_id: String, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Document> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|  | ||||
|         let result = | ||||
|             spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)) | ||||
|                 .await??; | ||||
|  | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     /// Removes the index `uuid` from the store. | ||||
|     /// | ||||
|     /// If the store handed back an index, closing it is delegated to a detached task: this | ||||
|     /// method returns `Ok(())` without waiting for the environment to actually be closed. | ||||
|     /// Deleting an index the store does not know about is not an error here. | ||||
|     async fn handle_delete(&self, uuid: Uuid) -> Result<()> { | ||||
|         let index = self.store.delete(uuid).await?; | ||||
|  | ||||
|         if let Some(index) = index { | ||||
|             tokio::task::spawn(async move { | ||||
|                 let index = index.inner; | ||||
|                 // Obtain the value out of the shared pointer before closing it. | ||||
|                 let store = get_arc_ownership_blocking(index).await; | ||||
|                 // The blocking task is not awaited either. | ||||
|                 spawn_blocking(move || { | ||||
|                     store.prepare_for_closing().wait(); | ||||
|                     debug!("Index closed"); | ||||
|                 }); | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Returns the metadata of the index `uuid`, read on the blocking thread pool, or | ||||
|     /// `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> { | ||||
|         match self.store.get(uuid).await? { | ||||
|             Some(index) => { | ||||
|                 let meta = spawn_blocking(move || IndexMeta::new(&index)).await??; | ||||
|                 Ok(meta) | ||||
|             } | ||||
|             None => Err(IndexActorError::UnexistingIndex), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Applies `index_settings` to the index `uuid` and returns its metadata. | ||||
|     /// | ||||
|     /// Only `primary_key` is looked at. When it is set, it is written to the index within a | ||||
|     /// write transaction, unless the index already has a primary key, in which case | ||||
|     /// `IndexActorError::ExistingPrimaryKey` is returned. When it is `None`, nothing is | ||||
|     /// modified and the current metadata is returned. | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_update_index( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         index_settings: IndexSettings, | ||||
|     ) -> Result<IndexMeta> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|  | ||||
|         let result = spawn_blocking(move || match index_settings.primary_key { | ||||
|             Some(primary_key) => { | ||||
|                 let mut txn = index.write_txn()?; | ||||
|                 if index.primary_key(&txn)?.is_some() { | ||||
|                     return Err(IndexActorError::ExistingPrimaryKey); | ||||
|                 } | ||||
|                 // NOTE(review): the `0` passed to `UpdateBuilder::new` looks like a | ||||
|                 // placeholder update id; confirm against milli. | ||||
|                 let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index); | ||||
|                 builder.set_primary_key(primary_key); | ||||
|                 builder.execute(|_, _| ())?; | ||||
|                 // The metadata is read through the still-open transaction, before commit. | ||||
|                 let meta = IndexMeta::new_txn(&index, &txn)?; | ||||
|                 txn.commit()?; | ||||
|                 Ok(meta) | ||||
|             } | ||||
|             None => { | ||||
|                 let meta = IndexMeta::new(&index)?; | ||||
|                 Ok(meta) | ||||
|             } | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     /// Copies the environment of the index `uuid` to `path/indexes/index-{uuid}/data.mdb`, | ||||
|     /// with compaction enabled. | ||||
|     /// | ||||
|     /// The `indexes` directory is always created. If the store has no index for `uuid`, | ||||
|     /// nothing else happens and `Ok(())` is returned. | ||||
|     async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> { | ||||
|         use tokio::fs::create_dir_all; | ||||
|  | ||||
|         path.push("indexes"); | ||||
|         create_dir_all(&path).await?; | ||||
|  | ||||
|         if let Some(index) = self.store.get(uuid).await? { | ||||
|             let mut index_path = path.join(format!("index-{}", uuid)); | ||||
|  | ||||
|             create_dir_all(&index_path).await?; | ||||
|  | ||||
|             index_path.push("data.mdb"); | ||||
|             spawn_blocking(move || -> Result<()> { | ||||
|                 // Get write txn to wait for ongoing write transaction before snapshot. | ||||
|                 let _txn = index.write_txn()?; | ||||
|                 index | ||||
|                     .env | ||||
|                     .copy_to_path(index_path, CompactionOption::Enabled)?; | ||||
|                 Ok(()) | ||||
|             }) | ||||
|             .await??; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Dumps the index `uuid` into `path/indexes/index-{uuid}/`, creating that directory | ||||
|     /// first. The content is written by `Index::dump` on the blocking thread pool | ||||
|     /// (presumably a `documents.jsonl` and a `settings.json`; confirm against `Index::dump`). | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|  | ||||
|         let path = path.join(format!("indexes/index-{}/", uuid)); | ||||
|         fs::create_dir_all(&path).await?; | ||||
|  | ||||
|         tokio::task::spawn_blocking(move || index.dump(path)).await??; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Collects the statistics of the index `uuid` within a read transaction, on the | ||||
|     /// blocking thread pool. `is_indexing` is not determined here and is left to `None`. | ||||
|     /// | ||||
|     /// Returns `IndexActorError::UnexistingIndex` if the store has no such index. | ||||
|     async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> { | ||||
|         let index = self | ||||
|             .store | ||||
|             .get(uuid) | ||||
|             .await? | ||||
|             .ok_or(IndexActorError::UnexistingIndex)?; | ||||
|  | ||||
|         spawn_blocking(move || { | ||||
|             let rtxn = index.read_txn()?; | ||||
|  | ||||
|             Ok(IndexStats { | ||||
|                 size: index.size(), | ||||
|                 number_of_documents: index.number_of_documents(&rtxn)?, | ||||
|                 is_indexing: None, | ||||
|                 field_distribution: index.field_distribution(&rtxn)?, | ||||
|             }) | ||||
|         }) | ||||
|         .await? | ||||
|     } | ||||
| } | ||||
							
								
								
									
										48
									
								
								meilisearch-lib/src/index_controller/index_actor/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								meilisearch-lib/src/index_controller/index_actor/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
|  | ||||
| use crate::{error::MilliError, index::error::IndexError}; | ||||
|  | ||||
| pub type Result<T> = std::result::Result<T, IndexActorError>; | ||||
|  | ||||
| /// Errors returned by the index actor. | ||||
| #[derive(thiserror::Error, Debug)] | ||||
| pub enum IndexActorError { | ||||
|     /// Error coming from the index itself; displayed as the inner error. | ||||
|     #[error("{0}")] | ||||
|     IndexError(#[from] IndexError), | ||||
|     #[error("Index already exists")] | ||||
|     IndexAlreadyExists, | ||||
|     #[error("Index not found")] | ||||
|     UnexistingIndex, | ||||
|     #[error("A primary key is already present. It's impossible to update it")] | ||||
|     ExistingPrimaryKey, | ||||
|     /// Any other failure, boxed; see the `internal_error!` invocation for the error types | ||||
|     /// converted into this variant. | ||||
|     #[error("Internal Error: {0}")] | ||||
|     Internal(Box<dyn std::error::Error + Send + Sync + 'static>), | ||||
|     /// Error coming from milli; displayed as the inner error. | ||||
|     #[error("{0}")] | ||||
|     Milli(#[from] milli::Error), | ||||
| } | ||||
|  | ||||
| /// For every listed error type, generates a `From` implementation that boxes the error | ||||
| /// into `IndexActorError::Internal`, so that `?` can be used on it. | ||||
| macro_rules! internal_error { | ||||
|     ($($other:path), *) => { | ||||
|         $( | ||||
|             impl From<$other> for IndexActorError { | ||||
|                 fn from(other: $other) -> Self { | ||||
|                     Self::Internal(Box::new(other)) | ||||
|                 } | ||||
|             } | ||||
|         )* | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Storage, task-join and I/O failures are all reported as internal errors. | ||||
| internal_error!(heed::Error, tokio::task::JoinError, std::io::Error); | ||||
|  | ||||
| impl ErrorCode for IndexActorError { | ||||
|     /// Maps each variant to its error code. Wrapped index errors forward to their own | ||||
|     /// code, and milli errors are mapped through the `MilliError` wrapper. | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             IndexActorError::IndexError(e) => e.error_code(), | ||||
|             IndexActorError::IndexAlreadyExists => Code::IndexAlreadyExists, | ||||
|             IndexActorError::UnexistingIndex => Code::IndexNotFound, | ||||
|             IndexActorError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent, | ||||
|             IndexActorError::Internal(_) => Code::Internal, | ||||
|             IndexActorError::Milli(e) => MilliError(e).error_code(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										162
									
								
								meilisearch-lib/src/index_controller/index_actor/handle_impl.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								meilisearch-lib/src/index_controller/index_actor/handle_impl.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,162 @@ | ||||
| use crate::options::IndexerOpts; | ||||
| use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use tokio::sync::{mpsc, oneshot}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::{ | ||||
|     index::Checked, | ||||
|     index_controller::{IndexSettings, IndexStats, Processing}, | ||||
| }; | ||||
| use crate::{ | ||||
|     index::{Document, SearchQuery, SearchResult, Settings}, | ||||
|     index_controller::{Failed, Processed}, | ||||
| }; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore}; | ||||
|  | ||||
| /// Cloneable handle that forwards requests to the index actor over an mpsc channel. | ||||
| #[derive(Clone)] | ||||
| pub struct IndexActorHandleImpl { | ||||
|     /// Sending half of the actor's inbox. | ||||
|     sender: mpsc::Sender<IndexMsg>, | ||||
| } | ||||
|  | ||||
| #[async_trait::async_trait] | ||||
| impl IndexActorHandle for IndexActorHandleImpl { | ||||
|     /// Sends an `IndexMsg::CreateIndex` request and waits for the actor's answer. | ||||
|     /// | ||||
|     /// A failed send is ignored; in that case, or whenever the actor drops the reply | ||||
|     /// channel without answering, this panics with "IndexActor has been killed". | ||||
|     async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::CreateIndex { | ||||
|             ret, | ||||
|             uuid, | ||||
|             primary_key, | ||||
|         }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         receiver.await.expect("IndexActor has been killed") | ||||
|     } | ||||
|  | ||||
|     /// Sends an `IndexMsg::Update` request and waits for the actor's answer. | ||||
|     /// | ||||
|     /// The outer `Result` carries actor-level errors, the inner one the outcome of the | ||||
|     /// update. Panics with "IndexActor has been killed" if no answer can be received. | ||||
|     async fn update( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         meta: Processing, | ||||
|     ) -> Result<std::result::Result<Processed, Failed>> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Update { | ||||
|             ret, | ||||
|             meta, | ||||
|             uuid, | ||||
|         }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Search { uuid, query, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Settings { uuid, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn documents( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Vec<Document>> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Documents { | ||||
|             uuid, | ||||
|             ret, | ||||
|             offset, | ||||
|             attributes_to_retrieve, | ||||
|             limit, | ||||
|         }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn document( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         doc_id: String, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Document> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Document { | ||||
|             uuid, | ||||
|             ret, | ||||
|             doc_id, | ||||
|             attributes_to_retrieve, | ||||
|         }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<()> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Delete { uuid, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::GetMeta { uuid, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::UpdateIndex { | ||||
|             uuid, | ||||
|             index_settings, | ||||
|             ret, | ||||
|         }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Snapshot { uuid, path, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::Dump { uuid, path, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
|  | ||||
|     async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> { | ||||
|         let (ret, receiver) = oneshot::channel(); | ||||
|         let msg = IndexMsg::GetStats { uuid, ret }; | ||||
|         let _ = self.sender.send(msg).await; | ||||
|         Ok(receiver.await.expect("IndexActor has been killed")?) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl IndexActorHandleImpl { | ||||
|     /// Builds the index store rooted at `path`, spawns an `IndexActor` on the tokio runtime | ||||
|     /// and returns a handle connected to it through a channel with a capacity of 100 messages. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the error produced by `IndexActor::new`, if any. | ||||
|     pub fn new( | ||||
|         path: impl AsRef<Path>, | ||||
|         index_size: usize, | ||||
|         options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let store = MapIndexStore::new(&path, index_size); | ||||
|         let (sender, inbox) = mpsc::channel(100); | ||||
|  | ||||
|         let actor = IndexActor::new(inbox, store, options)?; | ||||
|         tokio::task::spawn(actor.run()); | ||||
|  | ||||
|         Ok(Self { sender }) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										73
									
								
								meilisearch-lib/src/index_controller/index_actor/message.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								meilisearch-lib/src/index_controller/index_actor/message.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| use std::path::PathBuf; | ||||
|  | ||||
| use tokio::sync::oneshot; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::Result as IndexResult; | ||||
| use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; | ||||
| use crate::index_controller::{Failed, IndexStats, Processed, Processing}; | ||||
|  | ||||
| use super::{IndexMeta, IndexSettings}; | ||||
|  | ||||
| /// Requests sent over the mpsc channel to the index actor. | ||||
| /// | ||||
| /// Every variant targets the index identified by `uuid` and carries a oneshot sender `ret` | ||||
| /// on which the result of the operation is sent back to the requester. | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| pub enum IndexMsg { | ||||
|     /// Create an index, optionally with a primary key. | ||||
|     CreateIndex { | ||||
|         uuid: Uuid, | ||||
|         primary_key: Option<String>, | ||||
|         ret: oneshot::Sender<IndexResult<IndexMeta>>, | ||||
|     }, | ||||
|     /// Process the update described by `meta`; the inner `Result` distinguishes a | ||||
|     /// `Processed` update from a `Failed` one. | ||||
|     Update { | ||||
|         uuid: Uuid, | ||||
|         meta: Processing, | ||||
|         ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>, | ||||
|     }, | ||||
|     /// Run a search query. | ||||
|     Search { | ||||
|         uuid: Uuid, | ||||
|         query: SearchQuery, | ||||
|         ret: oneshot::Sender<IndexResult<SearchResult>>, | ||||
|     }, | ||||
|     /// Retrieve the settings of the index. | ||||
|     Settings { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<IndexResult<Settings<Checked>>>, | ||||
|     }, | ||||
|     /// Retrieve several documents, given an `offset` and a `limit`. | ||||
|     Documents { | ||||
|         uuid: Uuid, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         ret: oneshot::Sender<IndexResult<Vec<Document>>>, | ||||
|     }, | ||||
|     /// Retrieve the single document identified by `doc_id`. | ||||
|     Document { | ||||
|         uuid: Uuid, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|         doc_id: String, | ||||
|         ret: oneshot::Sender<IndexResult<Document>>, | ||||
|     }, | ||||
|     /// Delete the index. | ||||
|     Delete { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<IndexResult<()>>, | ||||
|     }, | ||||
|     /// Retrieve the metadata of the index. | ||||
|     GetMeta { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<IndexResult<IndexMeta>>, | ||||
|     }, | ||||
|     /// Apply `index_settings` to the index and return its metadata. | ||||
|     UpdateIndex { | ||||
|         uuid: Uuid, | ||||
|         index_settings: IndexSettings, | ||||
|         ret: oneshot::Sender<IndexResult<IndexMeta>>, | ||||
|     }, | ||||
|     /// Snapshot the index into `path`. | ||||
|     Snapshot { | ||||
|         uuid: Uuid, | ||||
|         path: PathBuf, | ||||
|         ret: oneshot::Sender<IndexResult<()>>, | ||||
|     }, | ||||
|     /// Dump the index into `path`. | ||||
|     Dump { | ||||
|         uuid: Uuid, | ||||
|         path: PathBuf, | ||||
|         ret: oneshot::Sender<IndexResult<()>>, | ||||
|     }, | ||||
|     /// Retrieve the statistics of the index. | ||||
|     GetStats { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<IndexResult<IndexStats>>, | ||||
|     }, | ||||
| } | ||||
							
								
								
									
										167
									
								
								meilisearch-lib/src/index_controller/index_actor/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										167
									
								
								meilisearch-lib/src/index_controller/index_actor/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,167 @@ | ||||
| use std::path::PathBuf; | ||||
|  | ||||
| use chrono::{DateTime, Utc}; | ||||
| #[cfg(test)] | ||||
| use mockall::automock; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use actor::IndexActor; | ||||
| pub use actor::CONCURRENT_INDEX_MSG; | ||||
| pub use handle_impl::IndexActorHandleImpl; | ||||
| use message::IndexMsg; | ||||
| use store::{IndexStore, MapIndexStore}; | ||||
|  | ||||
| use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings}; | ||||
| use crate::index_controller::{Failed, IndexStats, Processed, Processing}; | ||||
| use error::Result; | ||||
|  | ||||
| use super::IndexSettings; | ||||
|  | ||||
| mod actor; | ||||
| pub mod error; | ||||
| mod handle_impl; | ||||
| mod message; | ||||
| mod store; | ||||
|  | ||||
| /// Metadata of an index: creation date, last update date and primary key. | ||||
| /// | ||||
| /// Field names are (de)serialized in camelCase. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexMeta { | ||||
|     /// Creation date, as read from the index. | ||||
|     created_at: DateTime<Utc>, | ||||
|     /// Last update date, as read from the index. | ||||
|     pub updated_at: DateTime<Utc>, | ||||
|     /// Primary key of the index, `None` when the index reports none. | ||||
|     pub primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| impl IndexMeta { | ||||
|     /// Reads the metadata of `index` inside a read transaction opened for the occasion. | ||||
|     fn new(index: &Index) -> Result<Self> { | ||||
|         let rtxn = index.read_txn()?; | ||||
|         Self::new_txn(index, &rtxn) | ||||
|     } | ||||
|  | ||||
|     /// Reads the metadata of `index` using the caller-provided read transaction `txn`. | ||||
|     fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> { | ||||
|         // Fields are evaluated in declaration order, so the reads happen in the order | ||||
|         // creation date, update date, primary key. | ||||
|         Ok(Self { | ||||
|             created_at: index.created_at(txn)?, | ||||
|             updated_at: index.updated_at(txn)?, | ||||
|             primary_key: index.primary_key(txn)?.map(String::from), | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Asynchronous interface to the index actor. | ||||
| /// | ||||
| /// Every method targets the index identified by `uuid`. A mock implementation | ||||
| /// (`MockIndexActorHandle`) is generated by `automock` in test builds. | ||||
| #[async_trait::async_trait] | ||||
| #[cfg_attr(test, automock)] | ||||
| pub trait IndexActorHandle { | ||||
|     /// Creates the index, optionally with a primary key, and returns its metadata. | ||||
|     async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>; | ||||
|     /// Processes the update `meta`; the inner `Result` tells whether the update was | ||||
|     /// `Processed` or `Failed`. | ||||
|     async fn update( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         meta: Processing, | ||||
|     ) -> Result<std::result::Result<Processed, Failed>>; | ||||
|     /// Runs the search `query` on the index. | ||||
|     async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>; | ||||
|     /// Returns the settings of the index. | ||||
|     async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>>; | ||||
|  | ||||
|     /// Returns documents of the index, given an `offset`, a `limit` and an optional list of | ||||
|     /// attributes to retrieve. | ||||
|     async fn documents( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Vec<Document>>; | ||||
|     /// Returns the document identified by `doc_id`. | ||||
|     async fn document( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         doc_id: String, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Document>; | ||||
|     /// Deletes the index. | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<()>; | ||||
|     /// Returns the metadata of the index. | ||||
|     async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>; | ||||
|     /// Applies `index_settings` to the index and returns its metadata. | ||||
|     async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>; | ||||
|     /// Snapshots the index into `path`. | ||||
|     async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; | ||||
|     /// Dumps the index into `path`. | ||||
|     async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>; | ||||
|     /// Returns the statistics of the index. | ||||
|     async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>; | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::sync::Arc; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     /// Useful for passing around an `Arc<MockIndexActorHandle>` in tests. | ||||
|     /// | ||||
|     /// Every method simply forwards to the wrapped mock. The signatures must match the | ||||
|     /// `IndexActorHandle` trait exactly, otherwise the test build does not compile. | ||||
|     #[async_trait::async_trait] | ||||
|     impl IndexActorHandle for Arc<MockIndexActorHandle> { | ||||
|         async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> { | ||||
|             self.as_ref().create_index(uuid, primary_key).await | ||||
|         } | ||||
|  | ||||
|         // `update` takes only `uuid` and `meta` in the trait; there is no payload file here. | ||||
|         async fn update( | ||||
|             &self, | ||||
|             uuid: Uuid, | ||||
|             meta: Processing, | ||||
|         ) -> Result<std::result::Result<Processed, Failed>> { | ||||
|             self.as_ref().update(uuid, meta).await | ||||
|         } | ||||
|  | ||||
|         async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> { | ||||
|             self.as_ref().search(uuid, query).await | ||||
|         } | ||||
|  | ||||
|         async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> { | ||||
|             self.as_ref().settings(uuid).await | ||||
|         } | ||||
|  | ||||
|         async fn documents( | ||||
|             &self, | ||||
|             uuid: Uuid, | ||||
|             offset: usize, | ||||
|             limit: usize, | ||||
|             attributes_to_retrieve: Option<Vec<String>>, | ||||
|         ) -> Result<Vec<Document>> { | ||||
|             self.as_ref() | ||||
|                 .documents(uuid, offset, limit, attributes_to_retrieve) | ||||
|                 .await | ||||
|         } | ||||
|  | ||||
|         async fn document( | ||||
|             &self, | ||||
|             uuid: Uuid, | ||||
|             doc_id: String, | ||||
|             attributes_to_retrieve: Option<Vec<String>>, | ||||
|         ) -> Result<Document> { | ||||
|             self.as_ref() | ||||
|                 .document(uuid, doc_id, attributes_to_retrieve) | ||||
|                 .await | ||||
|         } | ||||
|  | ||||
|         async fn delete(&self, uuid: Uuid) -> Result<()> { | ||||
|             self.as_ref().delete(uuid).await | ||||
|         } | ||||
|  | ||||
|         async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> { | ||||
|             self.as_ref().get_index_meta(uuid).await | ||||
|         } | ||||
|  | ||||
|         async fn update_index( | ||||
|             &self, | ||||
|             uuid: Uuid, | ||||
|             index_settings: IndexSettings, | ||||
|         ) -> Result<IndexMeta> { | ||||
|             self.as_ref().update_index(uuid, index_settings).await | ||||
|         } | ||||
|  | ||||
|         async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { | ||||
|             self.as_ref().snapshot(uuid, path).await | ||||
|         } | ||||
|  | ||||
|         async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { | ||||
|             self.as_ref().dump(uuid, path).await | ||||
|         } | ||||
|  | ||||
|         async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> { | ||||
|             self.as_ref().get_index_stats(uuid).await | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										109
									
								
								meilisearch-lib/src/index_controller/index_actor/store.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								meilisearch-lib/src/index_controller/index_actor/store.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,109 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use milli::update::UpdateBuilder; | ||||
| use tokio::fs; | ||||
| use tokio::sync::RwLock; | ||||
| use tokio::task::spawn_blocking; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::{IndexActorError, Result}; | ||||
| use crate::index::Index; | ||||
| use crate::index_controller::update_file_store::UpdateFileStore; | ||||
|  | ||||
| /// A `HashMap` shared behind an `Arc` and guarded by an asynchronous (tokio) `RwLock`. | ||||
| type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>; | ||||
|  | ||||
| /// Storage abstraction used to create, look up and delete indexes by uuid. | ||||
| #[async_trait::async_trait] | ||||
| pub trait IndexStore { | ||||
|     /// Creates the index `uuid`, optionally setting its primary key, and returns it. | ||||
|     async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>; | ||||
|     /// Returns the index `uuid`, or `None` if the store does not know it. | ||||
|     async fn get(&self, uuid: Uuid) -> Result<Option<Index>>; | ||||
|     /// Deletes the index `uuid` and returns the handle the store was holding for it, if any. | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>; | ||||
| } | ||||
|  | ||||
| /// `IndexStore` implementation that keeps opened indexes in an in-memory map and stores each | ||||
| /// index on disk in its own `index-<uuid>` directory. | ||||
| pub struct MapIndexStore { | ||||
|     /// Indexes that have already been opened, keyed by uuid. | ||||
|     index_store: AsyncMap<Uuid, Index>, | ||||
|     /// Directory under which the per-index directories live. | ||||
|     path: PathBuf, | ||||
|     /// Size passed to `Index::open` whenever an index is opened. | ||||
|     index_size: usize, | ||||
|     /// Update file store shared with every index opened by this store. | ||||
|     update_file_store: Arc<UpdateFileStore>, | ||||
| } | ||||
|  | ||||
| impl MapIndexStore { | ||||
|     /// Creates an empty store whose indexes live under `<path>/indexes/`. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the `UpdateFileStore` cannot be created at `path`. | ||||
|     pub fn new(path: impl AsRef<Path>, index_size: usize) -> Self { | ||||
|         let root: &Path = path.as_ref(); | ||||
|         let update_file_store = Arc::new(UpdateFileStore::new(root).unwrap()); | ||||
|         Self { | ||||
|             index_store: Arc::new(RwLock::new(HashMap::new())), | ||||
|             path: root.join("indexes/"), | ||||
|             index_size, | ||||
|             update_file_store, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[async_trait::async_trait] | ||||
| impl IndexStore for MapIndexStore { | ||||
|     /// Creates the index `uuid` on disk, optionally setting its primary key, and registers | ||||
|     /// it in the map. | ||||
|     /// | ||||
|     /// If the index is already in the map it is returned as is. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns `IndexActorError::IndexAlreadyExists` if the index directory already exists | ||||
|     /// on disk, or any error raised while opening the index or setting its primary key. | ||||
|     async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> { | ||||
|         // We need to keep the lock until we are sure the db file has been opened correctly, to | ||||
|         // ensure that another db is not created at the same time. | ||||
|         let mut lock = self.index_store.write().await; | ||||
|  | ||||
|         if let Some(index) = lock.get(&uuid) { | ||||
|             return Ok(index.clone()); | ||||
|         } | ||||
|         let path = self.path.join(format!("index-{}", uuid)); | ||||
|         if path.exists() { | ||||
|             return Err(IndexActorError::IndexAlreadyExists); | ||||
|         } | ||||
|  | ||||
|         let index_size = self.index_size; | ||||
|         let file_store = self.update_file_store.clone(); | ||||
|         // Opening the index and writing the primary key are blocking operations. | ||||
|         let index = spawn_blocking(move || -> Result<Index> { | ||||
|             let index = Index::open(path, index_size, file_store)?; | ||||
|             if let Some(primary_key) = primary_key { | ||||
|                 let mut txn = index.write_txn()?; | ||||
|  | ||||
|                 let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index); | ||||
|                 builder.set_primary_key(primary_key); | ||||
|                 builder.execute(|_, _| ())?; | ||||
|  | ||||
|                 txn.commit()?; | ||||
|             } | ||||
|             Ok(index) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         lock.insert(uuid, index.clone()); | ||||
|  | ||||
|         Ok(index) | ||||
|     } | ||||
|  | ||||
|     /// Returns the index `uuid`, opening it from disk and caching it on first access. | ||||
|     /// | ||||
|     /// Returns `Ok(None)` when the index is neither in the map nor present on disk. | ||||
|     async fn get(&self, uuid: Uuid) -> Result<Option<Index>> { | ||||
|         // Fast path: the index is already opened, a shared lock is enough. | ||||
|         { | ||||
|             let guard = self.index_store.read().await; | ||||
|             if let Some(index) = guard.get(&uuid) { | ||||
|                 return Ok(Some(index.clone())); | ||||
|             } | ||||
|             // The read guard is dropped here so that the write lock can be taken below | ||||
|             // without deadlocking. | ||||
|         } | ||||
|  | ||||
|         let path = self.path.join(format!("index-{}", uuid)); | ||||
|         if !path.exists() { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         // Like in `create`, keep the write lock while the index is being opened, and check | ||||
|         // the map again once the lock is held: another task may have opened the same index | ||||
|         // between the release of the read lock and now. Without this second check, two | ||||
|         // concurrent calls could both open the same index. | ||||
|         let mut lock = self.index_store.write().await; | ||||
|         if let Some(index) = lock.get(&uuid) { | ||||
|             return Ok(Some(index.clone())); | ||||
|         } | ||||
|  | ||||
|         let index_size = self.index_size; | ||||
|         let file_store = self.update_file_store.clone(); | ||||
|         let index = spawn_blocking(move || Index::open(path, index_size, file_store)).await??; | ||||
|         lock.insert(uuid, index.clone()); | ||||
|         Ok(Some(index)) | ||||
|     } | ||||
|  | ||||
|     /// Removes the directory of the index `uuid` from disk, then removes the index from the | ||||
|     /// map and returns the removed handle, if there was one. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the I/O error if the directory cannot be removed; in that case the map is | ||||
|     /// left untouched. | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> { | ||||
|         let db_path = self.path.join(format!("index-{}", uuid)); | ||||
|         fs::remove_dir_all(db_path).await?; | ||||
|         let index = self.index_store.write().await.remove(&uuid); | ||||
|         Ok(index) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										557
									
								
								meilisearch-lib/src/index_controller/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										557
									
								
								meilisearch-lib/src/index_controller/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,557 @@ | ||||
| use std::collections::BTreeMap; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use actix_web::error::PayloadError; | ||||
| use bytes::Bytes; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use futures::Stream; | ||||
| use log::info; | ||||
| use milli::FieldDistribution; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use tokio::time::sleep; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use dump_actor::DumpActorHandle; | ||||
| pub use dump_actor::{DumpInfo, DumpStatus}; | ||||
| use index_actor::IndexActorHandle; | ||||
| use snapshot::load_snapshot; | ||||
| use update_actor::UpdateActorHandle; | ||||
| pub use updates::*; | ||||
| use uuid_resolver::{error::UuidResolverError, UuidResolverHandle}; | ||||
|  | ||||
| use crate::options::IndexerOpts; | ||||
| use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; | ||||
| use error::Result; | ||||
|  | ||||
| use self::dump_actor::load_dump; | ||||
|  | ||||
| mod dump_actor; | ||||
| pub mod error; | ||||
| pub mod index_actor; | ||||
| mod snapshot; | ||||
| pub mod update_actor; | ||||
| mod updates; | ||||
| mod uuid_resolver; | ||||
| pub mod update_file_store; | ||||
|  | ||||
| /// Boxed stream of byte chunks, each item being either `Bytes` or an actix `PayloadError`. | ||||
| pub type Payload = Box<dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin>; | ||||
|  | ||||
| /// Identity of an index together with its `IndexMeta`, (de)serialized with camelCase keys. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexMetadata { | ||||
|     /// Internal uuid of the index; never (de)serialized. | ||||
|     #[serde(skip)] | ||||
|     pub uuid: Uuid, | ||||
|     /// Uid of the index. | ||||
|     pub uid: String, | ||||
|     /// Name of the index. | ||||
|     name: String, | ||||
|     /// Index metadata, flattened into this struct when (de)serialized. | ||||
|     #[serde(flatten)] | ||||
|     pub meta: index_actor::IndexMeta, | ||||
| } | ||||
|  | ||||
| /// Optional index-level settings: a uid and a primary key. | ||||
| #[derive(Clone, Debug)] | ||||
| pub struct IndexSettings { | ||||
|     /// Uid of the index, if provided. | ||||
|     pub uid: Option<String>, | ||||
|     /// Primary key of the index, if provided. | ||||
|     pub primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Statistics of a single index, serialized with camelCase keys. | ||||
| #[derive(Serialize, Debug)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexStats { | ||||
|     /// Size of the index; skipped during serialization. | ||||
|     #[serde(skip)] | ||||
|     pub size: u64, | ||||
|     /// Number of documents in the index. | ||||
|     pub number_of_documents: u64, | ||||
|     /// Whether the current index is performing an update. It is initially `None` when the | ||||
|     /// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is | ||||
|     /// later set to either true or false, when we retrieve the information from the `UpdateStore` | ||||
|     pub is_indexing: Option<bool>, | ||||
|     /// Field distribution of the index, as defined by milli's `FieldDistribution`. | ||||
|     pub field_distribution: FieldDistribution, | ||||
| } | ||||
|  | ||||
| /// Entry point to the index machinery: bundles the handles to the uuid resolver, the index | ||||
| /// actor, the update actor and the dump actor. Built through `IndexControllerBuilder`. | ||||
| #[derive(Clone)] | ||||
| pub struct IndexController { | ||||
|     /// Resolves index uids to their uuid. | ||||
|     uuid_resolver: uuid_resolver::UuidResolverHandleImpl, | ||||
|     /// Handle to the index actor. | ||||
|     index_handle: index_actor::IndexActorHandleImpl, | ||||
|     /// Handle to the update actor. | ||||
|     update_handle: update_actor::UpdateActorHandleImpl, | ||||
|     /// Handle to the dump actor. | ||||
|     dump_handle: dump_actor::DumpActorHandleImpl, | ||||
| } | ||||
|  | ||||
| /// Format of the payload of a document addition. | ||||
| pub enum DocumentAdditionFormat { | ||||
|     /// The payload is JSON. | ||||
|     Json, | ||||
| } | ||||
|  | ||||
|  | ||||
| /// Global statistics, serialized with camelCase keys. | ||||
| #[derive(Serialize, Debug)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Stats { | ||||
|     /// Size of the database. | ||||
|     pub database_size: u64, | ||||
|     /// Date of the last update, if any. | ||||
|     pub last_update: Option<DateTime<Utc>>, | ||||
|     /// Per-index statistics, keyed by a string (presumably the index uid; confirm against | ||||
|     /// the code that fills this map). | ||||
|     pub indexes: BTreeMap<String, IndexStats>, | ||||
| } | ||||
|  | ||||
| /// An update that can be registered through `IndexController::register_update`. | ||||
| pub enum Update { | ||||
|     /// Addition of documents to an index. | ||||
|     DocumentAddition { | ||||
|         /// Stream of bytes containing the documents. | ||||
|         payload: Payload, | ||||
|         /// Primary key to use, if provided. | ||||
|         primary_key: Option<String>, | ||||
|         /// milli indexing method to apply. | ||||
|         method: IndexDocumentsMethod, | ||||
|         /// Format of `payload`. | ||||
|         format: DocumentAdditionFormat, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Builder for `IndexController`; every option starts unset (`None` / `false`). | ||||
| #[derive(Default, Debug)] | ||||
| pub struct IndexControllerBuilder { | ||||
|     /// Maximum size of an index; required by `build`. | ||||
|     max_index_size: Option<usize>, | ||||
|     /// Maximum size of the update store. | ||||
|     max_update_store_size: Option<usize>, | ||||
|     /// Snapshot directory; only referenced by the commented-out snapshot scheduling in `build`. | ||||
|     snapshot_dir: Option<PathBuf>, | ||||
|     /// Snapshot to load before starting, if any. | ||||
|     import_snapshot: Option<PathBuf>, | ||||
|     /// Forwarded to `load_snapshot` when a snapshot is imported. | ||||
|     ignore_snapshot_if_db_exists: bool, | ||||
|     /// Forwarded to `load_snapshot` when a snapshot is imported. | ||||
|     ignore_missing_snapshot: bool, | ||||
|     /// Dump to load before starting, used only when no snapshot is imported. | ||||
|     dump_src: Option<PathBuf>, | ||||
|     /// Path given to the dump actor; required by `build`. | ||||
|     dump_dst: Option<PathBuf>, | ||||
| } | ||||
|  | ||||
| impl IndexControllerBuilder { | ||||
|     /// Consumes the builder and starts an `IndexController` on the database at `db_path`. | ||||
|     /// | ||||
|     /// If a snapshot to import was set it is loaded first; otherwise, if a dump source was | ||||
|     /// set, the dump is loaded. The database directory is then created if needed and the | ||||
|     /// uuid resolver, index, update and dump handles are created. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns an error if the index size, the update store size or the dump destination | ||||
|     /// has not been set, or if loading the snapshot/dump, creating the database directory | ||||
|     /// or creating any of the handles fails. | ||||
|     pub fn build( | ||||
|         self, | ||||
|         db_path: impl AsRef<Path>, | ||||
|         indexer_options: IndexerOpts, | ||||
|     ) -> anyhow::Result<IndexController> { | ||||
|         let index_size = self | ||||
|             .max_index_size | ||||
|             .ok_or_else(|| anyhow::anyhow!("Missing index size"))?; | ||||
|         // The update store has its own limit, set through `set_max_update_store_size`; it | ||||
|         // must not be read from `max_index_size`. | ||||
|         let update_store_size = self | ||||
|             .max_update_store_size | ||||
|             .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; | ||||
|  | ||||
|         if let Some(ref path) = self.import_snapshot { | ||||
|             info!("Loading from snapshot {:?}", path); | ||||
|             load_snapshot( | ||||
|                 db_path.as_ref(), | ||||
|                 path, | ||||
|                 self.ignore_snapshot_if_db_exists, | ||||
|                 self.ignore_missing_snapshot, | ||||
|             )?; | ||||
|         } else if let Some(ref src_path) = self.dump_src { | ||||
|             load_dump( | ||||
|                 db_path.as_ref(), | ||||
|                 src_path, | ||||
|                 index_size, | ||||
|                 update_store_size, | ||||
|                 &indexer_options, | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         std::fs::create_dir_all(db_path.as_ref())?; | ||||
|  | ||||
|         let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?; | ||||
|         let index_handle = | ||||
|             index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?; | ||||
|         let update_handle = update_actor::UpdateActorHandleImpl::new( | ||||
|             index_handle.clone(), | ||||
|             &db_path, | ||||
|             update_store_size, | ||||
|         )?; | ||||
|  | ||||
|         let dump_handle = dump_actor::DumpActorHandleImpl::new( | ||||
|             &self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?, | ||||
|             uuid_resolver.clone(), | ||||
|             update_handle.clone(), | ||||
|             index_size, | ||||
|             update_store_size, | ||||
|         )?; | ||||
|  | ||||
|         //if options.schedule_snapshot { | ||||
|             //let snapshot_service = SnapshotService::new( | ||||
|                 //uuid_resolver.clone(), | ||||
|                 //update_handle.clone(), | ||||
|                 //Duration::from_secs(options.snapshot_interval_sec), | ||||
|                 //options.snapshot_dir.clone(), | ||||
|                 //options | ||||
|                     //.db_path | ||||
|                     //.file_name() | ||||
|                     //.map(|n| n.to_owned().into_string().expect("invalid path")) | ||||
|                     //.unwrap_or_else(|| String::from("data.ms")), | ||||
|             //); | ||||
|  | ||||
|             //tokio::task::spawn(snapshot_service.run()); | ||||
|         //} | ||||
|  | ||||
|         Ok(IndexController { | ||||
|             uuid_resolver, | ||||
|             index_handle, | ||||
|             update_handle, | ||||
|             dump_handle, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's max update store size. | ||||
|     pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self { | ||||
|         self.max_update_store_size.replace(max_update_store_size); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's max index size. | ||||
|     pub fn set_max_index_size(&mut self, size: usize) -> &mut Self { | ||||
|         self.max_index_size.replace(size); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's snapshot path. | ||||
|     pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self { | ||||
|         self.snapshot_dir.replace(snapshot_dir); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's ignore snapshot if db exists. | ||||
|     pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self { | ||||
|         self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's ignore missing snapshot. | ||||
|     pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self { | ||||
|         self.ignore_missing_snapshot = ignore_missing_snapshot; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's dump src. | ||||
|     pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { | ||||
|         self.dump_src.replace(dump_src); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's dump dst. | ||||
|     pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { | ||||
|         self.dump_dst.replace(dump_dst); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's import snapshot. | ||||
|     pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { | ||||
|         self.import_snapshot.replace(import_snapshot); | ||||
|         self | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl IndexController { | ||||
|     /// Returns a builder with every option unset. | ||||
|     pub fn builder() -> IndexControllerBuilder { | ||||
|         Default::default() | ||||
|     } | ||||
|  | ||||
|     /// Registers `update` for the index whose uid is `uid` and returns its status. | ||||
|     /// | ||||
|     /// When the uid resolver reports that the index does not exist, a fresh uuid is | ||||
|     /// generated, the update is registered under it, the index is created and the uid is | ||||
|     /// finally bound to the new uuid. Any other resolver error is returned to the caller. | ||||
|     pub async fn register_update(&self, uid: &str, update: Update) -> Result<UpdateStatus> { | ||||
|         let resolved = self.uuid_resolver.get(uid.to_string()).await; | ||||
|         match resolved { | ||||
|             Ok(uuid) => Ok(self.update_handle.update(uuid, update).await?), | ||||
|             Err(UuidResolverError::UnexistingIndex(name)) => { | ||||
|                 let uuid = Uuid::new_v4(); | ||||
|                 let status = self.update_handle.update(uuid, update).await?; | ||||
|                 // ignore if index creation fails now, since it may already have been created | ||||
|                 let _ = self.index_handle.create_index(uuid, None).await; | ||||
|                 self.uuid_resolver.insert(name, uuid).await?; | ||||
|                 Ok(status) | ||||
|             } | ||||
|             Err(other) => Err(other.into()), | ||||
|         }
|     } | ||||
|  | ||||
|     //pub async fn add_documents( | ||||
|         //&self, | ||||
|         //uid: String, | ||||
|         //method: milli::update::IndexDocumentsMethod, | ||||
|         //payload: Payload, | ||||
|         //primary_key: Option<String>, | ||||
|     //) -> Result<UpdateStatus> { | ||||
|         //let perform_update = |uuid| async move { | ||||
|             //let meta = UpdateMeta::DocumentsAddition { | ||||
|                 //method, | ||||
|                 //primary_key, | ||||
|             //}; | ||||
|             //let (sender, receiver) = mpsc::channel(10); | ||||
|  | ||||
|             //// It is necessary to spawn a local task to send the payload to the update handle to | ||||
|             //// prevent dead_locking between the update_handle::update that waits for the update to be | ||||
|             //// registered and the update_actor that waits for the the payload to be sent to it. | ||||
|             //tokio::task::spawn_local(async move { | ||||
|                 //payload | ||||
|                     //.for_each(|r| async { | ||||
|                         //let _ = sender.send(r).await; | ||||
|                     //}) | ||||
|                     //.await | ||||
|             //}); | ||||
|  | ||||
|             //// This must be done *AFTER* spawning the task. | ||||
|             //self.update_handle.update(meta, receiver, uuid).await | ||||
|         //}; | ||||
|  | ||||
|         //match self.uuid_resolver.get(uid).await { | ||||
|             //Ok(uuid) => Ok(perform_update(uuid).await?), | ||||
|             //Err(UuidResolverError::UnexistingIndex(name)) => { | ||||
|                 //let uuid = Uuid::new_v4(); | ||||
|                 //let status = perform_update(uuid).await?; | ||||
|                 //// ignore if index creation fails now, since it may already have been created | ||||
|                 //let _ = self.index_handle.create_index(uuid, None).await; | ||||
|                 //self.uuid_resolver.insert(name, uuid).await?; | ||||
|                 //Ok(status) | ||||
|             //} | ||||
|             //Err(e) => Err(e.into()), | ||||
|         //} | ||||
|     //} | ||||
|  | ||||
|     //pub async fn clear_documents(&self, uid: String) -> Result<UpdateStatus> { | ||||
|         //let uuid = self.uuid_resolver.get(uid).await?; | ||||
|         //let meta = UpdateMeta::ClearDocuments; | ||||
|         //let (_, receiver) = mpsc::channel(1); | ||||
|         //let status = self.update_handle.update(meta, receiver, uuid).await?; | ||||
|         //Ok(status) | ||||
|     //} | ||||
|  | ||||
|     //pub async fn delete_documents( | ||||
|         //&self, | ||||
|         //uid: String, | ||||
|         //documents: Vec<String>, | ||||
|     //) -> Result<UpdateStatus> { | ||||
|         //let uuid = self.uuid_resolver.get(uid).await?; | ||||
|         //let meta = UpdateMeta::DeleteDocuments { ids: documents }; | ||||
|         //let (_, receiver) = mpsc::channel(1); | ||||
|         //let status = self.update_handle.update(meta, receiver, uuid).await?; | ||||
|         //Ok(status) | ||||
|     //} | ||||
|  | ||||
|     //pub async fn update_settings( | ||||
|         //&self, | ||||
|         //uid: String, | ||||
|         //settings: Settings<Checked>, | ||||
|         //create: bool, | ||||
|     //) -> Result<UpdateStatus> { | ||||
|         //let perform_udpate = |uuid| async move { | ||||
|             //let meta = UpdateMeta::Settings(settings.into_unchecked()); | ||||
|             //// Nothing so send, drop the sender right away, as not to block the update actor. | ||||
|             //let (_, receiver) = mpsc::channel(1); | ||||
|             //self.update_handle.update(meta, receiver, uuid).await | ||||
|         //}; | ||||
|  | ||||
|         //match self.uuid_resolver.get(uid).await { | ||||
|             //Ok(uuid) => Ok(perform_udpate(uuid).await?), | ||||
|             //Err(UuidResolverError::UnexistingIndex(name)) if create => { | ||||
|                 //let uuid = Uuid::new_v4(); | ||||
|                 //let status = perform_udpate(uuid).await?; | ||||
|                 //// ignore if index creation fails now, since it may already have been created | ||||
|                 //let _ = self.index_handle.create_index(uuid, None).await; | ||||
|                 //self.uuid_resolver.insert(name, uuid).await?; | ||||
|                 //Ok(status) | ||||
|             //} | ||||
|             //Err(e) => Err(e.into()), | ||||
|         //} | ||||
|     //} | ||||
|  | ||||
|     //pub async fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata> { | ||||
|         //let IndexSettings { uid, primary_key } = index_settings; | ||||
|         //let uid = uid.ok_or(IndexControllerError::MissingUid)?; | ||||
|         //let uuid = Uuid::new_v4(); | ||||
|         //let meta = self.index_handle.create_index(uuid, primary_key).await?; | ||||
|         //self.uuid_resolver.insert(uid.clone(), uuid).await?; | ||||
|         //let meta = IndexMetadata { | ||||
|             //uuid, | ||||
|             //name: uid.clone(), | ||||
|             //uid, | ||||
|             //meta, | ||||
|         //}; | ||||
|  | ||||
|         //Ok(meta) | ||||
|     //} | ||||
|  | ||||
|     //pub async fn delete_index(&self, uid: String) -> Result<()> { | ||||
|         //let uuid = self.uuid_resolver.delete(uid).await?; | ||||
|  | ||||
|         //// We remove the index from the resolver synchronously, and effectively perform the index | ||||
|         //// deletion as a background task. | ||||
|         //let update_handle = self.update_handle.clone(); | ||||
|         //let index_handle = self.index_handle.clone(); | ||||
|         //tokio::spawn(async move { | ||||
|             //if let Err(e) = update_handle.delete(uuid).await { | ||||
|                 //error!("Error while deleting index: {}", e); | ||||
|             //} | ||||
|             //if let Err(e) = index_handle.delete(uuid).await { | ||||
|                 //error!("Error while deleting index: {}", e); | ||||
|             //} | ||||
|         //}); | ||||
|  | ||||
|         //Ok(()) | ||||
|     //} | ||||
|  | ||||
|     /// Returns the status of update `id` for the index named `uid`. | ||||
|     pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> { | ||||
|         let index_uuid = self.uuid_resolver.get(uid).await?; | ||||
|         Ok(self.update_handle.update_status(index_uuid, id).await?) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of every update known for the index named `uid`. | ||||
|     pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> { | ||||
|         let index_uuid = self.uuid_resolver.get(uid).await?; | ||||
|         Ok(self.update_handle.get_all_updates_status(index_uuid).await?) | ||||
|     } | ||||
|  | ||||
|     /// Returns the metadata of every index listed by the uuid resolver. | ||||
|     /// | ||||
|     /// The index metadata are fetched one after the other; the first error aborts the listing. | ||||
|     pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> { | ||||
|         let mut indexes = Vec::new(); | ||||
|  | ||||
|         for (uid, uuid) in self.uuid_resolver.list().await? { | ||||
|             let meta = self.index_handle.get_index_meta(uuid).await?; | ||||
|             indexes.push(IndexMetadata { | ||||
|                 uuid, | ||||
|                 // `name` currently mirrors `uid`. | ||||
|                 name: uid.clone(), | ||||
|                 uid, | ||||
|                 meta, | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         Ok(indexes) | ||||
|     } | ||||
|  | ||||
|     /// Returns the settings of the index named `uid`. | ||||
|     /// | ||||
|     /// Fails if `uid` cannot be resolved to a uuid or if the index handle returns an error. | ||||
|     pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> { | ||||
|         // `uid` is not used afterwards, so it is moved into the resolver instead of cloned. | ||||
|         let uuid = self.uuid_resolver.get(uid).await?; | ||||
|         let settings = self.index_handle.settings(uuid).await?; | ||||
|         Ok(settings) | ||||
|     } | ||||
|  | ||||
|     /// Fetches documents from the index named `uid`. | ||||
|     /// | ||||
|     /// `offset`, `limit` and `attributes_to_retrieve` are forwarded unchanged to the index | ||||
|     /// handle. Fails if `uid` cannot be resolved or if the index handle returns an error. | ||||
|     pub async fn documents( | ||||
|         &self, | ||||
|         uid: String, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Vec<Document>> { | ||||
|         // `uid` is not used afterwards, so it is moved into the resolver instead of cloned. | ||||
|         let uuid = self.uuid_resolver.get(uid).await?; | ||||
|         let documents = self | ||||
|             .index_handle | ||||
|             .documents(uuid, offset, limit, attributes_to_retrieve) | ||||
|             .await?; | ||||
|         Ok(documents) | ||||
|     } | ||||
|  | ||||
|     /// Fetches the document `doc_id` from the index named `uid`. | ||||
|     /// | ||||
|     /// `attributes_to_retrieve` is forwarded unchanged to the index handle. Fails if `uid` | ||||
|     /// cannot be resolved or if the index handle returns an error. | ||||
|     pub async fn document( | ||||
|         &self, | ||||
|         uid: String, | ||||
|         doc_id: String, | ||||
|         attributes_to_retrieve: Option<Vec<String>>, | ||||
|     ) -> Result<Document> { | ||||
|         // `uid` is not used afterwards, so it is moved into the resolver instead of cloned. | ||||
|         let uuid = self.uuid_resolver.get(uid).await?; | ||||
|         let document = self | ||||
|             .index_handle | ||||
|             .document(uuid, doc_id, attributes_to_retrieve) | ||||
|             .await?; | ||||
|         Ok(document) | ||||
|     } | ||||
|  | ||||
|     /// Applies `index_settings` to the index named `uid` and returns its refreshed metadata. | ||||
|     /// | ||||
|     /// Fails if `uid` cannot be resolved or if the index handle returns an error. | ||||
|     pub async fn update_index( | ||||
|         &self, | ||||
|         uid: String, | ||||
|         mut index_settings: IndexSettings, | ||||
|     ) -> Result<IndexMetadata> { | ||||
|         // Any uid carried by the settings is discarded before they reach the index handle. | ||||
|         index_settings.uid = None; | ||||
|  | ||||
|         let uuid = self.uuid_resolver.get(uid.clone()).await?; | ||||
|         let meta = self.index_handle.update_index(uuid, index_settings).await?; | ||||
|         let meta = IndexMetadata { | ||||
|             uuid, | ||||
|             name: uid.clone(), | ||||
|             uid, | ||||
|             meta, | ||||
|         }; | ||||
|         Ok(meta) | ||||
|     } | ||||
|  | ||||
|     /// Runs `query` against the index named `uid` and returns the search result. | ||||
|     pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> { | ||||
|         let index_uuid = self.uuid_resolver.get(uid).await?; | ||||
|         Ok(self.index_handle.search(index_uuid, query).await?) | ||||
|     } | ||||
|  | ||||
|     /// Returns the metadata of the index named `uid`. | ||||
|     pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> { | ||||
|         // The name is still needed to fill the metadata, hence the clone. | ||||
|         let uuid = self.uuid_resolver.get(uid.clone()).await?; | ||||
|         let meta = self.index_handle.get_index_meta(uuid).await?; | ||||
|         Ok(IndexMetadata { | ||||
|             uuid, | ||||
|             name: uid.clone(), | ||||
|             uid, | ||||
|             meta, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Returns the size reported by the uuid resolver. | ||||
|     pub async fn get_uuids_size(&self) -> Result<u64> { | ||||
|         let size = self.uuid_resolver.get_size().await?; | ||||
|         Ok(size) | ||||
|     } | ||||
|  | ||||
|     /// Returns the stats of the index named `uid`. | ||||
|     /// | ||||
|     /// `is_indexing` is filled in here: it is true when the update handle reports this | ||||
|     /// index as the one currently being processed. | ||||
|     pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> { | ||||
|         let index_uuid = self.uuid_resolver.get(uid).await?; | ||||
|         let update_infos = self.update_handle.get_info().await?; | ||||
|         let mut index_stats = self.index_handle.get_index_stats(index_uuid).await?; | ||||
|         // Check if the currently indexing update is from our index. | ||||
|         let indexing_here = update_infos.processing == Some(index_uuid); | ||||
|         index_stats.is_indexing = Some(indexing_here); | ||||
|         Ok(index_stats) | ||||
|     } | ||||
|  | ||||
|     /// Aggregates the stats of every index. | ||||
|     /// | ||||
|     /// The database size is the sum of the resolver size, the update store size and the size | ||||
|     /// of each index; `last_update` is the most recent `updated_at` among the indexes, or | ||||
|     /// `None` when there is no index. | ||||
|     pub async fn get_all_stats(&self) -> Result<Stats> { | ||||
|         let update_infos = self.update_handle.get_info().await?; | ||||
|         let mut database_size = self.get_uuids_size().await? + update_infos.size; | ||||
|         let mut last_update: Option<DateTime<_>> = None; | ||||
|         let mut indexes = BTreeMap::new(); | ||||
|  | ||||
|         for index in self.list_indexes().await? { | ||||
|             let mut stats = self.index_handle.get_index_stats(index.uuid).await?; | ||||
|             database_size += stats.size; | ||||
|  | ||||
|             // Keep the latest update date seen so far. | ||||
|             let updated_at = index.meta.updated_at; | ||||
|             last_update = Some(match last_update { | ||||
|                 Some(latest) => latest.max(updated_at), | ||||
|                 None => updated_at, | ||||
|             }); | ||||
|  | ||||
|             stats.is_indexing = Some(update_infos.processing == Some(index.uuid)); | ||||
|  | ||||
|             indexes.insert(index.uid, stats); | ||||
|         } | ||||
|  | ||||
|         Ok(Stats { | ||||
|             database_size, | ||||
|             last_update, | ||||
|             indexes, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Asks the dump handle to create a dump and returns its info. | ||||
|     pub async fn create_dump(&self) -> Result<DumpInfo> { | ||||
|         let info = self.dump_handle.create_dump().await?; | ||||
|         Ok(info) | ||||
|     } | ||||
|  | ||||
|     /// Returns the info the dump handle holds for the dump identified by `uid`. | ||||
|     pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> { | ||||
|         let info = self.dump_handle.dump_info(uid).await?; | ||||
|         Ok(info) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Waits until `item` is the only remaining reference to its value, then returns the value. | ||||
| /// | ||||
| /// `Arc::try_unwrap` is retried every 100ms for as long as other references exist, so this | ||||
| /// never completes if another clone is kept alive forever. | ||||
| pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T { | ||||
|     loop { | ||||
|         item = match Arc::try_unwrap(item) { | ||||
|             Ok(inner) => return inner, | ||||
|             // Still shared: take the Arc back and try again later. | ||||
|             Err(still_shared) => still_shared, | ||||
|         }; | ||||
|         sleep(Duration::from_millis(100)).await; | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Parses the v1 syntax of the Asc ranking rule, `asc(price)`, and returns the field name. | ||||
| /// | ||||
| /// The field is whatever lies between the first `asc(` and the last `)` of `text`; | ||||
| /// `None` is returned when either delimiter is missing. | ||||
| pub fn asc_ranking_rule(text: &str) -> Option<&str> { | ||||
|     let (_, tail) = text.split_once("asc(")?; | ||||
|     let (field, _) = tail.rsplit_once(")")?; | ||||
|     Some(field) | ||||
| } | ||||
|  | ||||
| /// Parses the v1 syntax of the Desc ranking rule, `desc(price)`, and returns the field name. | ||||
| /// | ||||
| /// The field is whatever lies between the first `desc(` and the last `)` of `text`; | ||||
| /// `None` is returned when either delimiter is missing. | ||||
| pub fn desc_ranking_rule(text: &str) -> Option<&str> { | ||||
|     let (_, tail) = text.split_once("desc(")?; | ||||
|     let (field, _) = tail.rsplit_once(")")?; | ||||
|     Some(field) | ||||
| } | ||||
|  | ||||
| /// Returns the `updates/updates_files` directory located under `path`. | ||||
| fn update_files_path(path: impl AsRef<Path>) -> PathBuf { | ||||
|     let mut files_dir = path.as_ref().to_path_buf(); | ||||
|     files_dir.push("updates/updates_files"); | ||||
|     files_dir | ||||
| } | ||||
							
								
								
									
										259
									
								
								meilisearch-lib/src/index_controller/snapshot.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								meilisearch-lib/src/index_controller/snapshot.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,259 @@ | ||||
| use std::path::Path; | ||||
|  | ||||
| use anyhow::bail; | ||||
|  | ||||
| //pub struct SnapshotService<U, R> { | ||||
|     //uuid_resolver_handle: R, | ||||
|     //update_handle: U, | ||||
|     //snapshot_period: Duration, | ||||
|     //snapshot_path: PathBuf, | ||||
|     //db_name: String, | ||||
| //} | ||||
|  | ||||
| //impl<U, R> SnapshotService<U, R> | ||||
| //where | ||||
|     //U: UpdateActorHandle, | ||||
|     //R: UuidResolverHandle, | ||||
| //{ | ||||
|     //pub fn new( | ||||
|         //uuid_resolver_handle: R, | ||||
|         //update_handle: U, | ||||
|         //snapshot_period: Duration, | ||||
|         //snapshot_path: PathBuf, | ||||
|         //db_name: String, | ||||
|     //) -> Self { | ||||
|         //Self { | ||||
|             //uuid_resolver_handle, | ||||
|             //update_handle, | ||||
|             //snapshot_period, | ||||
|             //snapshot_path, | ||||
|             //db_name, | ||||
|         //} | ||||
|     //} | ||||
|  | ||||
|     //pub async fn run(self) { | ||||
|         //info!( | ||||
|             //"Snapshot scheduled every {}s.", | ||||
|             //self.snapshot_period.as_secs() | ||||
|         //); | ||||
|         //loop { | ||||
|             //if let Err(e) = self.perform_snapshot().await { | ||||
|                 //error!("Error while performing snapshot: {}", e); | ||||
|             //} | ||||
|             //sleep(self.snapshot_period).await; | ||||
|         //} | ||||
|     //} | ||||
|  | ||||
|     //async fn perform_snapshot(&self) -> anyhow::Result<()> { | ||||
|         //trace!("Performing snapshot."); | ||||
|  | ||||
|         //let snapshot_dir = self.snapshot_path.clone(); | ||||
|         //fs::create_dir_all(&snapshot_dir).await?; | ||||
|         //let temp_snapshot_dir = | ||||
|             //spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; | ||||
|         //let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); | ||||
|  | ||||
|         //let uuids = self | ||||
|             //.uuid_resolver_handle | ||||
|             //.snapshot(temp_snapshot_path.clone()) | ||||
|             //.await?; | ||||
|  | ||||
|         //if uuids.is_empty() { | ||||
|             //return Ok(()); | ||||
|         //} | ||||
|  | ||||
|         //self.update_handle | ||||
|             //.snapshot(uuids, temp_snapshot_path.clone()) | ||||
|             //.await?; | ||||
|         //let snapshot_dir = self.snapshot_path.clone(); | ||||
|         //let snapshot_path = self | ||||
|             //.snapshot_path | ||||
|             //.join(format!("{}.snapshot", self.db_name)); | ||||
|         //let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> { | ||||
|             //let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?; | ||||
|             //let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); | ||||
|             //compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; | ||||
|             //temp_snapshot_file.persist(&snapshot_path)?; | ||||
|             //Ok(snapshot_path) | ||||
|         //}) | ||||
|         //.await??; | ||||
|  | ||||
|         //trace!("Created snapshot in {:?}.", snapshot_path); | ||||
|  | ||||
|         //Ok(()) | ||||
|     //} | ||||
| //} | ||||
|  | ||||
| /// Loads the snapshot archive at `snapshot_path` into `db_path`. | ||||
| /// | ||||
| /// * No database at `db_path` and the snapshot exists: the archive is unpacked into | ||||
| ///   `db_path` with `crate::from_tar_gz`. | ||||
| /// * A database already exists: an error is returned unless `ignore_snapshot_if_db_exists` | ||||
| ///   is set. | ||||
| /// * The snapshot is missing: an error is returned unless `ignore_missing_snapshot` is set. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// When unpacking fails, the db folder is removed and the unpacking error is returned. | ||||
| /// A failure to remove the folder never replaces that error: it is ignored when the folder | ||||
| /// does not exist, and attached as context otherwise. | ||||
| pub fn load_snapshot( | ||||
|     db_path: impl AsRef<Path>, | ||||
|     snapshot_path: impl AsRef<Path>, | ||||
|     ignore_snapshot_if_db_exists: bool, | ||||
|     ignore_missing_snapshot: bool, | ||||
| ) -> anyhow::Result<()> { | ||||
|     let db_path = db_path.as_ref(); | ||||
|     let snapshot_path = snapshot_path.as_ref(); | ||||
|  | ||||
|     if !db_path.exists() && snapshot_path.exists() { | ||||
|         match crate::from_tar_gz(snapshot_path, db_path) { | ||||
|             Ok(()) => Ok(()), | ||||
|             Err(e) => { | ||||
|                 // clean created db folder, keeping `e` as the error reported to the caller | ||||
|                 match std::fs::remove_dir_all(db_path) { | ||||
|                     Ok(()) => Err(e), | ||||
|                     // The folder was never created: nothing to clean. | ||||
|                     Err(io_err) if io_err.kind() == std::io::ErrorKind::NotFound => Err(e), | ||||
|                     Err(io_err) => Err(e.context(format!( | ||||
|                         "additionally failed to clean up {:?}: {}", | ||||
|                         db_path, io_err | ||||
|                     ))), | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } else if db_path.exists() && !ignore_snapshot_if_db_exists { | ||||
|         bail!( | ||||
|             "database already exists at {:?}, try to delete it or rename it", | ||||
|             db_path | ||||
|                 .canonicalize() | ||||
|                 .unwrap_or_else(|_| db_path.to_owned()) | ||||
|         ) | ||||
|     } else if !snapshot_path.exists() && !ignore_missing_snapshot { | ||||
|         bail!( | ||||
|             "snapshot doesn't exist at {:?}", | ||||
|             snapshot_path | ||||
|                 .canonicalize() | ||||
|                 .unwrap_or_else(|_| snapshot_path.to_owned()) | ||||
|         ) | ||||
|     } else { | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::iter::FromIterator; | ||||
|     use std::{collections::HashSet, sync::Arc}; | ||||
|  | ||||
|     use futures::future::{err, ok}; | ||||
|     use rand::Rng; | ||||
|     use tokio::time::timeout; | ||||
|     use uuid::Uuid; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::index_controller::index_actor::MockIndexActorHandle; | ||||
|     use crate::index_controller::update_actor::{ | ||||
|         error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl, | ||||
|     }; | ||||
|     use crate::index_controller::uuid_resolver::{ | ||||
|         error::UuidResolverError, MockUuidResolverHandle, | ||||
|     }; | ||||
|  | ||||
|     #[actix_rt::test] | ||||
|     async fn test_normal() { | ||||
|         let mut rng = rand::thread_rng(); | ||||
|         let uuids_num: usize = rng.gen_range(5..10); | ||||
|         let uuids = (0..uuids_num) | ||||
|             .map(|_| Uuid::new_v4()) | ||||
|             .collect::<HashSet<_>>(); | ||||
|  | ||||
|         let mut uuid_resolver = MockUuidResolverHandle::new(); | ||||
|         let uuids_clone = uuids.clone(); | ||||
|         uuid_resolver | ||||
|             .expect_snapshot() | ||||
|             .times(1) | ||||
|             .returning(move |_| Box::pin(ok(uuids_clone.clone()))); | ||||
|  | ||||
|         let uuids_clone = uuids.clone(); | ||||
|         let mut index_handle = MockIndexActorHandle::new(); | ||||
|         index_handle | ||||
|             .expect_snapshot() | ||||
|             .withf(move |uuid, _path| uuids_clone.contains(uuid)) | ||||
|             .times(uuids_num) | ||||
|             .returning(move |_, _| Box::pin(ok(()))); | ||||
|  | ||||
|         let dir = tempfile::tempdir_in(".").unwrap(); | ||||
|         let handle = Arc::new(index_handle); | ||||
|         let update_handle = | ||||
|             UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); | ||||
|  | ||||
|         let snapshot_path = tempfile::tempdir_in(".").unwrap(); | ||||
|         let snapshot_service = SnapshotService::new( | ||||
|             uuid_resolver, | ||||
|             update_handle, | ||||
|             Duration::from_millis(100), | ||||
|             snapshot_path.path().to_owned(), | ||||
|             "data.ms".to_string(), | ||||
|         ); | ||||
|  | ||||
|         snapshot_service.perform_snapshot().await.unwrap(); | ||||
|     } | ||||
|  | ||||
|     #[actix_rt::test] | ||||
|     async fn error_performing_uuid_snapshot() { | ||||
|         let mut uuid_resolver = MockUuidResolverHandle::new(); | ||||
|         uuid_resolver | ||||
|             .expect_snapshot() | ||||
|             .times(1) | ||||
|             // arbitrary error | ||||
|             .returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); | ||||
|  | ||||
|         let update_handle = MockUpdateActorHandle::new(); | ||||
|  | ||||
|         let snapshot_path = tempfile::tempdir_in(".").unwrap(); | ||||
|         let snapshot_service = SnapshotService::new( | ||||
|             uuid_resolver, | ||||
|             update_handle, | ||||
|             Duration::from_millis(100), | ||||
|             snapshot_path.path().to_owned(), | ||||
|             "data.ms".to_string(), | ||||
|         ); | ||||
|  | ||||
|         assert!(snapshot_service.perform_snapshot().await.is_err()); | ||||
|         // Nothing was written to the file | ||||
|         assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); | ||||
|     } | ||||
|  | ||||
|     #[actix_rt::test] | ||||
|     async fn error_performing_index_snapshot() { | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         let mut uuid_resolver = MockUuidResolverHandle::new(); | ||||
|         uuid_resolver | ||||
|             .expect_snapshot() | ||||
|             .times(1) | ||||
|             .returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid))))); | ||||
|  | ||||
|         let mut update_handle = MockUpdateActorHandle::new(); | ||||
|         update_handle | ||||
|             .expect_snapshot() | ||||
|             // arbitrary error | ||||
|             .returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); | ||||
|  | ||||
|         let snapshot_path = tempfile::tempdir_in(".").unwrap(); | ||||
|         let snapshot_service = SnapshotService::new( | ||||
|             uuid_resolver, | ||||
|             update_handle, | ||||
|             Duration::from_millis(100), | ||||
|             snapshot_path.path().to_owned(), | ||||
|             "data.ms".to_string(), | ||||
|         ); | ||||
|  | ||||
|         assert!(snapshot_service.perform_snapshot().await.is_err()); | ||||
|         // Nothing was written to the file | ||||
|         assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); | ||||
|     } | ||||
|  | ||||
|     #[actix_rt::test] | ||||
|     async fn test_loop() { | ||||
|         let mut uuid_resolver = MockUuidResolverHandle::new(); | ||||
|         uuid_resolver | ||||
|             .expect_snapshot() | ||||
|             // we expect the function to be called between 2 and 3 times in the given interval. | ||||
|             .times(2..4) | ||||
|             // arbitrary error, to short-circuit the function | ||||
|             .returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); | ||||
|  | ||||
|         let update_handle = MockUpdateActorHandle::new(); | ||||
|  | ||||
|         let snapshot_path = tempfile::tempdir_in(".").unwrap(); | ||||
|         let snapshot_service = SnapshotService::new( | ||||
|             uuid_resolver, | ||||
|             update_handle, | ||||
|             Duration::from_millis(100), | ||||
|             snapshot_path.path().to_owned(), | ||||
|             "data.ms".to_string(), | ||||
|         ); | ||||
|  | ||||
|         let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await; | ||||
|     } | ||||
| } | ||||
							
								
								
									
										260
									
								
								meilisearch-lib/src/index_controller/update_actor/actor.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										260
									
								
								meilisearch-lib/src/index_controller/update_actor/actor.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,260 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::io; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::atomic::AtomicBool; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use actix_web::error::PayloadError; | ||||
| use async_stream::stream; | ||||
| use bytes::Bytes; | ||||
| use futures::{Stream, StreamExt}; | ||||
| use log::trace; | ||||
| use milli::documents::DocumentBatchBuilder; | ||||
| use serde_json::{Map, Value}; | ||||
| use tokio::sync::mpsc; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::{Result, UpdateActorError}; | ||||
| use super::RegisterUpdate; | ||||
| use super::{UpdateMsg, UpdateStore, UpdateStoreInfo, Update}; | ||||
| use crate::index_controller::index_actor::IndexActorHandle; | ||||
| use crate::index_controller::update_file_store::UpdateFileStore; | ||||
| use crate::index_controller::{DocumentAdditionFormat, Payload, UpdateStatus}; | ||||
|  | ||||
| /// Actor that receives `UpdateMsg` messages from its inbox and handles them. | ||||
| pub struct UpdateActor<I> { | ||||
|     /// Update store, shared with the blocking tasks spawned by the handlers. | ||||
|     store: Arc<UpdateStore>, | ||||
|     /// Message receiver; wrapped in an `Option` so that `run` can take ownership of it. | ||||
|     inbox: Option<mpsc::Receiver<UpdateMsg>>, | ||||
|     /// Store in which the content of document additions is written. | ||||
|     update_file_store: UpdateFileStore, | ||||
|     index_handle: I, | ||||
|     /// Flag shared with the update store; once set, `run` stops yielding messages. | ||||
|     must_exit: Arc<AtomicBool>, | ||||
| } | ||||
|  | ||||
| /// Adapter exposing a stream of `Bytes` chunks through the blocking [`io::Read`] interface. | ||||
| struct StreamReader<S> { | ||||
|     stream: S, | ||||
|     /// Remainder of the last chunk pulled from `stream` that has not been handed out yet. | ||||
|     current: Option<Bytes>, | ||||
| } | ||||
|  | ||||
| impl<S> StreamReader<S> { | ||||
|     /// Wraps `stream`; nothing is pulled from it until the first `read`. | ||||
|     fn new(stream: S) -> Self { | ||||
|         Self { stream, current: None } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<S: Stream<Item = std::result::Result<Bytes, PayloadError>> + Unpin> io::Read for StreamReader<S> { | ||||
|     /// Copies at most `buf.len()` bytes of the pending chunk into `buf`, pulling the next | ||||
|     /// chunk from the stream when none is pending. | ||||
|     /// | ||||
|     /// Returns `Ok(0)` only when `buf` is empty or the stream is exhausted. A stream error | ||||
|     /// is reported as an `io::Error` of kind `BrokenPipe`. | ||||
|     /// | ||||
|     /// The stream is polled with `Handle::current().block_on`, so this must run on a thread | ||||
|     /// where blocking on the tokio runtime is allowed (e.g. inside `spawn_blocking`). | ||||
|     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | ||||
|         // Nothing can be copied into an empty buffer; don't block on the stream for it. | ||||
|         if buf.is_empty() { | ||||
|             return Ok(0); | ||||
|         } | ||||
|  | ||||
|         loop { | ||||
|             if let Some(mut bytes) = self.current.take() { | ||||
|                 // Empty chunks are skipped: returning 0 for them would signal a bogus EOF. | ||||
|                 if !bytes.is_empty() { | ||||
|                     // Never split past the end of the chunk nor copy more than `buf` holds. | ||||
|                     let len = buf.len().min(bytes.len()); | ||||
|                     let copied = bytes.split_to(len); | ||||
|                     buf[..len].copy_from_slice(&copied); | ||||
|                     if !bytes.is_empty() { | ||||
|                         self.current = Some(bytes); | ||||
|                     } | ||||
|                     return Ok(len); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             match tokio::runtime::Handle::current().block_on(self.stream.next()) { | ||||
|                 Some(Ok(bytes)) => self.current = Some(bytes), | ||||
|                 Some(Err(e)) => return Err(io::Error::new(io::ErrorKind::BrokenPipe, e)), | ||||
|                 None => return Ok(0), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<I> UpdateActor<I> | ||||
| where | ||||
|     I: IndexActorHandle + Clone + Sync + Send + 'static, | ||||
| { | ||||
|     /// Creates the actor, opening its stores under `path`. | ||||
|     /// | ||||
|     /// `path` is created if needed, the `UpdateStore` is opened there with a map size of | ||||
|     /// `update_db_size`, and the `UpdateFileStore` is created from the same path. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns an error if the directory cannot be created or if either store fails to open. | ||||
|     pub fn new( | ||||
|         update_db_size: usize, | ||||
|         inbox: mpsc::Receiver<UpdateMsg>, | ||||
|         path: impl AsRef<Path>, | ||||
|         index_handle: I, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let path = path.as_ref().to_owned(); | ||||
|         std::fs::create_dir_all(&path)?; | ||||
|  | ||||
|         let mut options = heed::EnvOpenOptions::new(); | ||||
|         options.map_size(update_db_size); | ||||
|  | ||||
|         // Shared with the store so that it can ask the actor loop to stop. | ||||
|         let must_exit = Arc::new(AtomicBool::new(false)); | ||||
|  | ||||
|         let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?; | ||||
|  | ||||
|         let inbox = Some(inbox); | ||||
|  | ||||
|         // Report the failure to the caller instead of panicking in a fallible constructor. | ||||
|         let update_file_store = UpdateFileStore::new(&path) | ||||
|             .map_err(|e| anyhow::anyhow!("failed to open the update file store: {:?}", e))?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             store, | ||||
|             inbox, | ||||
|             index_handle, | ||||
|             must_exit, | ||||
|             update_file_store, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Runs the actor until the inbox is closed or the `must_exit` flag is raised. | ||||
|     /// | ||||
|     /// Up to 10 messages are handled concurrently; each handler result is sent back on the | ||||
|     /// `ret` channel of its message, and a failure to send it is ignored. | ||||
|     pub async fn run(mut self) { | ||||
|         trace!("Started update actor."); | ||||
|  | ||||
|         let mut inbox = self | ||||
|             .inbox | ||||
|             .take() | ||||
|             .expect("A receiver should be present by now."); | ||||
|  | ||||
|         let must_exit = self.must_exit.clone(); | ||||
|         let messages = stream! { | ||||
|             while let Some(msg) = inbox.recv().await { | ||||
|                 // The flag is checked after each wake-up, before the message is handed out. | ||||
|                 if must_exit.load(std::sync::atomic::Ordering::Relaxed) { | ||||
|                     break; | ||||
|                 } | ||||
|  | ||||
|                 yield msg; | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         messages | ||||
|             .for_each_concurrent(Some(10), |msg| async { | ||||
|                 match msg { | ||||
|                     UpdateMsg::Update { uuid, update, ret } => { | ||||
|                         let _ = ret.send(self.handle_update(uuid, update).await); | ||||
|                     } | ||||
|                     UpdateMsg::ListUpdates { uuid, ret } => { | ||||
|                         let _ = ret.send(self.handle_list_updates(uuid).await); | ||||
|                     } | ||||
|                     UpdateMsg::GetUpdate { uuid, ret, id } => { | ||||
|                         let _ = ret.send(self.handle_get_update(uuid, id).await); | ||||
|                     } | ||||
|                     UpdateMsg::Delete { uuid, ret } => { | ||||
|                         let _ = ret.send(self.handle_delete(uuid).await); | ||||
|                     } | ||||
|                     UpdateMsg::Snapshot { uuids, path, ret } => { | ||||
|                         let _ = ret.send(self.handle_snapshot(uuids, path).await); | ||||
|                     } | ||||
|                     UpdateMsg::GetInfo { ret } => { | ||||
|                         let _ = ret.send(self.handle_get_info().await); | ||||
|                     } | ||||
|                     UpdateMsg::Dump { uuids, path, ret } => { | ||||
|                         let _ = ret.send(self.handle_dump(uuids, path).await); | ||||
|                     } | ||||
|                 } | ||||
|             }) | ||||
|             .await; | ||||
|     } | ||||
|  | ||||
|     /// Turns `update` into a `RegisterUpdate` and registers it in the store for `index_uuid`. | ||||
|     /// | ||||
|     /// For a document addition, the payload is first written to an update file whose uuid is | ||||
|     /// recorded in the registration. The registration itself runs on a blocking task. | ||||
|     async fn handle_update( | ||||
|         &self, | ||||
|         index_uuid: Uuid, | ||||
|         update: Update, | ||||
|     ) -> Result<UpdateStatus> { | ||||
|         let registration = match update { | ||||
|             Update::DocumentAddition { payload, primary_key, method, format } => { | ||||
|                 let content_uuid = match format { | ||||
|                     DocumentAdditionFormat::Json => self.documents_from_json(payload).await?, | ||||
|                 }; | ||||
|  | ||||
|                 RegisterUpdate::DocumentAddition { primary_key, method, content_uuid } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let store = Arc::clone(&self.store); | ||||
|         let registering = | ||||
|             tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration)); | ||||
|         // First `?` is for the task join error, second for the registration error. | ||||
|         let enqueued = registering.await??; | ||||
|  | ||||
|         Ok(enqueued.into()) | ||||
|     } | ||||
|  | ||||
|     /// Reads `payload` as a JSON array of objects, writes it as a document batch into a new | ||||
|     /// update file and returns the uuid of that file. | ||||
|     /// | ||||
|     /// The whole work runs on a blocking task. Only a JSON deserialization failure is | ||||
|     /// returned as an error; failures of the file store or batch builder panic. | ||||
|     async fn documents_from_json(&self, payload: Payload) -> Result<Uuid> { | ||||
|         let file_store = self.update_file_store.clone(); | ||||
|         tokio::task::spawn_blocking(move || { | ||||
|             let (content_uuid, mut update_file) = file_store.new_update().unwrap(); | ||||
|             let mut batch = DocumentBatchBuilder::new(&mut *update_file).unwrap(); | ||||
|  | ||||
|             let reader = StreamReader::new(payload); | ||||
|             let documents: Vec<Map<String, Value>> = serde_json::from_reader(reader)?; | ||||
|             batch.add_documents(documents).unwrap(); | ||||
|             batch.finish().unwrap(); | ||||
|  | ||||
|             update_file.persist(); | ||||
|  | ||||
|             Ok(content_uuid) | ||||
|         }).await? | ||||
|     } | ||||
|  | ||||
|     /// Lists the status of the updates stored for the index `uuid`. | ||||
|     /// | ||||
|     /// The store is queried from a task spawned with `spawn_blocking`. | ||||
|     async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> { | ||||
|         let store = self.store.clone(); | ||||
|         // `?` converts the store error, the outer `?` converts the join error. | ||||
|         tokio::task::spawn_blocking(move || Ok(store.list(uuid)?)).await? | ||||
|     } | ||||
|  | ||||
|     /// Fetches the status of the update `id` of the index `uuid`. | ||||
|     /// | ||||
|     /// Fails with `UpdateActorError::UnexistingUpdate` when the store has no such update. | ||||
|     async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> { | ||||
|         let update_store = self.store.clone(); | ||||
|         tokio::task::spawn_blocking(move || match update_store.meta(uuid, id)? { | ||||
|             Some(status) => Ok(status), | ||||
|             None => Err(UpdateActorError::UnexistingUpdate(id)), | ||||
|         }) | ||||
|         .await? | ||||
|     } | ||||
|  | ||||
|     /// Deletes all the updates stored for the index `uuid`. | ||||
|     async fn handle_delete(&self, uuid: Uuid) -> Result<()> { | ||||
|         let update_store = self.store.clone(); | ||||
|         let delete_job = move || update_store.delete_all(uuid); | ||||
|  | ||||
|         // First `?` is for the join error, second one for the store error. | ||||
|         tokio::task::spawn_blocking(delete_job).await??; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Asks the store to snapshot the indexes in `uuids` to `path`. | ||||
|     /// | ||||
|     /// The snapshot runs in a task spawned with `spawn_blocking`. | ||||
|     async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> { | ||||
|         let store = self.store.clone(); | ||||
|         let indexes = self.index_handle.clone(); | ||||
|         let snapshot_job = move || store.snapshot(&uuids, &path, indexes); | ||||
|  | ||||
|         tokio::task::spawn_blocking(snapshot_job).await??; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Asks the store to dump the indexes in `uuids` to `path`. | ||||
|     /// | ||||
|     /// The dump runs in a task spawned with `spawn_blocking`. | ||||
|     async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> { | ||||
|         let index_handle = self.index_handle.clone(); | ||||
|         let update_store = self.store.clone(); | ||||
|  | ||||
|         tokio::task::spawn_blocking(move || -> Result<()> { | ||||
|             // `path` is owned by this closure, so it is moved rather than cloned. | ||||
|             update_store.dump(&uuids, path, index_handle)?; | ||||
|             Ok(()) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Retrieves the `UpdateStoreInfo` from the update store. | ||||
|     async fn handle_get_info(&self) -> Result<UpdateStoreInfo> { | ||||
|         let store = self.store.clone(); | ||||
|         // The `?` unwraps the join result; the closure result is returned as is. | ||||
|         tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> { Ok(store.get_info()?) }) | ||||
|             .await? | ||||
|     } | ||||
|  | ||||
| } | ||||
							
								
								
									
										61
									
								
								meilisearch-lib/src/index_controller/update_actor/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								meilisearch-lib/src/index_controller/update_actor/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| use std::error::Error; | ||||
|  | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
|  | ||||
| use crate::index_controller::index_actor::error::IndexActorError; | ||||
|  | ||||
| /// Result type whose error is an [`UpdateActorError`]. | ||||
| pub type Result<T> = std::result::Result<T, UpdateActorError>; | ||||
|  | ||||
| /// Errors returned by the update actor and its handle. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| pub enum UpdateActorError { | ||||
|     /// No update with the given id was found. | ||||
|     #[error("Update {0} not found.")] | ||||
|     UnexistingUpdate(u64), | ||||
|     /// Wraps an error that is reported with the internal error code. | ||||
|     #[error("Internal error: {0}")] | ||||
|     Internal(Box<dyn Error + Send + Sync + 'static>), | ||||
|     /// An error coming from the index actor; its error code is forwarded. | ||||
|     #[error("{0}")] | ||||
|     IndexActor(#[from] IndexActorError), | ||||
|     /// Produced when a message cannot be sent to the actor, or when its answer is never received. | ||||
|     #[error( | ||||
|         "update store was shut down due to a fatal error, please check your logs for more info." | ||||
|     )] | ||||
|     FatalUpdateStoreError, | ||||
|     /// The payload was invalid; reported as a bad request. | ||||
|     #[error("{0}")] | ||||
|     InvalidPayload(Box<dyn Error + Send + Sync + 'static>), | ||||
|     /// An actix payload error; an overflow is reported as a too large payload. | ||||
|     #[error("{0}")] | ||||
|     PayloadError(#[from] actix_web::error::PayloadError), | ||||
| } | ||||
|  | ||||
| impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError { | ||||
|     fn from(_: tokio::sync::mpsc::error::SendError<T>) -> Self { | ||||
|         Self::FatalUpdateStoreError | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<tokio::sync::oneshot::error::RecvError> for UpdateActorError { | ||||
|     fn from(_: tokio::sync::oneshot::error::RecvError) -> Self { | ||||
|         Self::FatalUpdateStoreError | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Generates a `From` impl for each listed error type, boxing it into | ||||
| // `UpdateActorError::Internal`. | ||||
| internal_error!( | ||||
|     UpdateActorError: heed::Error, | ||||
|     std::io::Error, | ||||
|     serde_json::Error, | ||||
|     tokio::task::JoinError | ||||
| ); | ||||
|  | ||||
| impl ErrorCode for UpdateActorError { | ||||
|     /// Maps each error variant to the error code it is reported with. | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             Self::UnexistingUpdate(_) => Code::NotFound, | ||||
|             // Index actor errors carry their own code. | ||||
|             Self::IndexActor(inner) => inner.error_code(), | ||||
|             Self::InvalidPayload(_) => Code::BadRequest, | ||||
|             Self::PayloadError(actix_web::error::PayloadError::Overflow) => { | ||||
|                 Code::PayloadTooLarge | ||||
|             } | ||||
|             // Every other payload error is reported as internal. | ||||
|             Self::Internal(_) | Self::FatalUpdateStoreError | Self::PayloadError(_) => { | ||||
|                 Code::Internal | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,94 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use tokio::sync::{mpsc, oneshot}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index_controller::{IndexActorHandle, Update, UpdateStatus}; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::{UpdateActor, UpdateActorHandle, UpdateMsg, UpdateStoreInfo}; | ||||
|  | ||||
| /// Handle to the update actor: forwards requests to it as `UpdateMsg`s over an mpsc channel. | ||||
| #[derive(Clone)] | ||||
| pub struct UpdateActorHandleImpl { | ||||
|     /// Sending half of the channel read by the actor. | ||||
|     sender: mpsc::Sender<UpdateMsg>, | ||||
| } | ||||
|  | ||||
| impl UpdateActorHandleImpl { | ||||
|     /// Creates the update actor, spawns it as a local task and returns a handle to it. | ||||
|     /// | ||||
|     /// The handle and the actor communicate through a channel with a capacity of 100 messages. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns an error if the actor cannot be created. | ||||
|     pub fn new<I>( | ||||
|         index_handle: I, | ||||
|         path: impl AsRef<Path>, | ||||
|         update_store_size: usize, | ||||
|     ) -> anyhow::Result<Self> | ||||
|     where | ||||
|         I: IndexActorHandle + Clone + Sync + Send + 'static, | ||||
|     { | ||||
|         let (sender, receiver) = mpsc::channel(100); | ||||
|         let store_path = path.as_ref().to_owned(); | ||||
|         let actor = UpdateActor::new(update_store_size, receiver, store_path, index_handle)?; | ||||
|  | ||||
|         // NOTE(review): `spawn_local` presumably requires running inside a `LocalSet`; confirm | ||||
|         // against the callers. | ||||
|         tokio::task::spawn_local(actor.run()); | ||||
|  | ||||
|         Ok(Self { sender }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Every method follows the same protocol: create a oneshot channel, send the message carrying | ||||
| // the oneshot sender to the actor, then wait for the actor's answer. | ||||
| #[async_trait::async_trait] | ||||
| impl UpdateActorHandle for UpdateActorHandleImpl { | ||||
|     async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::ListUpdates { uuid, ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
|  | ||||
|     async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::GetUpdate { uuid, id, ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
|  | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<()> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::Delete { uuid, ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
|  | ||||
|     async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::Snapshot { uuids, path, ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
|  | ||||
|     async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::Dump { uuids, path, ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
|  | ||||
|     async fn get_info(&self) -> Result<UpdateStoreInfo> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::GetInfo { ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
|  | ||||
|     async fn update( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         update: Update, | ||||
|     ) -> Result<UpdateStatus> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         self.sender.send(UpdateMsg::Update { uuid, update, ret }).await?; | ||||
|         response.await? | ||||
|     } | ||||
| } | ||||
							
								
								
									
										42
									
								
								meilisearch-lib/src/index_controller/update_actor/message.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								meilisearch-lib/src/index_controller/update_actor/message.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::path::PathBuf; | ||||
|  | ||||
| use tokio::sync::oneshot; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::{UpdateStatus, UpdateStoreInfo, Update}; | ||||
|  | ||||
| /// Messages sent to the update actor. | ||||
| /// | ||||
| /// Every variant carries a `ret` oneshot sender through which the actor sends back the result. | ||||
| pub enum UpdateMsg { | ||||
|     /// Register `update` for the index `uuid`. | ||||
|     Update { | ||||
|         uuid: Uuid, | ||||
|         update: Update, | ||||
|         ret: oneshot::Sender<Result<UpdateStatus>>, | ||||
|     }, | ||||
|     /// List the update statuses of the index `uuid`. | ||||
|     ListUpdates { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<Result<Vec<UpdateStatus>>>, | ||||
|     }, | ||||
|     /// Get the status of the update `id` of the index `uuid`. | ||||
|     GetUpdate { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<Result<UpdateStatus>>, | ||||
|         id: u64, | ||||
|     }, | ||||
|     /// Delete the updates of the index `uuid`. | ||||
|     Delete { | ||||
|         uuid: Uuid, | ||||
|         ret: oneshot::Sender<Result<()>>, | ||||
|     }, | ||||
|     /// Snapshot the indexes in `uuids` to `path`. | ||||
|     Snapshot { | ||||
|         uuids: HashSet<Uuid>, | ||||
|         path: PathBuf, | ||||
|         ret: oneshot::Sender<Result<()>>, | ||||
|     }, | ||||
|     /// Dump the indexes in `uuids` to `path`. | ||||
|     Dump { | ||||
|         uuids: HashSet<Uuid>, | ||||
|         path: PathBuf, | ||||
|         ret: oneshot::Sender<Result<()>>, | ||||
|     }, | ||||
|     /// Get information about the update store. | ||||
|     GetInfo { | ||||
|         ret: oneshot::Sender<Result<UpdateStoreInfo>>, | ||||
|     }, | ||||
| } | ||||
							
								
								
									
										49
									
								
								meilisearch-lib/src/index_controller/update_actor/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								meilisearch-lib/src/index_controller/update_actor/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| use std::{collections::HashSet, path::PathBuf}; | ||||
|  | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use uuid::Uuid; | ||||
| use serde::{Serialize, Deserialize}; | ||||
|  | ||||
| use crate::index_controller::UpdateStatus; | ||||
| use super::Update; | ||||
|  | ||||
| use actor::UpdateActor; | ||||
| use error::Result; | ||||
| use message::UpdateMsg; | ||||
|  | ||||
| pub use handle_impl::UpdateActorHandleImpl; | ||||
| pub use store::{UpdateStore, UpdateStoreInfo}; | ||||
|  | ||||
| mod actor; | ||||
| pub mod error; | ||||
| mod handle_impl; | ||||
| mod message; | ||||
| pub mod store; | ||||
|  | ||||
| /// Description of an update as it is registered in the update store. | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum RegisterUpdate { | ||||
|     /// A document addition whose documents were written to an update file. | ||||
|     DocumentAddition { | ||||
|         primary_key: Option<String>, | ||||
|         method: IndexDocumentsMethod, | ||||
|         /// Uuid of the update file holding the documents. | ||||
|         content_uuid: Uuid, | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| #[cfg(test)] | ||||
| use mockall::automock; | ||||
|  | ||||
| /// Interface used to send requests to the update actor. | ||||
| #[async_trait::async_trait] | ||||
| pub trait UpdateActorHandle { | ||||
|     /// Returns the status of the updates of the index `uuid`. | ||||
|     async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>; | ||||
|     /// Returns the status of the update `id` of the index `uuid`. | ||||
|     async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>; | ||||
|     /// Deletes the updates of the index `uuid`. | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<()>; | ||||
|     /// Snapshots the given indexes to `path`. | ||||
|     async fn snapshot(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>; | ||||
|     /// Dumps the given indexes to `path`. | ||||
|     async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>; | ||||
|     /// Returns information about the update store. | ||||
|     async fn get_info(&self) -> Result<UpdateStoreInfo>; | ||||
|     /// Registers `update` for the index `uuid` and returns its status. | ||||
|     async fn update( | ||||
|         &self, | ||||
|         uuid: Uuid, | ||||
|         update: Update, | ||||
|     ) -> Result<UpdateStatus>; | ||||
| } | ||||
| @@ -0,0 +1,86 @@ | ||||
| use std::{borrow::Cow, convert::TryInto, mem::size_of}; | ||||
|  | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| /// Codec encoding a [`NextIdKey`] into a database key. | ||||
| pub struct NextIdCodec; | ||||
|  | ||||
| /// Key of the next-update-id database. | ||||
| pub enum NextIdKey { | ||||
|     /// Key of the global counter, encoded as the bytes `__global__`. | ||||
|     Global, | ||||
|     /// Key of the counter of an index, encoded as the bytes of its uuid. | ||||
|     Index(Uuid), | ||||
| } | ||||
|  | ||||
| impl<'a> BytesEncode<'a> for NextIdCodec { | ||||
|     type EItem = NextIdKey; | ||||
|  | ||||
|     /// Encodes the key without copying: `__global__` for the global key, the uuid bytes | ||||
|     /// for an index key. | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let key: &'a [u8] = match item { | ||||
|             NextIdKey::Global => b"__global__", | ||||
|             NextIdKey::Index(uuid) => uuid.as_bytes(), | ||||
|         }; | ||||
|         Some(Cow::Borrowed(key)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Codec for `(global_id, index_uuid, update_id)` keys; both ids are encoded in big-endian. | ||||
| pub struct PendingKeyCodec; | ||||
|  | ||||
| impl<'a> BytesEncode<'a> for PendingKeyCodec { | ||||
|     type EItem = (u64, Uuid, u64); | ||||
|  | ||||
|     /// Concatenates the big-endian global id, the uuid bytes and the big-endian update id. | ||||
|     fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let key = [ | ||||
|             &global_id.to_be_bytes()[..], | ||||
|             &uuid.as_bytes()[..], | ||||
|             &update_id.to_be_bytes()[..], | ||||
|         ] | ||||
|         .concat(); | ||||
|         Some(Cow::Owned(key)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> BytesDecode<'a> for PendingKeyCodec { | ||||
|     type DItem = (u64, Uuid, u64); | ||||
|  | ||||
|     /// Decodes a key made of a big-endian global id, a uuid and a big-endian update id. | ||||
|     /// | ||||
|     /// Returns `None` when `bytes` does not have exactly the expected length. | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         const ID_LEN: usize = size_of::<u64>(); | ||||
|         const UUID_LEN: usize = size_of::<Uuid>(); | ||||
|  | ||||
|         // `split_at` panics on a too short slice, so the length is checked first. | ||||
|         if bytes.len() < ID_LEN + UUID_LEN { | ||||
|             return None; | ||||
|         } | ||||
|         let (global_id_bytes, rest) = bytes.split_at(ID_LEN); | ||||
|         let (uuid_bytes, update_id_bytes) = rest.split_at(UUID_LEN); | ||||
|  | ||||
|         let global_id = u64::from_be_bytes(global_id_bytes.try_into().ok()?); | ||||
|         let uuid = Uuid::from_bytes(uuid_bytes.try_into().ok()?); | ||||
|         // Fails unless exactly 8 bytes are left for the update id. | ||||
|         let update_id = u64::from_be_bytes(update_id_bytes.try_into().ok()?); | ||||
|  | ||||
|         Some((global_id, uuid, update_id)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Codec for `(index_uuid, update_id)` keys; the update id is encoded in big-endian. | ||||
| pub struct UpdateKeyCodec; | ||||
|  | ||||
| impl<'a> BytesEncode<'a> for UpdateKeyCodec { | ||||
|     type EItem = (Uuid, u64); | ||||
|  | ||||
|     /// Concatenates the uuid bytes and the big-endian update id. | ||||
|     fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         let key = [&uuid.as_bytes()[..], &update_id.to_be_bytes()[..]].concat(); | ||||
|         Some(Cow::Owned(key)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> BytesDecode<'a> for UpdateKeyCodec { | ||||
|     type DItem = (Uuid, u64); | ||||
|  | ||||
|     /// Decodes a key made of a uuid followed by a big-endian update id. | ||||
|     /// | ||||
|     /// Returns `None` when `bytes` does not have exactly the expected length. | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         const UUID_LEN: usize = size_of::<Uuid>(); | ||||
|  | ||||
|         // `split_at` panics on a too short slice, so the length is checked first. | ||||
|         if bytes.len() < UUID_LEN { | ||||
|             return None; | ||||
|         } | ||||
|         let (uuid_bytes, update_id_bytes) = bytes.split_at(UUID_LEN); | ||||
|  | ||||
|         let uuid = Uuid::from_bytes(uuid_bytes.try_into().ok()?); | ||||
|         // Fails unless exactly 8 bytes are left for the update id. | ||||
|         let update_id = u64::from_be_bytes(update_id_bytes.try_into().ok()?); | ||||
|  | ||||
|         Some((uuid, update_id)) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										186
									
								
								meilisearch-lib/src/index_controller/update_actor/store/dump.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										186
									
								
								meilisearch-lib/src/index_controller/update_actor/store/dump.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,186 @@ | ||||
| use std::{ | ||||
|     collections::HashSet, | ||||
|     fs::{create_dir_all, File}, | ||||
|     io::Write, | ||||
|     path::{Path, PathBuf}, | ||||
| }; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::{Result, State, UpdateStore}; | ||||
| use crate::index_controller::{ | ||||
|     index_actor::IndexActorHandle, | ||||
|     UpdateStatus, | ||||
| }; | ||||
|  | ||||
| /// One line of the `data.jsonl` dump file: an update status with the uuid of its index. | ||||
| #[derive(Serialize, Deserialize)] | ||||
| struct UpdateEntry { | ||||
|     uuid: Uuid, | ||||
|     update: UpdateStatus, | ||||
| } | ||||
|  | ||||
| impl UpdateStore { | ||||
|     /// Dumps the updates of the indexes in `uuids`, then the indexes themselves, under `path`. | ||||
|     /// | ||||
|     /// The update entries are written to `path/updates`; each index is then asked to dump | ||||
|     /// itself through `handle`. The store is in the `State::Dumping` state while the dump | ||||
|     /// runs, and is put back in the `State::Idle` state afterwards, whether the dump | ||||
|     /// succeeded or returned an error. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns an error if the transaction cannot be opened, if the dump files cannot be | ||||
|     /// written, or if dumping an index fails. | ||||
|     pub fn dump( | ||||
|         &self, | ||||
|         uuids: &HashSet<Uuid>, | ||||
|         path: PathBuf, | ||||
|         handle: impl IndexActorHandle, | ||||
|     ) -> Result<()> { | ||||
|         let state_lock = self.state.write(); | ||||
|         state_lock.swap(State::Dumping); | ||||
|  | ||||
|         // The fallible work is grouped in a closure so the state is reset however it ends: | ||||
|         // an early `?` return would otherwise leave the store in the `Dumping` state. | ||||
|         let result = (|| -> Result<()> { | ||||
|             // txn must *always* be acquired after state lock, or it will dead lock. | ||||
|             let txn = self.env.write_txn()?; | ||||
|  | ||||
|             let dump_path = path.join("updates"); | ||||
|             create_dir_all(&dump_path)?; | ||||
|  | ||||
|             self.dump_updates(&txn, uuids, &dump_path)?; | ||||
|  | ||||
|             let fut = dump_indexes(uuids, handle, &path); | ||||
|             tokio::runtime::Handle::current().block_on(fut)?; | ||||
|  | ||||
|             Ok(()) | ||||
|         })(); | ||||
|  | ||||
|         state_lock.swap(State::Idle); | ||||
|  | ||||
|         result | ||||
|     } | ||||
|  | ||||
|     /// Writes the pending then the completed updates of `uuids` to `data.jsonl` in `path`. | ||||
|     /// | ||||
|     /// Also creates the update files directory inside `path`. | ||||
|     fn dump_updates( | ||||
|         &self, | ||||
|         txn: &RoTxn, | ||||
|         uuids: &HashSet<Uuid>, | ||||
|         path: impl AsRef<Path>, | ||||
|     ) -> Result<()> { | ||||
|         let dump_dir = path.as_ref(); | ||||
|  | ||||
|         let mut dump_data_file = File::create(dump_dir.join("data.jsonl"))?; | ||||
|         create_dir_all(dump_dir.join(super::UPDATE_DIR))?; | ||||
|  | ||||
|         // Both steps append to the same file, pending updates first. | ||||
|         self.dump_pending(txn, uuids, &mut dump_data_file, dump_dir)?; | ||||
|         self.dump_completed(txn, uuids, &mut dump_data_file)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Meant to write the pending updates of `_uuids` to the dump file. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Not implemented yet: always panics through `todo!()`. Since `dump_updates` calls this | ||||
|     /// method, a dump cannot complete until it is implemented. The previous implementation | ||||
|     /// is kept below, commented out. | ||||
|     fn dump_pending( | ||||
|         &self, | ||||
|         _txn: &RoTxn, | ||||
|         _uuids: &HashSet<Uuid>, | ||||
|         _file: &mut File, | ||||
|         _dst_path: impl AsRef<Path>, | ||||
|     ) -> Result<()> { | ||||
|         todo!() | ||||
|         //let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); | ||||
|  | ||||
|         //for pending in pendings { | ||||
|             //let ((_, uuid, _), data) = pending?; | ||||
|             //if uuids.contains(&uuid) { | ||||
|                 //let update = data.decode()?; | ||||
|  | ||||
|                 //if let Some(ref update_uuid) = update.content { | ||||
|                     //let src = super::update_uuid_to_file_path(&self.path, *update_uuid); | ||||
|                     //let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid); | ||||
|                     //std::fs::copy(src, dst)?; | ||||
|                 //} | ||||
|  | ||||
|                 //let update_json = UpdateEntry { | ||||
|                     //uuid, | ||||
|                     //update: update.into(), | ||||
|                 //}; | ||||
|  | ||||
|                 //serde_json::to_writer(&mut file, &update_json)?; | ||||
|                 //file.write_all(b"\n")?; | ||||
|             //} | ||||
|         //} | ||||
|  | ||||
|         //Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Appends the stored updates of the indexes in `uuids` to `file`, one JSON entry per line. | ||||
|     fn dump_completed( | ||||
|         &self, | ||||
|         txn: &RoTxn, | ||||
|         uuids: &HashSet<Uuid>, | ||||
|         mut file: &mut File, | ||||
|     ) -> Result<()> { | ||||
|         for entry in self.updates.iter(txn)?.lazily_decode_data() { | ||||
|             let ((uuid, _), lazy_status) = entry?; | ||||
|             // The status is only decoded for the indexes that are part of the dump. | ||||
|             if !uuids.contains(&uuid) { | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             let update_json = UpdateEntry { | ||||
|                 uuid, | ||||
|                 update: lazy_status.decode()?, | ||||
|             }; | ||||
|  | ||||
|             serde_json::to_writer(&mut file, &update_json)?; | ||||
|             file.write_all(b"\n")?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Meant to load the updates of a dump located in `_src` into a new store in `_dst`. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Not implemented yet: always panics through `todo!()`. The previous implementation is | ||||
|     /// kept below, commented out. | ||||
|     pub fn load_dump( | ||||
|         _src: impl AsRef<Path>, | ||||
|         _dst: impl AsRef<Path>, | ||||
|         _db_size: usize, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         todo!() | ||||
|         //let dst_update_path = dst.as_ref().join("updates/"); | ||||
|         //create_dir_all(&dst_update_path)?; | ||||
|  | ||||
|         //let mut options = EnvOpenOptions::new(); | ||||
|         //options.map_size(db_size as usize); | ||||
|         //let (store, _) = UpdateStore::new(options, &dst_update_path)?; | ||||
|  | ||||
|         //let src_update_path = src.as_ref().join("updates"); | ||||
|         //let update_data = File::open(&src_update_path.join("data.jsonl"))?; | ||||
|         //let mut update_data = BufReader::new(update_data); | ||||
|  | ||||
|         //std::fs::create_dir_all(dst_update_path.join("update_files/"))?; | ||||
|  | ||||
|         //let mut wtxn = store.env.write_txn()?; | ||||
|         //let mut line = String::new(); | ||||
|         //loop { | ||||
|             //match update_data.read_line(&mut line) { | ||||
|                 //Ok(0) => break, | ||||
|                 //Ok(_) => { | ||||
|                     //let UpdateEntry { uuid, update } = serde_json::from_str(&line)?; | ||||
|                     //store.register_raw_updates(&mut wtxn, &update, uuid)?; | ||||
|  | ||||
|                     //// Copy ascociated update path if it exists | ||||
|                     //if let UpdateStatus::Enqueued(Enqueued { | ||||
|                         //content: Some(uuid), | ||||
|                         //.. | ||||
|                     //}) = update | ||||
|                     //{ | ||||
|                         //let src = update_uuid_to_file_path(&src_update_path, uuid); | ||||
|                         //let dst = update_uuid_to_file_path(&dst_update_path, uuid); | ||||
|                         //std::fs::copy(src, dst)?; | ||||
|                     //} | ||||
|                 //} | ||||
|                 //_ => break, | ||||
|             //} | ||||
|  | ||||
|             //line.clear(); | ||||
|         //} | ||||
|  | ||||
|         //wtxn.commit()?; | ||||
|  | ||||
|         //Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Asks `handle` to dump every index of `uuids` to `path`, one after the other. | ||||
| /// | ||||
| /// Stops at the first index whose dump fails and returns its error. | ||||
| async fn dump_indexes( | ||||
|     uuids: &HashSet<Uuid>, | ||||
|     handle: impl IndexActorHandle, | ||||
|     path: impl AsRef<Path>, | ||||
| ) -> Result<()> { | ||||
|     for &uuid in uuids { | ||||
|         let destination = path.as_ref().to_owned(); | ||||
|         handle.dump(uuid, destination).await?; | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
							
								
								
									
										708
									
								
								meilisearch-lib/src/index_controller/update_actor/store/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										708
									
								
								meilisearch-lib/src/index_controller/update_actor/store/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,708 @@ | ||||
| mod codec; | ||||
| pub mod dump; | ||||
|  | ||||
| use std::fs::{create_dir_all, remove_file}; | ||||
| use std::path::Path; | ||||
| use std::sync::atomic::{AtomicBool, Ordering}; | ||||
| use std::sync::Arc; | ||||
| use std::{ | ||||
|     collections::{BTreeMap, HashSet}, | ||||
|     path::PathBuf, | ||||
|     time::Duration, | ||||
| }; | ||||
|  | ||||
| use arc_swap::ArcSwap; | ||||
| use futures::StreamExt; | ||||
| use heed::types::{ByteSlice, OwnedType, SerdeJson}; | ||||
| use heed::zerocopy::U64; | ||||
| use heed::{CompactionOption, Database, Env, EnvOpenOptions}; | ||||
| use log::error; | ||||
| use parking_lot::{Mutex, MutexGuard}; | ||||
| use tokio::runtime::Handle; | ||||
| use tokio::sync::mpsc; | ||||
| use tokio::sync::mpsc::error::TrySendError; | ||||
| use tokio::time::timeout; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use codec::*; | ||||
|  | ||||
| use super::RegisterUpdate; | ||||
| use super::error::Result; | ||||
| use crate::EnvSizer; | ||||
| use crate::index_controller::update_files_path; | ||||
| use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; | ||||
|  | ||||
| /// Big-endian `u64`, the value type of the next-update-id database. | ||||
| #[allow(clippy::upper_case_acronyms)] | ||||
| type BEU64 = U64<heed::byteorder::BE>; | ||||
|  | ||||
| /// Name of the directory holding the update files. | ||||
| const UPDATE_DIR: &str = "update_files"; | ||||
|  | ||||
| /// Information about the update store. | ||||
| pub struct UpdateStoreInfo { | ||||
|     /// Size of the update store in bytes. | ||||
|     pub size: u64, | ||||
|     /// Uuid of the currently processing update if it exists | ||||
|     pub processing: Option<Uuid>, | ||||
| } | ||||
|  | ||||
| /// A data structure that allows concurrent reads AND exactly one writer. | ||||
| /// | ||||
| /// Readers load the current state from `data`; a writer must first take `lock` through | ||||
| /// [`StateLock::write`], and swaps the state through the returned guard. | ||||
| pub struct StateLock { | ||||
|     /// Held by the writer for as long as its guard is alive. | ||||
|     lock: Mutex<()>, | ||||
|     /// The current state. | ||||
|     data: ArcSwap<State>, | ||||
| } | ||||
|  | ||||
| /// Guard returned by [`StateLock::write`]; it holds the writer lock until it is dropped. | ||||
| pub struct StateLockGuard<'a> { | ||||
|     /// Keeps the writer mutex locked for the lifetime of the guard. | ||||
|     _lock: MutexGuard<'a, ()>, | ||||
|     /// The lock whose state can be swapped through this guard. | ||||
|     state: &'a StateLock, | ||||
| } | ||||
|  | ||||
| impl StateLockGuard<'_> { | ||||
|     /// Replaces the current state with `state` and returns the previous one. | ||||
|     pub fn swap(&self, state: State) -> Arc<State> { | ||||
|         let new_state = Arc::new(state); | ||||
|         self.state.data.swap(new_state) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl StateLock { | ||||
|     /// Creates a lock whose initial state is `state`. | ||||
|     fn from_state(state: State) -> Self { | ||||
|         Self { | ||||
|             lock: Mutex::new(()), | ||||
|             data: ArcSwap::from(Arc::new(state)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the current state, without taking the writer lock. | ||||
|     pub fn read(&self) -> Arc<State> { | ||||
|         self.data.load_full() | ||||
|     } | ||||
|  | ||||
|     /// Takes the writer lock and returns a guard through which the state can be swapped. | ||||
|     pub fn write(&self) -> StateLockGuard { | ||||
|         StateLockGuard { | ||||
|             _lock: self.lock.lock(), | ||||
|             state: self, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// The activity the update store is currently performing. | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| pub enum State { | ||||
|     /// Nothing is in progress. | ||||
|     Idle, | ||||
|     /// An update is being processed. | ||||
|     /// NOTE(review): the `Uuid` is presumably the uuid of the index being updated; confirm. | ||||
|     Processing(Uuid, Processing), | ||||
|     /// A snapshot is in progress. | ||||
|     Snapshoting, | ||||
|     /// A dump is in progress. | ||||
|     Dumping, | ||||
| } | ||||
|  | ||||
| /// Store of the pending and processed updates of all the indexes. | ||||
| #[derive(Clone)] | ||||
| pub struct UpdateStore { | ||||
|     /// The environment in which the databases below are created. | ||||
|     pub env: Env, | ||||
|     /// A queue containing the updates to process, ordered by arrival. | ||||
|     /// The keys are built as follows: | ||||
|     /// | global_update_id | index_uuid | update_id | | ||||
|     /// |     8-bytes      |  16-bytes  |  8-bytes  | | ||||
|     pending_queue: Database<PendingKeyCodec, SerdeJson<Enqueued>>, | ||||
|     /// Maps indexes to the next available update id. If NextIdKey::Global is queried, then the next | ||||
|     /// global update id is returned. | ||||
|     next_update_id: Database<NextIdCodec, OwnedType<BEU64>>, | ||||
|     /// Contains all the performed updates meta, be they failed, aborted, or processed. | ||||
|     /// The keys are built as follows: | ||||
|     /// |    Uuid  |   id    | | ||||
|     /// | 16-bytes | 8-bytes | | ||||
|     updates: Database<UpdateKeyCodec, SerdeJson<UpdateStatus>>, | ||||
|     /// Indicates the current state of the update store. | ||||
|     state: Arc<StateLock>, | ||||
|     /// Wakes up the loop when a new event occurs. | ||||
|     notification_sender: mpsc::Sender<()>, | ||||
|     /// The path the store was created with. | ||||
|     path: PathBuf, | ||||
| } | ||||
|  | ||||
| impl UpdateStore { | ||||
|     /// Creates the update store in the `updates` directory of `path`. | ||||
|     /// | ||||
|     /// Returns the store, in the idle state, along with the receiving half of its | ||||
|     /// notification channel. | ||||
|     fn new( | ||||
|         mut options: EnvOpenOptions, | ||||
|         path: impl AsRef<Path>, | ||||
|     ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { | ||||
|         let root = path.as_ref(); | ||||
|  | ||||
|         let env_dir = root.join("updates"); | ||||
|         std::fs::create_dir_all(&env_dir)?; | ||||
|  | ||||
|         options.max_dbs(5); | ||||
|         let env = options.open(env_dir)?; | ||||
|  | ||||
|         let pending_queue = env.create_database(Some("pending-queue"))?; | ||||
|         let next_update_id = env.create_database(Some("next-update-id"))?; | ||||
|         let updates = env.create_database(Some("updates"))?; | ||||
|  | ||||
|         // A capacity of one is enough: a notification only wakes the processing loop up. | ||||
|         let (notification_sender, notification_receiver) = mpsc::channel(1); | ||||
|  | ||||
|         let store = Self { | ||||
|             env, | ||||
|             pending_queue, | ||||
|             next_update_id, | ||||
|             updates, | ||||
|             state: Arc::new(StateLock::from_state(State::Idle)), | ||||
|             notification_sender, | ||||
|             path: root.to_owned(), | ||||
|         }; | ||||
|  | ||||
|         Ok((store, notification_receiver)) | ||||
|     } | ||||
|  | ||||
|     /// Opens the update store at `path` and spawns the local task processing its updates. | ||||
|     /// | ||||
|     /// The task wakes up on every notification, or after a 10 minutes timeout, and then | ||||
|     /// processes pending updates until none is left. It stops when the notification channel | ||||
|     /// is closed, when the store has been dropped, or when processing an update returns an | ||||
|     /// error; in that last case `must_exit` is set to `true`. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the first notification cannot be sent because the channel is closed. The | ||||
|     /// spawned task panics if the blocking task processing an update cannot be joined. | ||||
|     pub fn open( | ||||
|         options: EnvOpenOptions, | ||||
|         path: impl AsRef<Path>, | ||||
|         index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static, | ||||
|         must_exit: Arc<AtomicBool>, | ||||
|     ) -> anyhow::Result<Arc<Self>> { | ||||
|         let (update_store, mut notification_receiver) = Self::new(options, path)?; | ||||
|         let update_store = Arc::new(update_store); | ||||
|  | ||||
|         // Send a first notification to trigger the process. | ||||
|         if let Err(TrySendError::Closed(())) = update_store.notification_sender.try_send(()) { | ||||
|             panic!("Failed to init update store"); | ||||
|         } | ||||
|  | ||||
|         // We need a weak reference so we can take ownership on the arc later when we | ||||
|         // want to close the index. | ||||
|         let duration = Duration::from_secs(10 * 60); // 10 minutes | ||||
|         let update_store_weak = Arc::downgrade(&update_store); | ||||
|         tokio::task::spawn_local(async move { | ||||
|             // Block and wait for something to process with a timeout. The timeout | ||||
|             // function returns a Result and we must just unlock the loop on Result. | ||||
|             // A timeout (`Err`) also enters the loop; only a closed channel ends it. | ||||
|             'outer: while timeout(duration, notification_receiver.recv()) | ||||
|                 .await | ||||
|                 .map_or(true, |o| o.is_some()) | ||||
|             { | ||||
|                 // Process pending updates one by one until none is left. | ||||
|                 loop { | ||||
|                     match update_store_weak.upgrade() { | ||||
|                         Some(update_store) => { | ||||
|                             let handler = index_handle.clone(); | ||||
|                             let res = tokio::task::spawn_blocking(move || { | ||||
|                                 update_store.process_pending_update(handler) | ||||
|                             }) | ||||
|                             .await | ||||
|                             .expect("Fatal error processing update."); | ||||
|                             match res { | ||||
|                                 // An update was processed, try the next one. | ||||
|                                 Ok(Some(_)) => (), | ||||
|                                 // Nothing left to process, wait for the next notification. | ||||
|                                 Ok(None) => break, | ||||
|                                 Err(e) => { | ||||
|                                     error!("Fatal error while processing an update that requires the update store to shutdown: {}", e); | ||||
|                                     must_exit.store(true, Ordering::SeqCst); | ||||
|                                     break 'outer; | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                         // the ownership on the arc has been taken, we need to exit. | ||||
|                         None => break 'outer, | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             error!("Update store loop exited."); | ||||
|         }); | ||||
|  | ||||
|         Ok(update_store) | ||||
|     } | ||||
|  | ||||
|     /// Allocates the next global update id and the next update id of `index_uuid`. | ||||
|     /// | ||||
|     /// Both counters are read through `txn` (a missing counter counts as `0`), returned as | ||||
|     /// `(global_id, update_id)`, and stored back incremented by one. | ||||
|     fn next_update_id(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<(u64, u64)> { | ||||
|         let current_global = match self.next_update_id.get(txn, &NextIdKey::Global)? { | ||||
|             Some(stored) => stored.get(), | ||||
|             None => 0, | ||||
|         }; | ||||
|  | ||||
|         let following_global = BEU64::new(current_global + 1); | ||||
|         self.next_update_id | ||||
|             .put(txn, &NextIdKey::Global, &following_global)?; | ||||
|  | ||||
|         let index_update_id = self.next_update_id_raw(txn, index_uuid)?; | ||||
|  | ||||
|         Ok((current_global, index_update_id)) | ||||
|     } | ||||
|  | ||||
|     /// Allocates the next update id of `index_uuid` without touching the global update id. | ||||
|     /// This is useful for the dumps. | ||||
|     /// | ||||
|     /// A missing counter counts as `0`; the counter is stored back incremented by one. | ||||
|     fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> { | ||||
|         let counter_key = NextIdKey::Index(index_uuid); | ||||
|         let current = match self.next_update_id.get(txn, &counter_key)? { | ||||
|             Some(stored) => stored.get(), | ||||
|             None => 0, | ||||
|         }; | ||||
|  | ||||
|         self.next_update_id | ||||
|             .put(txn, &counter_key, &BEU64::new(current + 1))?; | ||||
|  | ||||
|         Ok(current) | ||||
|     } | ||||
|  | ||||
|     /// Enqueues `update` for `index_uuid` and wakes up the processing loop. | ||||
|     /// | ||||
|     /// New ids are allocated, the resulting `Enqueued` meta is written to the pending queue | ||||
|     /// under the `(global_id, index_uuid, update_id)` key and the transaction is committed | ||||
|     /// before the notification is sent. Returns the enqueued meta. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the notification channel is closed. | ||||
|     pub fn register_update( | ||||
|         &self, | ||||
|         index_uuid: Uuid, | ||||
|         update: RegisterUpdate, | ||||
|     ) -> heed::Result<Enqueued> { | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|         let (global_id, update_id) = self.next_update_id(&mut wtxn, index_uuid)?; | ||||
|         let enqueued = Enqueued::new(update, update_id); | ||||
|  | ||||
|         let queue_key = (global_id, index_uuid, update_id); | ||||
|         self.pending_queue.put(&mut wtxn, &queue_key, &enqueued)?; | ||||
|  | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         // Only a closed channel is fatal; any other outcome of the send is ignored. | ||||
|         match self.notification_sender.try_send(()) { | ||||
|             Err(TrySendError::Closed(())) => panic!("Update store loop exited"), | ||||
|             _ => Ok(enqueued), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // /// Push already processed update in the UpdateStore without triggering the notification | ||||
|     // /// process. This is useful for the dumps. | ||||
|     //pub fn register_raw_updates( | ||||
|         //&self, | ||||
|         //wtxn: &mut heed::RwTxn, | ||||
|         //update: &UpdateStatus, | ||||
|         //index_uuid: Uuid, | ||||
|     //) -> heed::Result<()> { | ||||
|         //match update { | ||||
|             //UpdateStatus::Enqueued(enqueued) => { | ||||
|                 //let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?; | ||||
|                 //self.pending_queue.remap_key_type::<PendingKeyCodec>().put( | ||||
|                     //wtxn, | ||||
|                     //&(global_id, index_uuid, enqueued.id()), | ||||
|                     //enqueued, | ||||
|                 //)?; | ||||
|             //} | ||||
|             //_ => { | ||||
|                 //let _update_id = self.next_update_id_raw(wtxn, index_uuid)?; | ||||
|                 //self.updates.put(wtxn, &(index_uuid, update.id()), update)?; | ||||
|             //} | ||||
|         //} | ||||
|         //Ok(()) | ||||
|     //} | ||||
|  | ||||
|     /// Processes the first entry of the pending queue, if there is one. | ||||
|     /// | ||||
|     /// The entry is fetched with a read transaction that is dropped before the processing | ||||
|     /// starts. The store state is set to `State::Processing` while `perform_update` runs and | ||||
|     /// back to `State::Idle` afterwards. Returns `Ok(None)` when the queue is empty, otherwise | ||||
|     /// whatever `perform_update` returned. | ||||
|     fn process_pending_update(&self, index_handle: impl IndexActorHandle) -> Result<Option<()>> { | ||||
|         // Only hold the read transaction for the time needed to fetch the first entry. | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|         let next_pending = self.pending_queue.first(&rtxn)?; | ||||
|         drop(rtxn); | ||||
|  | ||||
|         let ((global_id, index_uuid, _), pending) = match next_pending { | ||||
|             Some(entry) => entry, | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         let processing = pending.processing(); | ||||
|         // Acquire the state lock and set the current state to processing. | ||||
|         // txn must *always* be acquired after state lock, or it will dead lock. | ||||
|         let state = self.state.write(); | ||||
|         state.swap(State::Processing(index_uuid, processing.clone())); | ||||
|  | ||||
|         let outcome = self.perform_update(processing, index_handle, index_uuid, global_id); | ||||
|  | ||||
|         // The state goes back to idle whether the update succeeded or not. | ||||
|         state.swap(State::Idle); | ||||
|  | ||||
|         outcome | ||||
|     } | ||||
|  | ||||
|     /// Runs `processing` on the index actor and records its outcome. | ||||
|     /// | ||||
|     /// The update is sent to `index_handle` and awaited by blocking on the current runtime | ||||
|     /// handle; an error of the call itself turns the update into a failed one. Then, in a | ||||
|     /// single write transaction, the entry is removed from the pending queue and the resulting | ||||
|     /// status is stored in the updates database under `(index_uuid, update_id)`. | ||||
|     fn perform_update( | ||||
|         &self, | ||||
|         processing: Processing, | ||||
|         index_handle: impl IndexActorHandle, | ||||
|         index_uuid: Uuid, | ||||
|         global_id: u64, | ||||
|     ) -> Result<Option<()>> { | ||||
|         let runtime = Handle::current(); | ||||
|         let update_id = processing.id(); | ||||
|         let outcome = runtime | ||||
|             .block_on(index_handle.update(index_uuid, processing.clone())) | ||||
|             .unwrap_or_else(|e| Err(processing.fail(e))); | ||||
|  | ||||
|         // Both the processed and the failed update end up stored as an `UpdateStatus`. | ||||
|         let status = outcome.map_or_else(UpdateStatus::from, UpdateStatus::from); | ||||
|  | ||||
|         // Remove the entry from the pending queue and write the final status | ||||
|         // in the same transaction. | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|         let queue_key = (global_id, index_uuid, update_id); | ||||
|         self.pending_queue.delete(&mut wtxn, &queue_key)?; | ||||
|         self.updates | ||||
|             .put(&mut wtxn, &(index_uuid, update_id), &status)?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(Some(())) | ||||
|     } | ||||
|  | ||||
|     /// Lists the updates of `index_uuid`, ordered by update id. | ||||
|     /// | ||||
|     /// The statuses come from the pending queue, then from the updates database, and finally | ||||
|     /// from the currently processing update when it belongs to this index. For a given id, a | ||||
|     /// later source replaces the status found in an earlier one. | ||||
|     pub fn list(&self, index_uuid: Uuid) -> Result<Vec<UpdateStatus>> { | ||||
|         let mut by_id = BTreeMap::<u64, UpdateStatus>::new(); | ||||
|  | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|  | ||||
|         // The queue is keyed by global id first, so it has to be scanned entirely; the data | ||||
|         // is only decoded for the entries of this index. | ||||
|         for entry in self.pending_queue.iter(&rtxn)?.lazily_decode_data() { | ||||
|             let ((_, uuid, id), lazy_pending) = entry?; | ||||
|             if uuid != index_uuid { | ||||
|                 continue; | ||||
|             } | ||||
|             by_id.insert(id, lazy_pending.decode()?.into()); | ||||
|         } | ||||
|  | ||||
|         let stored = self | ||||
|             .updates | ||||
|             .remap_key_type::<ByteSlice>() | ||||
|             .prefix_iter(&rtxn, index_uuid.as_bytes())?; | ||||
|         for entry in stored { | ||||
|             let (_, status) = entry?; | ||||
|             by_id.insert(status.id(), status); | ||||
|         } | ||||
|  | ||||
|         // If the currently processing update is from this index, replace the corresponding pending update with this one. | ||||
|         if let State::Processing(uuid, ref processing) = *self.state.read() { | ||||
|             if uuid == index_uuid { | ||||
|                 by_id.insert(processing.id(), processing.clone().into()); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(by_id.into_iter().map(|(_, status)| status).collect()) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of the update `update_id` of `index_uuid`, or `None` if there is | ||||
|     /// no such update. | ||||
|     /// | ||||
|     /// The lookup checks, in this order: the currently processing update, the updates | ||||
|     /// database, and finally the pending queue. | ||||
|     pub fn meta(&self, index_uuid: Uuid, update_id: u64) -> heed::Result<Option<UpdateStatus>> { | ||||
|         // Check if the update is the one currently processing | ||||
|         if let State::Processing(uuid, ref processing) = *self.state.read() { | ||||
|             if uuid == index_uuid && processing.id() == update_id { | ||||
|                 return Ok(Some(processing.clone().into())); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|  | ||||
|         // Else, check if it is in the updates database: | ||||
|         if let Some(status) = self.updates.get(&rtxn, &(index_uuid, update_id))? { | ||||
|             return Ok(Some(status)); | ||||
|         } | ||||
|  | ||||
|         // If nothing was found yet, we resolve to iterate over the pending queue. | ||||
|         for entry in self.pending_queue.iter(&rtxn)?.lazily_decode_data() { | ||||
|             let ((_, uuid, id), lazy_pending) = entry?; | ||||
|             if uuid == index_uuid && id == update_id { | ||||
|                 return Ok(Some(lazy_pending.decode()?.into())); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // No update was found. | ||||
|         Ok(None) | ||||
|     } | ||||
|  | ||||
|     /// Delete all updates for an index from the update store. If the currently processing update | ||||
|     /// is for `index_uuid`, the call will block until the update is terminated. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Any error met while iterating over the pending queue or over the updates database is | ||||
|     /// returned to the caller before the transaction is committed. | ||||
|     pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> { | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|         // Contains all the content file paths that we need to be removed if the deletion was successful. | ||||
|         let uuids_to_remove = Vec::new(); | ||||
|  | ||||
|         let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data(); | ||||
|  | ||||
|         // `del_current` is called on the iterator itself, so it has to be driven manually. | ||||
|         while let Some(entry) = pendings.next() { | ||||
|             // Propagate iteration errors instead of silently stopping at the first one, | ||||
|             // which would let a partial deletion be committed. | ||||
|             let ((_, uuid, _), pending) = entry?; | ||||
|             if uuid == index_uuid { | ||||
|                 let _pending = pending.decode()?; | ||||
|                 //if let Some(update_uuid) = pending.content.take() { | ||||
|                     //uuids_to_remove.push(update_uuid); | ||||
|                 //} | ||||
|  | ||||
|                 // Invariant check: we can only delete the current entry when we don't hold | ||||
|                 // references to it anymore. This must be done after we have retrieved its content. | ||||
|                 unsafe { | ||||
|                     pendings.del_current()?; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         drop(pendings); | ||||
|  | ||||
|         let mut updates = self | ||||
|             .updates | ||||
|             .remap_key_type::<ByteSlice>() | ||||
|             .prefix_iter_mut(&mut txn, index_uuid.as_bytes())? | ||||
|             .lazily_decode_data(); | ||||
|  | ||||
|         while let Some(entry) = updates.next() { | ||||
|             // Do not delete anything when the iterator reported an error. | ||||
|             entry?; | ||||
|             // The entry has been dropped above: no reference to it is held while deleting. | ||||
|             unsafe { | ||||
|                 updates.del_current()?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         drop(updates); | ||||
|  | ||||
|         txn.commit()?; | ||||
|  | ||||
|         // If the currently processing update is from our index, we wait until it is | ||||
|         // finished before returning. This ensure that no write to the index occurs after we delete it. | ||||
|         if let State::Processing(uuid, _) = *self.state.read() { | ||||
|             if uuid == index_uuid { | ||||
|                 // wait for a write lock, do nothing with it. | ||||
|                 self.state.write(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Finally, remove any outstanding update files. This must be done after waiting for the | ||||
|         // last update to ensure that the update files are not deleted before the update needs | ||||
|         // them. | ||||
|         // NOTE: the code filling `uuids_to_remove` is commented out above, so nothing is | ||||
|         // removed here for now. | ||||
|         uuids_to_remove | ||||
|             .iter() | ||||
|             .map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string())) | ||||
|             .for_each(|path| { | ||||
|                 let _ = remove_file(path); | ||||
|             }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Snapshots the update store and the indexes listed in `uuids` into `path`. | ||||
|     /// | ||||
|     /// The store state is set to `State::Snapshoting`, the environment is copied with | ||||
|     /// compaction to `<path>/updates/data.mdb`, the `UPDATE_DIR` directory is created next to | ||||
|     /// it, and every index of `uuids` is then asked to snapshot itself through `handle`. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the first error met while creating the directories, copying the environment, | ||||
|     /// reading the pending queue or snapshotting an index. | ||||
|     pub fn snapshot( | ||||
|         &self, | ||||
|         uuids: &HashSet<Uuid>, | ||||
|         path: impl AsRef<Path>, | ||||
|         handle: impl IndexActorHandle + Clone, | ||||
|     ) -> Result<()> { | ||||
|         let state_lock = self.state.write(); | ||||
|         state_lock.swap(State::Snapshoting); | ||||
|  | ||||
|         // acquire write lock to prevent further writes during snapshot | ||||
|         let txn = self.env.write_txn()?; | ||||
|  | ||||
|         let update_path = path.as_ref().join("updates"); | ||||
|         create_dir_all(&update_path)?; | ||||
|         let db_path = update_path.join("data.mdb"); | ||||
|  | ||||
|         // create db snapshot | ||||
|         self.env.copy_to_path(&db_path, CompactionOption::Enabled)?; | ||||
|  | ||||
|         let update_files_path = update_path.join(UPDATE_DIR); | ||||
|         create_dir_all(&update_files_path)?; | ||||
|  | ||||
|         let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); | ||||
|  | ||||
|         // The copy of the update files is commented out below; for now this loop only | ||||
|         // reports the errors met while reading the pending queue. | ||||
|         for entry in pendings { | ||||
|             let ((_, _uuid, _), _pending) = entry?; | ||||
|             //if uuids.contains(&uuid) { | ||||
|                 //if let Enqueued { | ||||
|                     //content: Some(uuid), | ||||
|                     //.. | ||||
|                 //} = pending.decode()? | ||||
|                 //{ | ||||
|                     //let path = update_uuid_to_file_path(&self.path, uuid); | ||||
|                     //copy(path, &update_files_path)?; | ||||
|                 //} | ||||
|             //} | ||||
|         } | ||||
|  | ||||
|         let path = &path.as_ref().to_path_buf(); | ||||
|         let handle = &handle; | ||||
|         // Perform the snapshot of each index concurrently. Only a third of the capabilities of | ||||
|         // the index actor at a time not to put too much pressure on the index actor | ||||
|         let mut stream = futures::stream::iter(uuids.iter()) | ||||
|             .map(move |uuid| handle.snapshot(*uuid, path.clone())) | ||||
|             .buffer_unordered(CONCURRENT_INDEX_MSG / 3); | ||||
|  | ||||
|         Handle::current().block_on(async { | ||||
|             while let Some(res) = stream.next().await { | ||||
|                 res?; | ||||
|             } | ||||
|             Ok(()) as Result<()> | ||||
|         })?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Returns the size reported by the environment and the uuid of the index whose update | ||||
|     /// is currently processing, if any. | ||||
|     pub fn get_info(&self) -> Result<UpdateStoreInfo> { | ||||
|         let size = self.env.size(); | ||||
|  | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|         // The accounting of the update files is commented out below; for now this loop only | ||||
|         // reports the errors met while reading the pending queue. | ||||
|         for entry in self.pending_queue.iter(&rtxn)? { | ||||
|             let (_, _pending) = entry?; | ||||
|             //if let Enqueued { | ||||
|                 //content: Some(uuid), | ||||
|                 //.. | ||||
|             //} = pending | ||||
|             //{ | ||||
|                 //let path = update_uuid_to_file_path(&self.path, uuid); | ||||
|                 //size += File::open(path)?.metadata()?.len(); | ||||
|             //} | ||||
|         } | ||||
|  | ||||
|         let processing = if let State::Processing(uuid, _) = *self.state.read() { | ||||
|             Some(uuid) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|  | ||||
|         Ok(UpdateStoreInfo { size, processing }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Unit tests of the update store: id allocation, update registration and the processing | ||||
| // of pending updates against a mocked index actor. | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|     use crate::index_controller::{ | ||||
|         index_actor::{error::IndexActorError, MockIndexActorHandle}, | ||||
|         UpdateResult, | ||||
|     }; | ||||
|  | ||||
|     use futures::future::ok; | ||||
|  | ||||
|     // Checks that the global id grows with every call while the per-index id only grows | ||||
|     // for the index it was requested for. | ||||
|     #[actix_rt::test] | ||||
|     async fn test_next_id() { | ||||
|         let dir = tempfile::tempdir_in(".").unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         let handle = Arc::new(MockIndexActorHandle::new()); | ||||
|         options.map_size(4096 * 100); | ||||
|         let update_store = UpdateStore::open( | ||||
|             options, | ||||
|             dir.path(), | ||||
|             handle, | ||||
|             Arc::new(AtomicBool::new(false)), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
|         let index1_uuid = Uuid::new_v4(); | ||||
|         let index2_uuid = Uuid::new_v4(); | ||||
|  | ||||
|         let mut txn = update_store.env.write_txn().unwrap(); | ||||
|         let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); | ||||
|         txn.commit().unwrap(); | ||||
|         assert_eq!((0, 0), ids); | ||||
|  | ||||
|         let mut txn = update_store.env.write_txn().unwrap(); | ||||
|         let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap(); | ||||
|         txn.commit().unwrap(); | ||||
|         assert_eq!((1, 0), ids); | ||||
|  | ||||
|         let mut txn = update_store.env.write_txn().unwrap(); | ||||
|         let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); | ||||
|         txn.commit().unwrap(); | ||||
|         assert_eq!((2, 1), ids); | ||||
|     } | ||||
|  | ||||
|     // Checks that a registered update ends up in the pending queue under `(0, uuid, 0)`. | ||||
|     #[actix_rt::test] | ||||
|     async fn test_register_update() { | ||||
|         let dir = tempfile::tempdir_in(".").unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         let handle = Arc::new(MockIndexActorHandle::new()); | ||||
|         options.map_size(4096 * 100); | ||||
|         let update_store = UpdateStore::open( | ||||
|             options, | ||||
|             dir.path(), | ||||
|             handle, | ||||
|             Arc::new(AtomicBool::new(false)), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         let meta = UpdateMeta::ClearDocuments; | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         let store_clone = update_store.clone(); | ||||
|         tokio::task::spawn_blocking(move || { | ||||
|             // NOTE(review): `register_update` is declared above with the parameters | ||||
|             // `(index_uuid: Uuid, update: RegisterUpdate)`; this three-argument call looks | ||||
|             // stale, confirm that this test still compiles. | ||||
|             store_clone.register_update(meta, None, uuid).unwrap(); | ||||
|         }) | ||||
|         .await | ||||
|         .unwrap(); | ||||
|  | ||||
|         let txn = update_store.env.read_txn().unwrap(); | ||||
|         assert!(update_store | ||||
|             .pending_queue | ||||
|             .get(&txn, &(0, uuid, 0)) | ||||
|             .unwrap() | ||||
|             .is_some()); | ||||
|     } | ||||
|  | ||||
|     // Checks that processed updates leave the pending queue and are stored in the updates | ||||
|     // database, as `Processed` for the first one and as `Failed` for the second one. | ||||
|     #[actix_rt::test] | ||||
|     async fn test_process_update() { | ||||
|         let dir = tempfile::tempdir_in(".").unwrap(); | ||||
|         let mut handle = MockIndexActorHandle::new(); | ||||
|  | ||||
|         // The mock succeeds for the update 0 and fails for any other update. | ||||
|         // NOTE(review): `perform_update` above calls `update` with two arguments | ||||
|         // (`index_uuid`, `processing`); this closure takes three, confirm against the | ||||
|         // `IndexActorHandle` trait. | ||||
|         handle | ||||
|             .expect_update() | ||||
|             .times(2) | ||||
|             .returning(|_index_uuid, processing, _file| { | ||||
|                 if processing.id() == 0 { | ||||
|                     Box::pin(ok(Ok(processing.process(UpdateResult::Other)))) | ||||
|                 } else { | ||||
|                     Box::pin(ok(Err( | ||||
|                         processing.fail(IndexActorError::ExistingPrimaryKey.into()) | ||||
|                     ))) | ||||
|                 } | ||||
|             }); | ||||
|  | ||||
|         let handle = Arc::new(handle); | ||||
|  | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(4096 * 100); | ||||
|         let store = UpdateStore::open( | ||||
|             options, | ||||
|             dir.path(), | ||||
|             handle.clone(), | ||||
|             Arc::new(AtomicBool::new(false)), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
|         // wait a bit for the event loop exit. | ||||
|         tokio::time::sleep(std::time::Duration::from_millis(50)).await; | ||||
|  | ||||
|         let mut txn = store.env.write_txn().unwrap(); | ||||
|  | ||||
|         // NOTE(review): the `Enqueued::new` of this commit takes `(meta, update_id)`; the | ||||
|         // three-argument calls below look stale, confirm that this test still compiles. | ||||
|         let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None); | ||||
|         let uuid = Uuid::new_v4(); | ||||
|  | ||||
|         store | ||||
|             .pending_queue | ||||
|             .put(&mut txn, &(0, uuid, 0), &update) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None); | ||||
|  | ||||
|         store | ||||
|             .pending_queue | ||||
|             .put(&mut txn, &(1, uuid, 1), &update) | ||||
|             .unwrap(); | ||||
|  | ||||
|         txn.commit().unwrap(); | ||||
|  | ||||
|         // Process the pending, and check that it has been moved to the update databases, and | ||||
|         // removed from the pending database. | ||||
|         let store_clone = store.clone(); | ||||
|         tokio::task::spawn_blocking(move || { | ||||
|             store_clone.process_pending_update(handle.clone()).unwrap(); | ||||
|             store_clone.process_pending_update(handle).unwrap(); | ||||
|         }) | ||||
|         .await | ||||
|         .unwrap(); | ||||
|  | ||||
|         let txn = store.env.read_txn().unwrap(); | ||||
|  | ||||
|         assert!(store.pending_queue.first(&txn).unwrap().is_none()); | ||||
|         let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap(); | ||||
|  | ||||
|         assert!(matches!(update, UpdateStatus::Processed(_))); | ||||
|         let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap(); | ||||
|  | ||||
|         assert!(matches!(update, UpdateStatus::Failed(_))); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										63
									
								
								meilisearch-lib/src/index_controller/update_file_store.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								meilisearch-lib/src/index_controller/update_file_store.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| use std::fs::File; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::ops::{Deref, DerefMut}; | ||||
|  | ||||
| use tempfile::NamedTempFile; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::Result; | ||||
|  | ||||
| /// A temporary file together with the path it is moved to when persisted. | ||||
| pub struct UpdateFile { | ||||
|     /// Destination of the file once `persist` is called. | ||||
|     path: PathBuf, | ||||
|     /// Temporary file holding the content until it is persisted. | ||||
|     file: NamedTempFile, | ||||
| } | ||||
|  | ||||
| impl UpdateFile { | ||||
|     /// Moves the temporary file to its final path, consuming the `UpdateFile`. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the temporary file cannot be persisted at the destination path. | ||||
|     pub fn persist(self) { | ||||
|         // The signature returns `()`, so a failure can only be reported by panicking. | ||||
|         self.file | ||||
|             .persist(&self.path) | ||||
|             .expect("failed to persist the update file"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Gives read access to the underlying temporary file. | ||||
| impl Deref for UpdateFile { | ||||
|     type Target = NamedTempFile; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.file | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Gives mutable access to the underlying temporary file. | ||||
| impl DerefMut for UpdateFile { | ||||
|     fn deref_mut(&mut self) -> &mut Self::Target { | ||||
|         &mut self.file | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Handle on the directory in which the update files are stored. | ||||
| #[derive(Clone, Debug)] | ||||
| pub struct UpdateFileStore { | ||||
|     /// Directory containing the update files, one file per update uuid. | ||||
|     path: PathBuf, | ||||
| } | ||||
|  | ||||
| impl UpdateFileStore { | ||||
|     /// Creates a store rooted at `<path>/updates/updates_files`, creating the directory | ||||
|     /// when it does not exist yet. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the directory cannot be created. | ||||
|     pub fn new(path: impl AsRef<Path>) -> Result<Self> { | ||||
|         let path = path.as_ref().join("updates/updates_files"); | ||||
|         std::fs::create_dir_all(&path).expect("failed to create the update files directory"); | ||||
|         Ok(Self { path }) | ||||
|     } | ||||
|  | ||||
|     /// Creates a new update file and returns it along with its freshly generated uuid. | ||||
|     /// | ||||
|     /// The content lives in a temporary file until `UpdateFile::persist` is called, which | ||||
|     /// moves it to `<store path>/<uuid>`. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the temporary file cannot be created. | ||||
|     pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { | ||||
|         // The temporary file is created inside the store directory: persisting is a | ||||
|         // rename, and a rename does not work across filesystems. | ||||
|         let file = NamedTempFile::new_in(&self.path) | ||||
|             .expect("failed to create a temporary update file"); | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         let path = self.path.join(uuid.to_string()); | ||||
|         let update_file = UpdateFile { file, path }; | ||||
|  | ||||
|         Ok((uuid, update_file)) | ||||
|     } | ||||
|  | ||||
|     /// Opens the persisted update file identified by `uuid`. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if the file cannot be opened. | ||||
|     pub fn get_update(&self, uuid: Uuid) -> Result<File> { | ||||
|         let path = self.path.join(uuid.to_string()); | ||||
|         let file = File::open(path).expect("failed to open the update file"); | ||||
|         Ok(file) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										250
									
								
								meilisearch-lib/src/index_controller/updates.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										250
									
								
								meilisearch-lib/src/index_controller/updates.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,250 @@ | ||||
| use std::{error::Error, fmt::Display}; | ||||
|  | ||||
| use chrono::{DateTime, Utc}; | ||||
|  | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
| use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::index::{Settings, Unchecked}; | ||||
|  | ||||
| use super::update_actor::RegisterUpdate; | ||||
|  | ||||
| /// Result attached to an update that was processed successfully. | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum UpdateResult { | ||||
|     /// Result of a documents addition, as reported by milli. | ||||
|     DocumentsAddition(DocumentAdditionResult), | ||||
|     /// Result of a documents deletion, carrying a `deleted` count. | ||||
|     DocumentDeletion { deleted: u64 }, | ||||
|     /// Any update that has no specific result. | ||||
|     Other, | ||||
| } | ||||
|  | ||||
| /// Description of an update; serialized with its variant name in a `type` field. | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| #[serde(tag = "type")] | ||||
| pub enum UpdateMeta { | ||||
|     /// Addition of documents with the given indexing method and optional primary key. | ||||
|     DocumentsAddition { | ||||
|         method: IndexDocumentsMethod, | ||||
|         primary_key: Option<String>, | ||||
|     }, | ||||
|     /// Removal of all the documents. | ||||
|     ClearDocuments, | ||||
|     /// Removal of the documents listed in `ids`. | ||||
|     DeleteDocuments { | ||||
|         ids: Vec<String>, | ||||
|     }, | ||||
|     /// Settings update; the settings are in their `Unchecked` form. | ||||
|     Settings(Settings<Unchecked>), | ||||
| } | ||||
|  | ||||
| /// An update in its initial, enqueued state. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Enqueued { | ||||
|     /// Id of the update. | ||||
|     pub update_id: u64, | ||||
|     /// Content of the update, as it was registered. | ||||
|     pub meta: RegisterUpdate, | ||||
|     /// Time at which the `Enqueued` value was created. | ||||
|     pub enqueued_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl Enqueued { | ||||
|     /// Creates an enqueued update for `meta` with the id `update_id`, stamped with the | ||||
|     /// current time. | ||||
|     pub fn new(meta: RegisterUpdate, update_id: u64) -> Self { | ||||
|         let enqueued_at = Utc::now(); | ||||
|         Self { | ||||
|             update_id, | ||||
|             meta, | ||||
|             enqueued_at, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Turns this update into a `Processing` one, stamped with the current time. | ||||
|     pub fn processing(self) -> Processing { | ||||
|         let started_processing_at = Utc::now(); | ||||
|         Processing { | ||||
|             from: self, | ||||
|             started_processing_at, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Turns this update into an `Aborted` one, stamped with the current time. | ||||
|     pub fn abort(self) -> Aborted { | ||||
|         let aborted_at = Utc::now(); | ||||
|         Aborted { | ||||
|             from: self, | ||||
|             aborted_at, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the content of the update. | ||||
|     pub fn meta(&self) -> &RegisterUpdate { | ||||
|         let Self { meta, .. } = self; | ||||
|         meta | ||||
|     } | ||||
|  | ||||
|     /// Returns the id of the update. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         let Self { update_id, .. } = self; | ||||
|         *update_id | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An update that was processed successfully. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Processed { | ||||
|     /// Result of the update. | ||||
|     pub success: UpdateResult, | ||||
|     /// Time at which the `Processed` value was created. | ||||
|     pub processed_at: DateTime<Utc>, | ||||
|     /// The processing update this one comes from; its fields are flattened when serialized. | ||||
|     #[serde(flatten)] | ||||
|     pub from: Processing, | ||||
| } | ||||
|  | ||||
| impl Processed { | ||||
|     /// Returns the id of the update, taken from the `Processing` state it comes from. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         self.from.id() | ||||
|     } | ||||
|  | ||||
|     /// Returns the content of the update, taken from the `Processing` state it comes from. | ||||
|     pub fn meta(&self) -> &RegisterUpdate { | ||||
|         self.from.meta() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An update that is being processed. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Processing { | ||||
|     /// The enqueued update this one comes from; its fields are flattened when serialized. | ||||
|     #[serde(flatten)] | ||||
|     pub from: Enqueued, | ||||
|     /// Time at which the `Processing` value was created. | ||||
|     pub started_processing_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl Processing { | ||||
|     /// Returns the id of the update, taken from the enqueued update it comes from. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         let enqueued = &self.from; | ||||
|         enqueued.id() | ||||
|     } | ||||
|  | ||||
|     /// Returns the content of the update, taken from the enqueued update it comes from. | ||||
|     pub fn meta(&self) -> &RegisterUpdate { | ||||
|         let enqueued = &self.from; | ||||
|         enqueued.meta() | ||||
|     } | ||||
|  | ||||
|     /// Turns this update into a `Processed` one carrying `success`, stamped with the | ||||
|     /// current time. | ||||
|     pub fn process(self, success: UpdateResult) -> Processed { | ||||
|         let processed_at = Utc::now(); | ||||
|         Processed { | ||||
|             success, | ||||
|             processed_at, | ||||
|             from: self, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Turns this update into a `Failed` one, stamped with the current time. The message | ||||
|     /// and the code are taken from `error`. | ||||
|     pub fn fail(self, error: impl ErrorCode) -> Failed { | ||||
|         Failed { | ||||
|             msg: error.to_string(), | ||||
|             code: error.error_code(), | ||||
|             failed_at: Utc::now(), | ||||
|             from: self, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An enqueued update that was aborted. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Aborted { | ||||
|     /// The enqueued update this one comes from; its fields are flattened when serialized. | ||||
|     #[serde(flatten)] | ||||
|     from: Enqueued, | ||||
|     /// Time at which the `Aborted` value was created. | ||||
|     aborted_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl Aborted { | ||||
|     /// Returns the id of the update, taken from the enqueued update it comes from. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         self.from.id() | ||||
|     } | ||||
|  | ||||
|     /// Returns the content of the update, taken from the enqueued update it comes from. | ||||
|     pub fn meta(&self) -> &RegisterUpdate { | ||||
|         self.from.meta() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An update whose processing failed. | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Failed { | ||||
|     /// The processing update this one comes from; its fields are flattened when serialized. | ||||
|     #[serde(flatten)] | ||||
|     pub from: Processing, | ||||
|     /// Message of the error that made the update fail. | ||||
|     pub msg: String, | ||||
|     /// Error code of the error that made the update fail. | ||||
|     pub code: Code, | ||||
|     /// Time at which the `Failed` value was created. | ||||
|     pub failed_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl Display for Failed { | ||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|         self.msg.fmt(f) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // `Failed` is an error type of its own; the default `Error` methods are kept. | ||||
| impl Error for Failed { } | ||||
|  | ||||
| /// A failed update reports the error code that was recorded when it failed. | ||||
| impl ErrorCode for Failed { | ||||
|     fn error_code(&self) -> Code { | ||||
|         self.code | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Failed { | ||||
|     /// Returns the id of the update, taken from the `Processing` state it comes from. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         self.from.id() | ||||
|     } | ||||
|  | ||||
|     /// Returns the content of the update, taken from the `Processing` state it comes from. | ||||
|     pub fn meta(&self) -> &RegisterUpdate { | ||||
|         self.from.meta() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// The state of an update, one variant per stage it can be in. | ||||
| /// | ||||
| /// (De)serialized as an internally tagged enum: the variant name is stored, | ||||
| /// in camelCase, under the `status` key next to the variant's own fields. | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| #[serde(tag = "status", rename_all = "camelCase")] | ||||
| pub enum UpdateStatus { | ||||
|     Processing(Processing), | ||||
|     Enqueued(Enqueued), | ||||
|     Processed(Processed), | ||||
|     Aborted(Aborted), | ||||
|     Failed(Failed), | ||||
| } | ||||
|  | ||||
| impl UpdateStatus { | ||||
|     /// Returns the id of the wrapped update, whatever its status. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         match self { | ||||
|             Self::Enqueued(update) => update.id(), | ||||
|             Self::Processing(update) => update.id(), | ||||
|             Self::Processed(update) => update.id(), | ||||
|             Self::Aborted(update) => update.id(), | ||||
|             Self::Failed(update) => update.id(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the metadata of the wrapped update, whatever its status. | ||||
|     pub fn meta(&self) -> &RegisterUpdate { | ||||
|         match self { | ||||
|             Self::Enqueued(update) => update.meta(), | ||||
|             Self::Processing(update) => update.meta(), | ||||
|             Self::Processed(update) => update.meta(), | ||||
|             Self::Aborted(update) => update.meta(), | ||||
|             Self::Failed(update) => update.meta(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the inner [`Processed`] if the update has been processed, | ||||
|     /// and `None` for every other status. | ||||
|     pub fn processed(&self) -> Option<&Processed> { | ||||
|         if let Self::Processed(processed) = self { | ||||
|             Some(processed) | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps an [`Enqueued`] update into [`UpdateStatus::Enqueued`]. | ||||
| impl From<Enqueued> for UpdateStatus { | ||||
|     fn from(other: Enqueued) -> Self { | ||||
|         UpdateStatus::Enqueued(other) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps an [`Aborted`] update into [`UpdateStatus::Aborted`]. | ||||
| impl From<Aborted> for UpdateStatus { | ||||
|     fn from(other: Aborted) -> Self { | ||||
|         UpdateStatus::Aborted(other) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a [`Processed`] update into [`UpdateStatus::Processed`]. | ||||
| impl From<Processed> for UpdateStatus { | ||||
|     fn from(other: Processed) -> Self { | ||||
|         UpdateStatus::Processed(other) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a [`Processing`] update into [`UpdateStatus::Processing`]. | ||||
| impl From<Processing> for UpdateStatus { | ||||
|     fn from(other: Processing) -> Self { | ||||
|         UpdateStatus::Processing(other) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a [`Failed`] update into [`UpdateStatus::Failed`]. | ||||
| impl From<Failed> for UpdateStatus { | ||||
|     fn from(other: Failed) -> Self { | ||||
|         UpdateStatus::Failed(other) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										98
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/actor.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/actor.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| use std::{collections::HashSet, path::PathBuf}; | ||||
|  | ||||
| use log::{trace, warn}; | ||||
| use tokio::sync::mpsc; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::{error::UuidResolverError, Result, UuidResolveMsg, UuidStore}; | ||||
|  | ||||
| /// Actor answering [`UuidResolveMsg`] requests from a store `S`. | ||||
| pub struct UuidResolverActor<S> { | ||||
|     // Receiving end of the channel the requests arrive on. | ||||
|     inbox: mpsc::Receiver<UuidResolveMsg>, | ||||
|     // Store every request is served from. | ||||
|     store: S, | ||||
| } | ||||
|  | ||||
| impl<S: UuidStore> UuidResolverActor<S> { | ||||
|     /// Builds an actor reading its requests from `inbox` and serving them from `store`. | ||||
|     pub fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self { | ||||
|         Self { inbox, store } | ||||
|     } | ||||
|  | ||||
|     /// Runs the message loop until all the senders of the inbox have been dropped. | ||||
|     /// | ||||
|     /// Every message is dispatched to the matching `handle_*` method and the outcome is | ||||
|     /// sent back on the message's `ret` channel; a failure to send the answer is ignored. | ||||
|     pub async fn run(mut self) { | ||||
|         trace!("uuid resolver started"); | ||||
|  | ||||
|         // `recv` returns `None` once all senders have been dropped: time to quit. | ||||
|         while let Some(msg) = self.inbox.recv().await { | ||||
|             match msg { | ||||
|                 UuidResolveMsg::Get { uid, ret } => { | ||||
|                     let _ = ret.send(self.handle_get(uid).await); | ||||
|                 } | ||||
|                 UuidResolveMsg::Delete { uid, ret } => { | ||||
|                     let _ = ret.send(self.handle_delete(uid).await); | ||||
|                 } | ||||
|                 UuidResolveMsg::List { ret } => { | ||||
|                     let _ = ret.send(self.handle_list().await); | ||||
|                 } | ||||
|                 UuidResolveMsg::Insert { ret, uuid, name } => { | ||||
|                     let _ = ret.send(self.handle_insert(name, uuid).await); | ||||
|                 } | ||||
|                 UuidResolveMsg::SnapshotRequest { path, ret } => { | ||||
|                     let _ = ret.send(self.handle_snapshot(path).await); | ||||
|                 } | ||||
|                 UuidResolveMsg::GetSize { ret } => { | ||||
|                     let _ = ret.send(self.handle_get_size().await); | ||||
|                 } | ||||
|                 UuidResolveMsg::DumpRequest { path, ret } => { | ||||
|                     let _ = ret.send(self.handle_dump(path).await); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         warn!("exiting uuid resolver loop"); | ||||
|     } | ||||
|  | ||||
|     /// Looks up the uuid of `uid`; an unknown uid is an `UnexistingIndex` error. | ||||
|     async fn handle_get(&self, uid: String) -> Result<Uuid> { | ||||
|         match self.store.get_uuid(uid.clone()).await? { | ||||
|             Some(uuid) => Ok(uuid), | ||||
|             None => Err(UuidResolverError::UnexistingIndex(uid)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Deletes `uid` from the store and returns the uuid it was bound to; an unknown | ||||
|     /// uid is an `UnexistingIndex` error. | ||||
|     async fn handle_delete(&self, uid: String) -> Result<Uuid> { | ||||
|         match self.store.delete(uid.clone()).await? { | ||||
|             Some(uuid) => Ok(uuid), | ||||
|             None => Err(UuidResolverError::UnexistingIndex(uid)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns every `(uid, uuid)` pair held by the store. | ||||
|     async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> { | ||||
|         self.store.list().await | ||||
|     } | ||||
|  | ||||
|     /// Forwards a snapshot request for `path` to the store. | ||||
|     async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         self.store.snapshot(path).await | ||||
|     } | ||||
|  | ||||
|     /// Forwards a dump request for `path` to the store. | ||||
|     async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         self.store.dump(path).await | ||||
|     } | ||||
|  | ||||
|     /// Registers `uuid` under `uid`, after checking that `uid` is well formed. | ||||
|     /// | ||||
|     /// A malformed uid is rejected with `BadlyFormatted` without touching the store. | ||||
|     async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { | ||||
|         if is_index_uid_valid(&uid) { | ||||
|             self.store.insert(uid, uuid).await | ||||
|         } else { | ||||
|             Err(UuidResolverError::BadlyFormatted(uid)) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the size reported by the store. | ||||
|     async fn handle_get_size(&self) -> Result<u64> { | ||||
|         self.store.get_size().await | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns `true` when `uid` is a well-formed index uid. | ||||
| /// | ||||
| /// A valid uid is non-empty and only made of ASCII alphanumeric characters, | ||||
| /// hyphens (`-`) and underscores (`_`). | ||||
| fn is_index_uid_valid(uid: &str) -> bool { | ||||
|     // `Iterator::all` is vacuously true on an empty iterator, so the empty | ||||
|     // string has to be rejected explicitly. | ||||
|     !uid.is_empty() | ||||
|         && uid | ||||
|             .chars() | ||||
|             .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') | ||||
| } | ||||
							
								
								
									
										34
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | ||||
| use meilisearch_error::{Code, ErrorCode}; | ||||
|  | ||||
| /// Result type whose error is always a [`UuidResolverError`]. | ||||
| pub type Result<T> = std::result::Result<T, UuidResolverError>; | ||||
|  | ||||
| /// Errors reported by the uuid resolver. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| pub enum UuidResolverError { | ||||
|     /// An entry already exists for the index name being inserted. | ||||
|     #[error("Index already exists.")] | ||||
|     NameAlreadyExist, | ||||
|     /// No entry exists for the given index uid (carried in the variant). | ||||
|     #[error("Index \"{0}\" not found.")] | ||||
|     UnexistingIndex(String), | ||||
|     /// The given index uid (carried in the variant) is not well formed. | ||||
|     /// Note that the message does not print the offending uid. | ||||
|     #[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")] | ||||
|     BadlyFormatted(String), | ||||
|     /// Any other error, boxed. | ||||
|     #[error("Internal error: {0}")] | ||||
|     Internal(Box<dyn std::error::Error + Sync + Send + 'static>), | ||||
| } | ||||
|  | ||||
| // Generates a `From<E> for UuidResolverError` impl for each listed error type, | ||||
| // boxing the source error into `UuidResolverError::Internal`. | ||||
| internal_error!( | ||||
|     UuidResolverError: heed::Error, | ||||
|     uuid::Error, | ||||
|     std::io::Error, | ||||
|     tokio::task::JoinError, | ||||
|     serde_json::Error | ||||
| ); | ||||
|  | ||||
| impl ErrorCode for UuidResolverError { | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             UuidResolverError::NameAlreadyExist => Code::IndexAlreadyExists, | ||||
|             UuidResolverError::UnexistingIndex(_) => Code::IndexNotFound, | ||||
|             UuidResolverError::BadlyFormatted(_) => Code::InvalidIndexUid, | ||||
|             UuidResolverError::Internal(_) => Code::Internal, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,87 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use tokio::sync::{mpsc, oneshot}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::{HeedUuidStore, Result, UuidResolveMsg, UuidResolverActor, UuidResolverHandle}; | ||||
|  | ||||
| /// Handle to a running `UuidResolverActor`. | ||||
| /// | ||||
| /// Cloning the handle clones the sending end of the actor's channel. | ||||
| #[derive(Clone)] | ||||
| pub struct UuidResolverHandleImpl { | ||||
|     // Sending end of the channel the actor reads its requests from. | ||||
|     sender: mpsc::Sender<UuidResolveMsg>, | ||||
| } | ||||
|  | ||||
| impl UuidResolverHandleImpl { | ||||
|     /// Opens the uuid store under `path`, spawns the resolver actor on the tokio | ||||
|     /// runtime and returns a handle to it. | ||||
|     /// | ||||
|     /// Requests travel through a channel with a capacity of 100 messages. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns an error if the store cannot be opened. | ||||
|     pub fn new(path: impl AsRef<Path>) -> Result<Self> { | ||||
|         let (sender, receiver) = mpsc::channel(100); | ||||
|         let store = HeedUuidStore::new(path)?; | ||||
|         tokio::spawn(UuidResolverActor::new(receiver, store).run()); | ||||
|         Ok(Self { sender }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Every method follows the same protocol: create a oneshot channel, send the request | ||||
| // carrying its sending half to the actor, then wait for the answer on the receiving half. | ||||
| // A failure to send the request is ignored; a missing answer panics. | ||||
| #[async_trait::async_trait] | ||||
| impl UuidResolverHandle for UuidResolverHandleImpl { | ||||
|     async fn get(&self, name: String) -> Result<Uuid> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::Get { uid: name, ret }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
|  | ||||
|     async fn delete(&self, name: String) -> Result<Uuid> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::Delete { uid: name, ret }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
|  | ||||
|     async fn list(&self) -> Result<Vec<(String, Uuid)>> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::List { ret }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
|  | ||||
|     async fn insert(&self, name: String, uuid: Uuid) -> Result<()> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::Insert { ret, name, uuid }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
|  | ||||
|     async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::SnapshotRequest { path, ret }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
|  | ||||
|     async fn get_size(&self) -> Result<u64> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::GetSize { ret }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
|  | ||||
|     async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         let (ret, response) = oneshot::channel(); | ||||
|         let request = UuidResolveMsg::DumpRequest { ret, path }; | ||||
|         let _ = self.sender.send(request).await; | ||||
|         response.await.expect("Uuid resolver actor has been killed") | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,37 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::path::PathBuf; | ||||
|  | ||||
| use tokio::sync::oneshot; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::Result; | ||||
|  | ||||
| /// Requests understood by the uuid resolver actor. | ||||
| /// | ||||
| /// Every variant carries a `ret` oneshot sender on which the result of the | ||||
| /// request is sent back. | ||||
| pub enum UuidResolveMsg { | ||||
|     /// Resolve the uuid registered under `uid`. | ||||
|     Get { | ||||
|         uid: String, | ||||
|         ret: oneshot::Sender<Result<Uuid>>, | ||||
|     }, | ||||
|     /// Remove the entry registered under `uid`, answering with its uuid. | ||||
|     Delete { | ||||
|         uid: String, | ||||
|         ret: oneshot::Sender<Result<Uuid>>, | ||||
|     }, | ||||
|     /// List all the `(uid, uuid)` pairs. | ||||
|     List { | ||||
|         ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>, | ||||
|     }, | ||||
|     /// Register `uuid` under `name`. | ||||
|     Insert { | ||||
|         uuid: Uuid, | ||||
|         name: String, | ||||
|         ret: oneshot::Sender<Result<()>>, | ||||
|     }, | ||||
|     /// Snapshot the store to `path`, answering with a set of uuids. | ||||
|     SnapshotRequest { | ||||
|         path: PathBuf, | ||||
|         ret: oneshot::Sender<Result<HashSet<Uuid>>>, | ||||
|     }, | ||||
|     /// Ask for the size of the store. | ||||
|     GetSize { | ||||
|         ret: oneshot::Sender<Result<u64>>, | ||||
|     }, | ||||
|     /// Dump the store to `path`, answering with a set of uuids. | ||||
|     DumpRequest { | ||||
|         path: PathBuf, | ||||
|         ret: oneshot::Sender<Result<HashSet<Uuid>>>, | ||||
|     }, | ||||
| } | ||||
							
								
								
									
										35
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| mod actor; | ||||
| pub mod error; | ||||
| mod handle_impl; | ||||
| mod message; | ||||
| pub mod store; | ||||
|  | ||||
| use std::collections::HashSet; | ||||
| use std::path::PathBuf; | ||||
|  | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use actor::UuidResolverActor; | ||||
| use error::Result; | ||||
| use message::UuidResolveMsg; | ||||
| use store::UuidStore; | ||||
|  | ||||
| #[cfg(test)] | ||||
| use mockall::automock; | ||||
|  | ||||
| pub use handle_impl::UuidResolverHandleImpl; | ||||
| pub use store::HeedUuidStore; | ||||
|  | ||||
| /// Size of the uuid store, in bytes. | ||||
| const UUID_STORE_SIZE: usize = 1_073_741_824; // 1 GiB | ||||
|  | ||||
| /// Asynchronous interface to the uuid resolver, which maps index names to uuids. | ||||
| /// | ||||
| /// A mock implementation is generated by `automock` in test builds. | ||||
| #[async_trait::async_trait] | ||||
| #[cfg_attr(test, automock)] | ||||
| pub trait UuidResolverHandle { | ||||
|     /// Resolves the uuid registered under `name`. | ||||
|     async fn get(&self, name: String) -> Result<Uuid>; | ||||
|     /// Registers `uuid` under `name`. | ||||
|     async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; | ||||
|     /// Removes the entry registered under `name` and returns its uuid. | ||||
|     async fn delete(&self, name: String) -> Result<Uuid>; | ||||
|     /// Lists the registered `(name, uuid)` pairs. | ||||
|     async fn list(&self) -> Result<Vec<(String, Uuid)>>; | ||||
|     /// Requests a snapshot to `path`; returns a set of uuids. | ||||
|     async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>; | ||||
|     /// Returns the size of the underlying store. | ||||
|     async fn get_size(&self) -> Result<u64>; | ||||
|     /// Requests a dump to `path`; returns a set of uuids. | ||||
|     async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>; | ||||
| } | ||||
							
								
								
									
										224
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/store.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										224
									
								
								meilisearch-lib/src/index_controller/uuid_resolver/store.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,224 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::fs::{create_dir_all, File}; | ||||
| use std::io::{BufRead, BufReader, Write}; | ||||
| use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use heed::types::{ByteSlice, Str}; | ||||
| use heed::{CompactionOption, Database, Env, EnvOpenOptions}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::{error::UuidResolverError, Result, UUID_STORE_SIZE}; | ||||
| use crate::EnvSizer; | ||||
|  | ||||
| /// One line of the `data.jsonl` dump file: an index uid and its uuid. | ||||
| #[derive(Serialize, Deserialize)] | ||||
| struct DumpEntry { | ||||
|     uuid: Uuid, | ||||
|     uid: String, | ||||
| } | ||||
|  | ||||
| /// Name of the directory holding the uuid store; also used for snapshots and dumps. | ||||
| const UUIDS_DB_PATH: &str = "index_uuids"; | ||||
|  | ||||
| /// Asynchronous storage backend mapping index uids to uuids. | ||||
| #[async_trait::async_trait] | ||||
| pub trait UuidStore: Sized { | ||||
|     /// Returns the uuid registered under `uid`, or `None` if there is none. | ||||
|     async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>; | ||||
|     /// Removes the entry registered under `uid`, returning its uuid, or `None` if | ||||
|     /// there was no such entry. | ||||
|     async fn delete(&self, uid: String) -> Result<Option<Uuid>>; | ||||
|     /// Lists every `(uid, uuid)` pair. | ||||
|     async fn list(&self) -> Result<Vec<(String, Uuid)>>; | ||||
|     /// Registers `uuid` under `name`. | ||||
|     async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; | ||||
|     /// Snapshots the store to `path`; returns a set of uuids. | ||||
|     async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>; | ||||
|     /// Returns the size of the store. | ||||
|     async fn get_size(&self) -> Result<u64>; | ||||
|     /// Dumps the store to `path`; returns a set of uuids. | ||||
|     async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>; | ||||
| } | ||||
|  | ||||
| /// Uuid store backed by a heed environment. | ||||
| #[derive(Clone)] | ||||
| pub struct HeedUuidStore { | ||||
|     // Environment the database lives in. | ||||
|     env: Env, | ||||
|     // Maps an index uid (string key) to the raw bytes of its uuid. | ||||
|     db: Database<Str, ByteSlice>, | ||||
| } | ||||
|  | ||||
| impl HeedUuidStore { | ||||
|     /// Opens the uuid store living in the `index_uuids` directory under `path`. | ||||
|     /// | ||||
|     /// The directory is created when missing and the environment is opened with a map | ||||
|     /// size of `UUID_STORE_SIZE`. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails if the directory cannot be created or if the environment or its database | ||||
|     /// cannot be opened. | ||||
|     pub fn new(path: impl AsRef<Path>) -> Result<Self> { | ||||
|         let path = path.as_ref().join(UUIDS_DB_PATH); | ||||
|         create_dir_all(&path)?; | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(UUID_STORE_SIZE); // 1 GiB | ||||
|         let env = options.open(path)?; | ||||
|         let db = env.create_database(None)?; | ||||
|         Ok(Self { env, db }) | ||||
|     } | ||||
|  | ||||
|     /// Returns the uuid registered under `name`, or `None` if there is no such entry. | ||||
|     pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> { | ||||
|         let txn = self.env.read_txn()?; | ||||
|         match self.db.get(&txn, &name)? { | ||||
|             Some(uuid) => { | ||||
|                 let uuid = Uuid::from_slice(uuid)?; | ||||
|                 Ok(Some(uuid)) | ||||
|             } | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Removes the entry registered under `uid` and returns its uuid, or `None` if | ||||
|     /// there was no such entry. | ||||
|     pub fn delete(&self, uid: String) -> Result<Option<Uuid>> { | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|         match self.db.get(&txn, &uid)? { | ||||
|             Some(uuid) => { | ||||
|                 let uuid = Uuid::from_slice(uuid)?; | ||||
|                 self.db.delete(&mut txn, &uid)?; | ||||
|                 txn.commit()?; | ||||
|                 Ok(Some(uuid)) | ||||
|             } | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns every `(name, uuid)` pair stored in the database. | ||||
|     pub fn list(&self) -> Result<Vec<(String, Uuid)>> { | ||||
|         let txn = self.env.read_txn()?; | ||||
|         let mut entries = Vec::new(); | ||||
|         for entry in self.db.iter(&txn)? { | ||||
|             let (name, uuid) = entry?; | ||||
|             let uuid = Uuid::from_slice(uuid)?; | ||||
|             entries.push((name.to_owned(), uuid)) | ||||
|         } | ||||
|         Ok(entries) | ||||
|     } | ||||
|  | ||||
|     /// Registers `uuid` under `name`. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns `NameAlreadyExist`, leaving the store untouched, when `name` is already | ||||
|     /// registered. | ||||
|     pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> { | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|  | ||||
|         if self.db.get(&txn, &name)?.is_some() { | ||||
|             return Err(UuidResolverError::NameAlreadyExist); | ||||
|         } | ||||
|  | ||||
|         self.db.put(&mut txn, &name, uuid.as_bytes())?; | ||||
|         txn.commit()?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Copies the environment to `path/index_uuids/data.mdb` and returns the uuids of | ||||
|     /// all the stored entries. | ||||
|     /// | ||||
|     /// Nothing is written when the store is empty. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails if the database cannot be read, or if the snapshot directory or the copy | ||||
|     /// cannot be written. | ||||
|     pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         // Write transaction to acquire a lock on the database. | ||||
|         let txn = self.env.write_txn()?; | ||||
|         let mut entries = HashSet::new(); | ||||
|         for entry in self.db.iter(&txn)? { | ||||
|             let (_, uuid) = entry?; | ||||
|             let uuid = Uuid::from_slice(uuid)?; | ||||
|             entries.insert(uuid); | ||||
|         } | ||||
|  | ||||
|         // only perform snapshot if there are indexes | ||||
|         if !entries.is_empty() { | ||||
|             path.push(UUIDS_DB_PATH); | ||||
|             // Report I/O failures to the caller instead of panicking. | ||||
|             create_dir_all(&path)?; | ||||
|             path.push("data.mdb"); | ||||
|             self.env.copy_to_path(path, CompactionOption::Enabled)?; | ||||
|         } | ||||
|         Ok(entries) | ||||
|     } | ||||
|  | ||||
|     /// Returns the size of the environment, as computed by `EnvSizer::size`. | ||||
|     pub fn get_size(&self) -> Result<u64> { | ||||
|         Ok(self.env.size()) | ||||
|     } | ||||
|  | ||||
|     /// Writes every entry as one JSON object per line to `path/index_uuids/data.jsonl` | ||||
|     /// and returns the uuids of the dumped entries. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails if the dump file cannot be created or written, or if the database cannot | ||||
|     /// be read. | ||||
|     pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         let dump_path = path.join(UUIDS_DB_PATH); | ||||
|         create_dir_all(&dump_path)?; | ||||
|         let dump_file_path = dump_path.join("data.jsonl"); | ||||
|         let mut dump_file = File::create(&dump_file_path)?; | ||||
|         let mut uuids = HashSet::new(); | ||||
|  | ||||
|         let txn = self.env.read_txn()?; | ||||
|         for entry in self.db.iter(&txn)? { | ||||
|             let (uid, uuid) = entry?; | ||||
|             let uid = uid.to_string(); | ||||
|             let uuid = Uuid::from_slice(uuid)?; | ||||
|  | ||||
|             let entry = DumpEntry { uuid, uid }; | ||||
|             serde_json::to_writer(&mut dump_file, &entry)?; | ||||
|             // Report I/O failures to the caller instead of panicking. | ||||
|             dump_file.write_all(b"\n")?; | ||||
|  | ||||
|             uuids.insert(uuid); | ||||
|         } | ||||
|  | ||||
|         Ok(uuids) | ||||
|     } | ||||
|  | ||||
|     /// Rebuilds a uuid store under `dst` from the dump found under `src`. | ||||
|     /// | ||||
|     /// Reads `src/index_uuids/data.jsonl` line by line, inserts each entry in a store | ||||
|     /// opened at `dst` within a single write transaction, commits it, then waits for | ||||
|     /// the environment to be closed. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails if the dump file cannot be read or parsed, or if the store cannot be | ||||
|     /// opened or written. | ||||
|     pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { | ||||
|         let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH); | ||||
|         std::fs::create_dir_all(&uuid_resolver_path)?; | ||||
|  | ||||
|         let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); | ||||
|         let indexes = File::open(&src_indexes)?; | ||||
|         let mut indexes = BufReader::new(indexes); | ||||
|         // Single buffer reused for every line of the dump. | ||||
|         let mut line = String::new(); | ||||
|  | ||||
|         let db = Self::new(dst)?; | ||||
|         let mut txn = db.env.write_txn()?; | ||||
|  | ||||
|         loop { | ||||
|             match indexes.read_line(&mut line) { | ||||
|                 // End of file. | ||||
|                 Ok(0) => break, | ||||
|                 Ok(_) => { | ||||
|                     let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; | ||||
|                     println!("importing {} {}", uid, uuid); | ||||
|                     db.db.put(&mut txn, &uid, uuid.as_bytes())?; | ||||
|                 } | ||||
|                 Err(e) => return Err(e.into()), | ||||
|             } | ||||
|  | ||||
|             line.clear(); | ||||
|         } | ||||
|         txn.commit()?; | ||||
|  | ||||
|         db.env.prepare_for_closing().wait(); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Each database operation runs the inherent method of the same name on a clone of the | ||||
| // store inside `spawn_blocking`; a failure to join the task is converted into the | ||||
| // method's error type by `?`. | ||||
| #[async_trait::async_trait] | ||||
| impl UuidStore for HeedUuidStore { | ||||
|     async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> { | ||||
|         let store = self.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || store.get_uuid(name)); | ||||
|         task.await? | ||||
|     } | ||||
|  | ||||
|     async fn delete(&self, uid: String) -> Result<Option<Uuid>> { | ||||
|         let store = self.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || store.delete(uid)); | ||||
|         task.await? | ||||
|     } | ||||
|  | ||||
|     async fn list(&self) -> Result<Vec<(String, Uuid)>> { | ||||
|         let store = self.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || store.list()); | ||||
|         task.await? | ||||
|     } | ||||
|  | ||||
|     async fn insert(&self, name: String, uuid: Uuid) -> Result<()> { | ||||
|         let store = self.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || store.insert(name, uuid)); | ||||
|         task.await? | ||||
|     } | ||||
|  | ||||
|     async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         let store = self.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || store.snapshot(path)); | ||||
|         task.await? | ||||
|     } | ||||
|  | ||||
|     // Unlike the other operations, the size is computed without `spawn_blocking`. | ||||
|     async fn get_size(&self) -> Result<u64> { | ||||
|         self.get_size() | ||||
|     } | ||||
|  | ||||
|     async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> { | ||||
|         let store = self.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || store.dump(path)); | ||||
|         task.await? | ||||
|     } | ||||
| } | ||||
							
								
								
									
										53
									
								
								meilisearch-lib/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								meilisearch-lib/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| #[macro_use] | ||||
| pub mod error; | ||||
| pub mod options; | ||||
|  | ||||
| pub mod index; | ||||
| pub mod index_controller; | ||||
|  | ||||
| pub use index_controller::{UpdateResult, UpdateStatus, IndexController as MeiliSearch, update_actor::RegisterUpdate}; | ||||
|  | ||||
| use walkdir::WalkDir; | ||||
|  | ||||
| /// Something whose size, as a `u64`, can be queried. | ||||
| pub trait EnvSizer { | ||||
|     /// Returns the size of `self`. | ||||
|     fn size(&self) -> u64; | ||||
| } | ||||
|  | ||||
| impl EnvSizer for heed::Env { | ||||
|     /// Sums the lengths of all the regular files found under the environment's path. | ||||
|     /// | ||||
|     /// Entries that cannot be walked or whose metadata cannot be read are skipped. | ||||
|     fn size(&self) -> u64 { | ||||
|         WalkDir::new(self.path()) | ||||
|             .into_iter() | ||||
|             .filter_map(Result::ok) | ||||
|             .filter_map(|entry| entry.metadata().ok()) | ||||
|             .filter(|metadata| metadata.is_file()) | ||||
|             .map(|metadata| metadata.len()) | ||||
|             .sum() | ||||
|     } | ||||
| } | ||||
|  | ||||
| use std::fs::{create_dir_all, File}; | ||||
| use std::io::Write; | ||||
| use std::path::Path; | ||||
|  | ||||
| use flate2::{read::GzDecoder, write::GzEncoder, Compression}; | ||||
| use tar::{Archive, Builder}; | ||||
|  | ||||
| /// Archives the content of the `src` directory into a gzip-compressed tarball at `dest`. | ||||
| /// | ||||
| /// The directory content is stored at the root (`.`) of the archive. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Fails if `dest` cannot be created or written, or if `src` cannot be archived. | ||||
| pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> { | ||||
|     let mut archive_file = File::create(dest)?; | ||||
|     let compressor = GzEncoder::new(&mut archive_file, Compression::default()); | ||||
|     let mut tarball = Builder::new(compressor); | ||||
|     tarball.append_dir_all(".", src)?; | ||||
|     // Finish the tar stream first, then the gzip stream wrapping it. | ||||
|     tarball.into_inner()?.finish()?; | ||||
|     archive_file.flush()?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Unpacks the gzip-compressed tarball `src` into the `dest` directory. | ||||
| /// | ||||
| /// `dest` is created, along with its missing parents, before unpacking. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Fails if `src` cannot be opened, `dest` cannot be created or the archive cannot be | ||||
| /// unpacked. | ||||
| pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> { | ||||
|     let tarball = File::open(&src)?; | ||||
|     let mut archive = Archive::new(GzDecoder::new(tarball)); | ||||
|     create_dir_all(&dest)?; | ||||
|     archive.unpack(&dest)?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
							
								
								
									
										115
									
								
								meilisearch-lib/src/options.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								meilisearch-lib/src/options.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | ||||
| use core::fmt; | ||||
| use std::{ops::Deref, str::FromStr}; | ||||
|  | ||||
| use byte_unit::{Byte, ByteError}; | ||||
| use milli::CompressionType; | ||||
| use structopt::StructOpt; | ||||
| use sysinfo::{RefreshKind, System, SystemExt}; | ||||
|  | ||||
| // Command line options of the indexer. | ||||
| // NOTE(review): presumably the `///` comments below are surfaced by `StructOpt` as the | ||||
| // help text of each flag, so edit them as user-facing strings; confirm against structopt. | ||||
| #[derive(Debug, Clone, StructOpt)] | ||||
| pub struct IndexerOpts { | ||||
|     /// The amount of documents to skip before printing | ||||
|     /// a log regarding the indexing advancement. | ||||
|     #[structopt(long, default_value = "100000")] // 100k | ||||
|     pub log_every_n: usize, | ||||
|  | ||||
|     /// Grenad max number of chunks in bytes. | ||||
|     #[structopt(long)] | ||||
|     pub max_nb_chunks: Option<usize>, | ||||
|  | ||||
|     /// The maximum amount of memory the indexer will use. It defaults to 2/3 | ||||
|     /// of the available memory. It is recommended to use something like 80%-90% | ||||
|     /// of the available memory, no more. | ||||
|     /// | ||||
|     /// In case the engine is unable to retrieve the available memory the engine will | ||||
|     /// try to use the memory it needs but without real limit, this can lead to | ||||
|     /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. | ||||
|     #[structopt(long, default_value)] | ||||
|     pub max_memory: MaxMemory, | ||||
|  | ||||
|     /// The name of the compression algorithm to use when compressing intermediate | ||||
|     /// Grenad chunks while indexing documents. | ||||
|     /// | ||||
|     /// Choosing a fast algorithm will make the indexing faster but may consume more memory. | ||||
|     #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] | ||||
|     pub chunk_compression_type: CompressionType, | ||||
|  | ||||
|     /// The level of compression of the chosen algorithm. | ||||
|     #[structopt(long, requires = "chunk-compression-type")] | ||||
|     pub chunk_compression_level: Option<u32>, | ||||
|  | ||||
|     /// Number of parallel jobs for indexing, defaults to # of CPUs. | ||||
|     #[structopt(long)] | ||||
|     pub indexing_jobs: Option<usize>, | ||||
| } | ||||
|  | ||||
| impl Default for IndexerOpts { | ||||
|     /// Builds the options used when nothing is read from the command line. | ||||
|     fn default() -> Self { | ||||
|         let max_memory = MaxMemory::default(); | ||||
|         Self { | ||||
|             log_every_n: 100_000, | ||||
|             max_nb_chunks: None, | ||||
|             max_memory, | ||||
|             // NOTE(review): the command line default is "snappy" while this one is | ||||
|             // `None`; confirm the difference is intended. | ||||
|             chunk_compression_type: CompressionType::None, | ||||
|             chunk_compression_level: None, | ||||
|             indexing_jobs: None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A type used to detect the max memory available and use 2/3 of it. | ||||
| /// | ||||
| /// The inner value is `None` when no limit is set, either because the total memory | ||||
| /// could not be detected or because the value was built with `MaxMemory::unlimited`. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct MaxMemory(Option<Byte>); | ||||
|  | ||||
| impl FromStr for MaxMemory { | ||||
|     type Err = ByteError; | ||||
|  | ||||
|     /// Parses `s` as a byte quantity and uses it as the memory limit. | ||||
|     fn from_str(s: &str) -> Result<MaxMemory, ByteError> { | ||||
|         let limit = Byte::from_str(s)?; | ||||
|         Ok(MaxMemory(Some(limit))) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Default for MaxMemory { | ||||
|     /// Defaults to two thirds of the total memory, or to no limit when the total | ||||
|     /// memory cannot be retrieved. | ||||
|     fn default() -> MaxMemory { | ||||
|         let limit = total_memory_bytes().map(|total| Byte::from_bytes(total * 2 / 3)); | ||||
|         MaxMemory(limit) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for MaxMemory { | ||||
|     /// Prints the limit in its appropriate unit, or `unknown` when there is no limit. | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         if let Some(memory) = self.0 { | ||||
|             write!(f, "{}", memory.get_appropriate_unit(true)) | ||||
|         } else { | ||||
|             f.write_str("unknown") | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Gives read access to the optional limit wrapped by [`MaxMemory`]. | ||||
| impl Deref for MaxMemory { | ||||
|     type Target = Option<Byte>; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         let MaxMemory(limit) = self; | ||||
|         limit | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl MaxMemory { | ||||
|     /// Returns a `MaxMemory` holding no limit at all. | ||||
|     pub fn unlimited() -> Self { | ||||
|         MaxMemory(None) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns the total amount of memory of the system, in bytes, or `None` when the | ||||
| /// system is not supported. | ||||
| fn total_memory_bytes() -> Option<u64> { | ||||
|     if !System::IS_SUPPORTED { | ||||
|         return None; | ||||
|     } | ||||
|  | ||||
|     // Only the memory information is needed, nothing else gets refreshed. | ||||
|     let mut system = System::new_with_specifics(RefreshKind::new().with_memory()); | ||||
|     system.refresh_memory(); | ||||
|     Some(system.total_memory() * 1024) // KiB into bytes | ||||
| } | ||||
|  | ||||
		Reference in New Issue
	
	Block a user