mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	create workspace with meilisearch-error
This commit is contained in:
		
							
								
								
									
										137
									
								
								meilisearch-http/src/analytics.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								meilisearch-http/src/analytics.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | ||||
| use std::hash::{Hash, Hasher}; | ||||
| use std::{error, thread}; | ||||
| use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; | ||||
|  | ||||
| use log::error; | ||||
| use serde::Serialize; | ||||
| use serde_qs as qs; | ||||
| use siphasher::sip::SipHasher; | ||||
| use walkdir::WalkDir; | ||||
|  | ||||
| use crate::Data; | ||||
| use crate::Opt; | ||||
|  | ||||
/// API key sent as the `api_key` field of every request posted to Amplitude.
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
|  | ||||
| #[derive(Debug, Serialize)] | ||||
| struct EventProperties { | ||||
|     database_size: u64, | ||||
|     last_update_timestamp: Option<i64>, //timestamp | ||||
|     number_of_documents: Vec<u64>, | ||||
| } | ||||
|  | ||||
| impl EventProperties { | ||||
|     fn from(data: Data) -> Result<EventProperties, Box<dyn error::Error>> { | ||||
|         let mut index_list = Vec::new(); | ||||
|  | ||||
|         let reader = data.db.main_read_txn()?; | ||||
|  | ||||
|         for index_uid in data.db.indexes_uids() { | ||||
|             if let Some(index) = data.db.open_index(&index_uid) { | ||||
|                 let number_of_documents = index.main.number_of_documents(&reader)?; | ||||
|                 index_list.push(number_of_documents); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let database_size = WalkDir::new(&data.db_path) | ||||
|             .into_iter() | ||||
|             .filter_map(|entry| entry.ok()) | ||||
|             .filter_map(|entry| entry.metadata().ok()) | ||||
|             .filter(|metadata| metadata.is_file()) | ||||
|             .fold(0, |acc, m| acc + m.len()); | ||||
|  | ||||
|         let last_update_timestamp = data.db.last_update(&reader)?.map(|u| u.timestamp()); | ||||
|  | ||||
|         Ok(EventProperties { | ||||
|             database_size, | ||||
|             last_update_timestamp, | ||||
|             number_of_documents: index_list, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize)] | ||||
| struct UserProperties<'a> { | ||||
|     env: &'a str, | ||||
|     start_since_days: u64, | ||||
|     user_email: Option<String>, | ||||
|     server_provider: Option<String>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize)] | ||||
| struct Event<'a> { | ||||
|     user_id: &'a str, | ||||
|     event_type: &'a str, | ||||
|     device_id: &'a str, | ||||
|     time: u64, | ||||
|     app_version: &'a str, | ||||
|     user_properties: UserProperties<'a>, | ||||
|     event_properties: Option<EventProperties>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize)] | ||||
| struct AmplitudeRequest<'a> { | ||||
|     api_key: &'a str, | ||||
|     event: &'a str, | ||||
| } | ||||
|  | ||||
| pub fn analytics_sender(data: Data, opt: Opt) { | ||||
|     let username = whoami::username(); | ||||
|     let hostname = whoami::hostname(); | ||||
|     let platform = whoami::platform(); | ||||
|  | ||||
|     let uid = username + &hostname + &platform.to_string(); | ||||
|  | ||||
|     let mut hasher = SipHasher::new(); | ||||
|     uid.hash(&mut hasher); | ||||
|     let hash = hasher.finish(); | ||||
|  | ||||
|     let uid = format!("{:X}", hash); | ||||
|     let platform = platform.to_string(); | ||||
|     let first_start = Instant::now(); | ||||
|  | ||||
|     loop { | ||||
|         let n = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); | ||||
|         let user_id = &uid; | ||||
|         let device_id = &platform; | ||||
|         let time = n.as_secs(); | ||||
|         let event_type = "runtime_tick"; | ||||
|         let elapsed_since_start = first_start.elapsed().as_secs() / 86_400; // One day | ||||
|         let event_properties = EventProperties::from(data.clone()).ok(); | ||||
|         let app_version = env!("CARGO_PKG_VERSION").to_string(); | ||||
|         let app_version = app_version.as_str(); | ||||
|         let user_email = std::env::var("MEILI_USER_EMAIL").ok(); | ||||
|         let server_provider = std::env::var("MEILI_SERVER_PROVIDER").ok(); | ||||
|         let user_properties = UserProperties { | ||||
|             env: &opt.env, | ||||
|             start_since_days: elapsed_since_start, | ||||
|             user_email, | ||||
|             server_provider, | ||||
|         }; | ||||
|  | ||||
|         let event = Event { | ||||
|             user_id, | ||||
|             event_type, | ||||
|             device_id, | ||||
|             time, | ||||
|             app_version, | ||||
|             user_properties, | ||||
|             event_properties | ||||
|         }; | ||||
|         let event = serde_json::to_string(&event).unwrap(); | ||||
|  | ||||
|         let request = AmplitudeRequest { | ||||
|             api_key: AMPLITUDE_API_KEY, | ||||
|             event: &event, | ||||
|         }; | ||||
|  | ||||
|         let body = qs::to_string(&request).unwrap(); | ||||
|         let response = ureq::post("https://api.amplitude.com/httpapi").send_string(&body); | ||||
|         if !response.ok() { | ||||
|             let body = response.into_string().unwrap(); | ||||
|             error!("Unsuccessful call to Amplitude: {}", body); | ||||
|         } | ||||
|  | ||||
|         thread::sleep(Duration::from_secs(3600)) // one hour | ||||
|     } | ||||
| } | ||||
							
								
								
									
										147
									
								
								meilisearch-http/src/data/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								meilisearch-http/src/data/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | ||||
| mod search; | ||||
| mod updates; | ||||
|  | ||||
| pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; | ||||
|  | ||||
| use std::fs::create_dir_all; | ||||
| use std::ops::Deref; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use sha2::Digest; | ||||
|  | ||||
| use crate::index_controller::{IndexController, LocalIndexController, IndexMetadata, Settings, IndexSettings}; | ||||
| use crate::option::Opt; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct Data { | ||||
|     inner: Arc<DataInner>, | ||||
| } | ||||
|  | ||||
| impl Deref for Data { | ||||
|     type Target = DataInner; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.inner | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct DataInner { | ||||
|     pub index_controller: Arc<LocalIndexController>, | ||||
|     pub api_keys: ApiKeys, | ||||
|     options: Opt, | ||||
| } | ||||
|  | ||||
/// The set of API keys known to the server; each one is optional.
#[derive(Clone)]
pub struct ApiKeys {
    /// Public key, if any.
    pub public: Option<String>,
    /// Private key, if any.
    pub private: Option<String>,
    /// Master key, from which missing private and public keys can be derived.
    pub master: Option<String>,
}
|  | ||||
| impl ApiKeys { | ||||
|     pub fn generate_missing_api_keys(&mut self) { | ||||
|         if let Some(master_key) = &self.master { | ||||
|             if self.private.is_none() { | ||||
|                 let key = format!("{}-private", master_key); | ||||
|                 let sha = sha2::Sha256::digest(key.as_bytes()); | ||||
|                 self.private = Some(format!("{:x}", sha)); | ||||
|             } | ||||
|             if self.public.is_none() { | ||||
|                 let key = format!("{}-public", master_key); | ||||
|                 let sha = sha2::Sha256::digest(key.as_bytes()); | ||||
|                 self.public = Some(format!("{:x}", sha)); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Data { | ||||
|     pub fn new(options: Opt) -> anyhow::Result<Data> { | ||||
|         let path = options.db_path.clone(); | ||||
|         let indexer_opts = options.indexer_options.clone(); | ||||
|         create_dir_all(&path)?; | ||||
|         let index_controller = LocalIndexController::new( | ||||
|             &path, | ||||
|             indexer_opts, | ||||
|             options.max_mdb_size.get_bytes(), | ||||
|             options.max_udb_size.get_bytes(), | ||||
|         )?; | ||||
|         let index_controller = Arc::new(index_controller); | ||||
|  | ||||
|         let mut api_keys = ApiKeys { | ||||
|             master: options.clone().master_key, | ||||
|             private: None, | ||||
|             public: None, | ||||
|         }; | ||||
|  | ||||
|         api_keys.generate_missing_api_keys(); | ||||
|  | ||||
|         let inner = DataInner { index_controller, options, api_keys }; | ||||
|         let inner = Arc::new(inner); | ||||
|  | ||||
|         Ok(Data { inner }) | ||||
|     } | ||||
|  | ||||
|     pub fn settings<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<Settings> { | ||||
|         let index = self.index_controller | ||||
|             .index(&index_uid)? | ||||
|             .ok_or_else(|| anyhow::anyhow!("Index {} does not exist.", index_uid.as_ref()))?; | ||||
|  | ||||
|         let txn = index.read_txn()?; | ||||
|  | ||||
|         let displayed_attributes = index | ||||
|             .displayed_fields(&txn)? | ||||
|             .map(|fields| fields.into_iter().map(String::from).collect()) | ||||
|             .unwrap_or_else(|| vec!["*".to_string()]); | ||||
|  | ||||
|         let searchable_attributes = index | ||||
|             .searchable_fields(&txn)? | ||||
|             .map(|fields| fields.into_iter().map(String::from).collect()) | ||||
|             .unwrap_or_else(|| vec!["*".to_string()]); | ||||
|  | ||||
|         let faceted_attributes = index | ||||
|             .faceted_fields(&txn)? | ||||
|             .into_iter() | ||||
|             .map(|(k, v)| (k, v.to_string())) | ||||
|             .collect(); | ||||
|  | ||||
|         Ok(Settings { | ||||
|             displayed_attributes: Some(Some(displayed_attributes)), | ||||
|             searchable_attributes: Some(Some(searchable_attributes)), | ||||
|             faceted_attributes: Some(Some(faceted_attributes)), | ||||
|             criteria: None, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> { | ||||
|         self.index_controller.list_indexes() | ||||
|     } | ||||
|  | ||||
|     pub fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<IndexMetadata>> { | ||||
|         Ok(self | ||||
|             .list_indexes()? | ||||
|             .into_iter() | ||||
|             .find(|i| i.uid == name.as_ref())) | ||||
|     } | ||||
|  | ||||
|     pub fn create_index(&self, name: impl AsRef<str>, primary_key: Option<impl AsRef<str>>) -> anyhow::Result<IndexMetadata> { | ||||
|         let settings = IndexSettings { | ||||
|             name: Some(name.as_ref().to_string()), | ||||
|             primary_key: primary_key.map(|s| s.as_ref().to_string()), | ||||
|         }; | ||||
|  | ||||
|         let meta = self.index_controller.create_index(settings)?; | ||||
|         Ok(meta) | ||||
|     } | ||||
|  | ||||
|     #[inline] | ||||
|     pub fn http_payload_size_limit(&self) -> usize { | ||||
|         self.options.http_payload_size_limit.get_bytes() as usize | ||||
|     } | ||||
|  | ||||
|     #[inline] | ||||
|     pub fn api_keys(&self) -> &ApiKeys { | ||||
|         &self.api_keys | ||||
|     } | ||||
| } | ||||
							
								
								
									
										353
									
								
								meilisearch-http/src/data/search.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										353
									
								
								meilisearch-http/src/data/search.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,353 @@ | ||||
| use std::collections::{HashSet, BTreeMap}; | ||||
| use std::mem; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use anyhow::{bail, Context}; | ||||
| use either::Either; | ||||
| use heed::RoTxn; | ||||
| use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; | ||||
| use milli::{obkv_to_json, FacetCondition, Index, facet::FacetValue}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::{Map, Value}; | ||||
|  | ||||
| use super::Data; | ||||
| use crate::index_controller::IndexController; | ||||
|  | ||||
/// Number of hits returned when a search request does not specify a `limit`.
pub const DEFAULT_SEARCH_LIMIT: usize = 20;

/// Default value provider used by serde for the `limit` field of a search query.
const fn default_search_limit() -> usize {
    DEFAULT_SEARCH_LIMIT
}
|  | ||||
| #[derive(Deserialize)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| #[allow(dead_code)] | ||||
| pub struct SearchQuery { | ||||
|     pub q: Option<String>, | ||||
|     pub offset: Option<usize>, | ||||
|     #[serde(default = "default_search_limit")] | ||||
|     pub limit: usize, | ||||
|     pub attributes_to_retrieve: Option<Vec<String>>, | ||||
|     pub attributes_to_crop: Option<Vec<String>>, | ||||
|     pub crop_length: Option<usize>, | ||||
|     pub attributes_to_highlight: Option<HashSet<String>>, | ||||
|     pub filters: Option<String>, | ||||
|     pub matches: Option<bool>, | ||||
|     pub facet_filters: Option<Value>, | ||||
|     pub facet_distributions: Option<Vec<String>>, | ||||
| } | ||||
|  | ||||
| impl SearchQuery { | ||||
|     pub fn perform(&self, index: impl AsRef<Index>) -> anyhow::Result<SearchResult> { | ||||
|         let index = index.as_ref(); | ||||
|         let before_search = Instant::now(); | ||||
|         let rtxn = index.read_txn()?; | ||||
|  | ||||
|         let mut search = index.search(&rtxn); | ||||
|  | ||||
|         if let Some(ref query) = self.q { | ||||
|             search.query(query); | ||||
|         } | ||||
|  | ||||
|         search.limit(self.limit); | ||||
|         search.offset(self.offset.unwrap_or_default()); | ||||
|  | ||||
|         if let Some(ref facets) = self.facet_filters { | ||||
|             if let Some(facets) = parse_facets(facets, index, &rtxn)? { | ||||
|                 search.facet_condition(facets); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let milli::SearchResult { | ||||
|             documents_ids, | ||||
|             found_words, | ||||
|             candidates, | ||||
|         } = search.execute()?; | ||||
|  | ||||
|         let mut documents = Vec::new(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||
|  | ||||
|         let displayed_fields_ids = index.displayed_fields_ids(&rtxn)?; | ||||
|  | ||||
|         let attributes_to_retrieve_ids = match self.attributes_to_retrieve { | ||||
|             Some(ref attrs) if attrs.iter().any(|f| f == "*") => None, | ||||
|             Some(ref attrs) => attrs | ||||
|                 .iter() | ||||
|                 .filter_map(|f| fields_ids_map.id(f)) | ||||
|                 .collect::<Vec<_>>() | ||||
|                 .into(), | ||||
|             None => None, | ||||
|         }; | ||||
|  | ||||
|         let displayed_fields_ids = match (displayed_fields_ids, attributes_to_retrieve_ids) { | ||||
|             (_, Some(ids)) => ids, | ||||
|             (Some(ids), None) => ids, | ||||
|             (None, None) => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|         }; | ||||
|  | ||||
|         let stop_words = fst::Set::default(); | ||||
|         let highlighter = Highlighter::new(&stop_words); | ||||
|  | ||||
|         for (_id, obkv) in index.documents(&rtxn, documents_ids)? { | ||||
|             let mut object = obkv_to_json(&displayed_fields_ids, &fields_ids_map, obkv)?; | ||||
|             if let Some(ref attributes_to_highlight) = self.attributes_to_highlight { | ||||
|                 highlighter.highlight_record(&mut object, &found_words, attributes_to_highlight); | ||||
|             } | ||||
|             documents.push(object); | ||||
|         } | ||||
|  | ||||
|         let nb_hits = candidates.len(); | ||||
|  | ||||
|         let facet_distributions = match self.facet_distributions { | ||||
|             Some(ref fields) => { | ||||
|                 let mut facet_distribution = index.facets_distribution(&rtxn); | ||||
|                 if fields.iter().all(|f| f != "*") { | ||||
|                     facet_distribution.facets(fields); | ||||
|                 } | ||||
|                 Some(facet_distribution.candidates(candidates).execute()?) | ||||
|             } | ||||
|             None => None, | ||||
|         }; | ||||
|  | ||||
|         Ok(SearchResult { | ||||
|             hits: documents, | ||||
|             nb_hits, | ||||
|             query: self.q.clone().unwrap_or_default(), | ||||
|             limit: self.limit, | ||||
|             offset: self.offset.unwrap_or_default(), | ||||
|             processing_time_ms: before_search.elapsed().as_millis(), | ||||
|             facet_distributions, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct SearchResult { | ||||
|     hits: Vec<Map<String, Value>>, | ||||
|     nb_hits: u64, | ||||
|     query: String, | ||||
|     limit: usize, | ||||
|     offset: usize, | ||||
|     processing_time_ms: u128, | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     facet_distributions: Option<BTreeMap<String, BTreeMap<FacetValue, u64>>>, | ||||
| } | ||||
|  | ||||
| struct Highlighter<'a, A> { | ||||
|     analyzer: Analyzer<'a, A>, | ||||
| } | ||||
|  | ||||
| impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { | ||||
|     fn new(stop_words: &'a fst::Set<A>) -> Self { | ||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); | ||||
|         Self { analyzer } | ||||
|     } | ||||
|  | ||||
|     fn highlight_value(&self, value: Value, words_to_highlight: &HashSet<String>) -> Value { | ||||
|         match value { | ||||
|             Value::Null => Value::Null, | ||||
|             Value::Bool(boolean) => Value::Bool(boolean), | ||||
|             Value::Number(number) => Value::Number(number), | ||||
|             Value::String(old_string) => { | ||||
|                 let mut string = String::new(); | ||||
|                 let analyzed = self.analyzer.analyze(&old_string); | ||||
|                 for (word, token) in analyzed.reconstruct() { | ||||
|                     if token.is_word() { | ||||
|                         let to_highlight = words_to_highlight.contains(token.text()); | ||||
|                         if to_highlight { | ||||
|                             string.push_str("<mark>") | ||||
|                         } | ||||
|                         string.push_str(word); | ||||
|                         if to_highlight { | ||||
|                             string.push_str("</mark>") | ||||
|                         } | ||||
|                     } else { | ||||
|                         string.push_str(word); | ||||
|                     } | ||||
|                 } | ||||
|                 Value::String(string) | ||||
|             } | ||||
|             Value::Array(values) => Value::Array( | ||||
|                 values | ||||
|                     .into_iter() | ||||
|                     .map(|v| self.highlight_value(v, words_to_highlight)) | ||||
|                     .collect(), | ||||
|             ), | ||||
|             Value::Object(object) => Value::Object( | ||||
|                 object | ||||
|                     .into_iter() | ||||
|                     .map(|(k, v)| (k, self.highlight_value(v, words_to_highlight))) | ||||
|                     .collect(), | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn highlight_record( | ||||
|         &self, | ||||
|         object: &mut Map<String, Value>, | ||||
|         words_to_highlight: &HashSet<String>, | ||||
|         attributes_to_highlight: &HashSet<String>, | ||||
|     ) { | ||||
|         // TODO do we need to create a string for element that are not and needs to be highlight? | ||||
|         for (key, value) in object.iter_mut() { | ||||
|             if attributes_to_highlight.contains(key) { | ||||
|                 let old_value = mem::take(value); | ||||
|                 *value = self.highlight_value(old_value, words_to_highlight); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Data { | ||||
|     pub fn search<S: AsRef<str>>( | ||||
|         &self, | ||||
|         index: S, | ||||
|         search_query: SearchQuery, | ||||
|     ) -> anyhow::Result<SearchResult> { | ||||
|         match self.index_controller.index(&index)? { | ||||
|             Some(index) => Ok(search_query.perform(index)?), | ||||
|             None => bail!("index {:?} doesn't exists", index.as_ref()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub async fn retrieve_documents<S>( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Send + Sync + 'static, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<S>>, | ||||
|     ) -> anyhow::Result<Vec<Map<String, Value>>> | ||||
|     where | ||||
|         S: AsRef<str> + Send + Sync + 'static, | ||||
|     { | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         let documents: anyhow::Result<_> = tokio::task::spawn_blocking(move || { | ||||
|             let index = index_controller | ||||
|                 .index(&index)? | ||||
|                 .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?; | ||||
|  | ||||
|             let txn = index.read_txn()?; | ||||
|  | ||||
|             let fields_ids_map = index.fields_ids_map(&txn)?; | ||||
|  | ||||
|             let attributes_to_retrieve_ids = match attributes_to_retrieve { | ||||
|                 Some(attrs) => attrs | ||||
|                     .iter() | ||||
|                     .filter_map(|f| fields_ids_map.id(f.as_ref())) | ||||
|                     .collect::<Vec<_>>(), | ||||
|                 None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|             }; | ||||
|  | ||||
|             let iter = index.documents.range(&txn, &(..))?.skip(offset).take(limit); | ||||
|  | ||||
|             let mut documents = Vec::new(); | ||||
|  | ||||
|             for entry in iter { | ||||
|                 let (_id, obkv) = entry?; | ||||
|                 let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?; | ||||
|                 documents.push(object); | ||||
|             } | ||||
|  | ||||
|             Ok(documents) | ||||
|         }) | ||||
|         .await?; | ||||
|         documents | ||||
|     } | ||||
|  | ||||
|     pub async fn retrieve_document<S>( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Sync + Send + 'static, | ||||
|         document_id: impl AsRef<str> + Sync + Send + 'static, | ||||
|         attributes_to_retrieve: Option<Vec<S>>, | ||||
|     ) -> anyhow::Result<Map<String, Value>> | ||||
|     where | ||||
|         S: AsRef<str> + Sync + Send + 'static, | ||||
|     { | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         let document: anyhow::Result<_> = tokio::task::spawn_blocking(move || { | ||||
|             let index = index_controller | ||||
|                 .index(&index)? | ||||
|                 .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?; | ||||
|             let txn = index.read_txn()?; | ||||
|  | ||||
|             let fields_ids_map = index.fields_ids_map(&txn)?; | ||||
|  | ||||
|             let attributes_to_retrieve_ids = match attributes_to_retrieve { | ||||
|                 Some(attrs) => attrs | ||||
|                     .iter() | ||||
|                     .filter_map(|f| fields_ids_map.id(f.as_ref())) | ||||
|                     .collect::<Vec<_>>(), | ||||
|                 None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|             }; | ||||
|  | ||||
|             let internal_id = index | ||||
|                 .external_documents_ids(&txn)? | ||||
|                 .get(document_id.as_ref().as_bytes()) | ||||
|                 .with_context(|| format!("Document with id {} not found", document_id.as_ref()))?; | ||||
|  | ||||
|             let document = index | ||||
|                 .documents(&txn, std::iter::once(internal_id))? | ||||
|                 .into_iter() | ||||
|                 .next() | ||||
|                 .map(|(_, d)| d); | ||||
|  | ||||
|             match document { | ||||
|                 Some(document) => Ok(obkv_to_json( | ||||
|                     &attributes_to_retrieve_ids, | ||||
|                     &fields_ids_map, | ||||
|                     document, | ||||
|                 )?), | ||||
|                 None => bail!("Document with id {} not found", document_id.as_ref()), | ||||
|             } | ||||
|         }) | ||||
|         .await?; | ||||
|         document | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn parse_facets_array( | ||||
|     txn: &RoTxn, | ||||
|     index: &Index, | ||||
|     arr: &Vec<Value>, | ||||
| ) -> anyhow::Result<Option<FacetCondition>> { | ||||
|     let mut ands = Vec::new(); | ||||
|     for value in arr { | ||||
|         match value { | ||||
|             Value::String(s) => ands.push(Either::Right(s.clone())), | ||||
|             Value::Array(arr) => { | ||||
|                 let mut ors = Vec::new(); | ||||
|                 for value in arr { | ||||
|                     match value { | ||||
|                         Value::String(s) => ors.push(s.clone()), | ||||
|                         v => bail!("Invalid facet expression, expected String, found: {:?}", v), | ||||
|                     } | ||||
|                 } | ||||
|                 ands.push(Either::Left(ors)); | ||||
|             } | ||||
|             v => bail!( | ||||
|                 "Invalid facet expression, expected String or [String], found: {:?}", | ||||
|                 v | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     FacetCondition::from_array(txn, index, ands) | ||||
| } | ||||
|  | ||||
| fn parse_facets( | ||||
|     facets: &Value, | ||||
|     index: &Index, | ||||
|     txn: &RoTxn, | ||||
| ) -> anyhow::Result<Option<FacetCondition>> { | ||||
|     match facets { | ||||
|         // Disabled for now | ||||
|         //Value::String(expr) => Ok(Some(FacetCondition::from_str(txn, index, expr)?)), | ||||
|         Value::Array(arr) => parse_facets_array(txn, index, arr), | ||||
|         v => bail!( | ||||
|             "Invalid facet expression, expected Array, found: {:?}", | ||||
|             v | ||||
|         ), | ||||
|     } | ||||
| } | ||||
|  | ||||
							
								
								
									
										115
									
								
								meilisearch-http/src/data/updates.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								meilisearch-http/src/data/updates.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | ||||
| use std::ops::Deref; | ||||
|  | ||||
| use async_compression::tokio_02::write::GzipEncoder; | ||||
| use futures_util::stream::StreamExt; | ||||
| use milli::update::{IndexDocumentsMethod, UpdateFormat}; | ||||
| use tokio::io::AsyncWriteExt; | ||||
|  | ||||
| use crate::index_controller::UpdateStatus; | ||||
| use crate::index_controller::{IndexController, Settings, IndexSettings, IndexMetadata}; | ||||
| use super::Data; | ||||
|  | ||||
| impl Data { | ||||
|     pub async fn add_documents<B, E>( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Send + Sync + 'static, | ||||
|         method: IndexDocumentsMethod, | ||||
|         format: UpdateFormat, | ||||
|         mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin, | ||||
|         primary_key: Option<String>, | ||||
|     ) -> anyhow::Result<UpdateStatus> | ||||
|     where | ||||
|         B: Deref<Target = [u8]>, | ||||
|         E: std::error::Error + Send + Sync + 'static, | ||||
|     { | ||||
|         let file = tokio::task::spawn_blocking(tempfile::tempfile).await?; | ||||
|         let file = tokio::fs::File::from_std(file?); | ||||
|         let mut encoder = GzipEncoder::new(file); | ||||
|  | ||||
|         let mut empty_update = true; | ||||
|         while let Some(result) = stream.next().await { | ||||
|             empty_update = false; | ||||
|             let bytes = &*result?; | ||||
|             encoder.write_all(&bytes[..]).await?; | ||||
|         } | ||||
|  | ||||
|         encoder.shutdown().await?; | ||||
|         let mut file = encoder.into_inner(); | ||||
|         file.sync_all().await?; | ||||
|         let file = file.into_std().await; | ||||
|  | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         let update = tokio::task::spawn_blocking(move ||{ | ||||
|             let mmap; | ||||
|             let bytes = if empty_update { | ||||
|                 &[][..] | ||||
|             } else { | ||||
|                 mmap = unsafe { memmap::Mmap::map(&file)? }; | ||||
|                 &mmap | ||||
|             }; | ||||
|             index_controller.add_documents(index, method, format, &bytes, primary_key) | ||||
|         }).await??; | ||||
|         Ok(update.into()) | ||||
|     } | ||||
|  | ||||
|     pub async fn update_settings( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Send + Sync + 'static, | ||||
|         settings: Settings | ||||
|     ) -> anyhow::Result<UpdateStatus> { | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         let update = tokio::task::spawn_blocking(move || index_controller.update_settings(index, settings)).await??; | ||||
|         Ok(update.into()) | ||||
|     } | ||||
|  | ||||
|     pub async fn clear_documents( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Sync + Send + 'static, | ||||
|     ) -> anyhow::Result<UpdateStatus> { | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         let update = tokio::task::spawn_blocking(move || index_controller.clear_documents(index)).await??; | ||||
|         Ok(update.into()) | ||||
|     } | ||||
|  | ||||
|     pub async fn delete_documents( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Sync + Send + 'static, | ||||
|         document_ids: Vec<String>, | ||||
|     ) -> anyhow::Result<UpdateStatus> { | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         let update = tokio::task::spawn_blocking(move || index_controller.delete_documents(index, document_ids)).await??; | ||||
|         Ok(update.into()) | ||||
|     } | ||||
|  | ||||
|     pub async fn delete_index( | ||||
|         &self, | ||||
|         index: impl AsRef<str> + Send + Sync + 'static, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         let index_controller = self.index_controller.clone(); | ||||
|         tokio::task::spawn_blocking(move || { index_controller.delete_index(index) }).await??; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of update `uid` of `index`, as reported by the index controller's | ||||
|     /// `update_status`. | ||||
|     #[inline] | ||||
|     pub fn get_update_status(&self, index: impl AsRef<str>, uid: u64) -> anyhow::Result<Option<UpdateStatus>> { | ||||
|         self.index_controller.update_status(index, uid) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of every update of `index`, as reported by the index controller's | ||||
|     /// `all_update_status`. | ||||
|     pub fn get_updates_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>> { | ||||
|         self.index_controller.all_update_status(index) | ||||
|     } | ||||
|  | ||||
|     /// Updates the index called `name` through the index controller. | ||||
|     /// | ||||
|     /// `new_name` and `primary_key` are copied into an `IndexSettings`; a `None` stays `None` | ||||
|     /// in the corresponding setting. | ||||
|     pub fn update_index( | ||||
|         &self, | ||||
|         name: impl AsRef<str>, | ||||
|         primary_key: Option<impl AsRef<str>>, | ||||
|         new_name: Option<impl AsRef<str>> | ||||
|     ) -> anyhow::Result<IndexMetadata> { | ||||
|         let renamed_to = new_name.map(|value| value.as_ref().to_string()); | ||||
|         let primary_key = primary_key.map(|value| value.as_ref().to_string()); | ||||
|         let settings = IndexSettings { name: renamed_to, primary_key }; | ||||
|  | ||||
|         self.index_controller.update_index(name, settings) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										423
									
								
								meilisearch-http/src/dump.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										423
									
								
								meilisearch-http/src/dump.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,423 @@ | ||||
| use std::fs::{create_dir_all, File}; | ||||
| use std::io::prelude::*; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Mutex; | ||||
| use std::thread; | ||||
|  | ||||
| use actix_web::web; | ||||
| use chrono::offset::Utc; | ||||
| use indexmap::IndexMap; | ||||
| use log::{error, info}; | ||||
| use once_cell::sync::Lazy; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::json; | ||||
| use tempfile::TempDir; | ||||
|  | ||||
| use crate::Data; | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::compression; | ||||
| use crate::routes::index; | ||||
| use crate::routes::setting::Settings; | ||||
| use crate::routes::index::IndexResponse; | ||||
|  | ||||
| // Shared record of the current dump, guarded by a mutex: it starts empty and is read and | ||||
| // replaced through `DumpInfo::get_current` / `DumpInfo::set_current`. | ||||
| static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default); | ||||
|  | ||||
| /// Version tag of the dump format, stored in `DumpMetadata::dump_version`. | ||||
| #[derive(Debug, Serialize, Deserialize, Copy, Clone)] | ||||
| enum DumpVersion { | ||||
|     // only format version declared so far | ||||
|     V1, | ||||
| } | ||||
|  | ||||
| impl DumpVersion { | ||||
|     // version written by `DumpMetadata::new` into newly created metadata | ||||
|     const CURRENT: Self = Self::V1; | ||||
| } | ||||
|  | ||||
| /// Content of the `metadata.json` file of a dump (field names are serialized in camelCase). | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct DumpMetadata { | ||||
|     // indexes contained in the dump | ||||
|     indexes: Vec<crate::routes::index::IndexResponse>, | ||||
|     // database version as a string (written as `major.minor.patch` by `dump_metadata`) | ||||
|     db_version: String, | ||||
|     // format version of the dump, used by `import_dump` to pick the import function | ||||
|     dump_version: DumpVersion, | ||||
| } | ||||
|  | ||||
| impl DumpMetadata { | ||||
|     /// Build the metadata of a new dump, tagged with `DumpVersion::CURRENT`. | ||||
|     pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self { | ||||
|         Self { | ||||
|             indexes, | ||||
|             db_version, | ||||
|             dump_version: DumpVersion::CURRENT, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Read and deserialize the `metadata.json` file located in `dir_path`. | ||||
|     fn from_path(dir_path: &Path) -> Result<Self, Error> { | ||||
|         let file = File::open(dir_path.join("metadata.json"))?; | ||||
|         let metadata = serde_json::from_reader(std::io::BufReader::new(file))?; | ||||
|  | ||||
|         Ok(metadata) | ||||
|     } | ||||
|  | ||||
|     /// Serialize this metadata as JSON into a `metadata.json` file created in `dir_path`. | ||||
|     fn to_path(&self, dir_path: &Path) -> Result<(), Error> { | ||||
|         let file = File::create(dir_path.join("metadata.json"))?; | ||||
|  | ||||
|         serde_json::to_writer(file, self).map_err(Error::from) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Read and deserialize the `settings.json` file located in `dir_path`. | ||||
| fn settings_from_path(dir_path: &Path) -> Result<Settings, Error> { | ||||
|     let file = File::open(dir_path.join("settings.json"))?; | ||||
|     let settings = serde_json::from_reader(std::io::BufReader::new(file))?; | ||||
|  | ||||
|     Ok(settings) | ||||
| } | ||||
|  | ||||
| /// Serialize `settings` as JSON into a `settings.json` file created in `dir_path`. | ||||
| fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> { | ||||
|     let file = File::create(dir_path.join("settings.json"))?; | ||||
|  | ||||
|     serde_json::to_writer(file, settings).map_err(Error::from) | ||||
| } | ||||
|  | ||||
| /// Import settings and documents of a dump with version `DumpVersion::V1` in specified index. | ||||
| /// | ||||
| /// Reads `settings.json` and `documents.jsonl` from `dumps_dir/<index_uid>`, applies the | ||||
| /// settings first, then adds the documents in batches of `document_batch_size`, all through | ||||
| /// `write_txn`. Fails if the index cannot be opened, if a file cannot be read or parsed, or if | ||||
| /// one of the updates returns an error. | ||||
| fn import_index_v1( | ||||
|     data: &Data, | ||||
|     dumps_dir: &Path, | ||||
|     index_uid: &str, | ||||
|     document_batch_size: usize, | ||||
|     write_txn: &mut MainWriter, | ||||
| ) -> Result<(), Error> { | ||||
|  | ||||
|     // open index; the error value is only built when the index is actually missing | ||||
|     let index = data | ||||
|         .db | ||||
|         .open_index(index_uid) | ||||
|         .ok_or_else(|| Error::index_not_found(index_uid))?; | ||||
|  | ||||
|     // index dir path in dump dir | ||||
|     let index_path = dumps_dir.join(index_uid); | ||||
|  | ||||
|     // extract `settings.json` file and import content | ||||
|     let settings = settings_from_path(&index_path)?; | ||||
|     let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?; | ||||
|     apply_settings_update(write_txn, &index, settings)?; | ||||
|  | ||||
|     // create iterator over documents in `documents.jsonl` to make batch importation | ||||
|     let documents = { | ||||
|         let file = File::open(index_path.join("documents.jsonl"))?; | ||||
|         let reader = std::io::BufReader::new(file); | ||||
|         let deserializer = serde_json::Deserializer::from_reader(reader); | ||||
|         deserializer.into_iter::<IndexMap<String, serde_json::Value>>() | ||||
|     }; | ||||
|  | ||||
|     // batch import document every `document_batch_size`: | ||||
|     // create a Vec to bufferize documents | ||||
|     let mut values = Vec::with_capacity(document_batch_size); | ||||
|     // iterate over documents | ||||
|     for document in documents { | ||||
|         // push document in buffer | ||||
|         values.push(document?); | ||||
|         // if buffer is full, create and apply a batch, and clean buffer; | ||||
|         // `>=` (not `==`) so that a batch size of 0 flushes after every document instead of | ||||
|         // never flushing and buffering the whole file in memory | ||||
|         if values.len() >= document_batch_size { | ||||
|             let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size)); | ||||
|             apply_documents_addition(write_txn, &index, batch)?; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // apply documents remaining in the buffer | ||||
|     if !values.is_empty() { | ||||
|         apply_documents_addition(write_txn, &index, values)?; | ||||
|     } | ||||
|  | ||||
|     // sync index information: stats, updated_at, last_update | ||||
|     crate::index_update_callback_txn(index, index_uid, data, write_txn).map_err(Error::Internal)?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Import the dump archive located at `dump_path` into the database. | ||||
| /// | ||||
| /// The archive is unpacked in a temporary directory, its `metadata.json` selects the import | ||||
| /// function, every index listed in the metadata is (re)created empty, and the content of all | ||||
| /// indexes is then imported inside a single `main_write` call. | ||||
| pub fn import_dump( | ||||
|     data: &Data, | ||||
|     dump_path: &Path, | ||||
|     document_batch_size: usize, | ||||
| ) -> Result<(), Error> { | ||||
|     info!("Importing dump from {:?}...", dump_path); | ||||
|  | ||||
|     // unpack the archive in a temporary directory (`tmp_dir` stays bound until the end) | ||||
|     let tmp_dir = TempDir::new()?; | ||||
|     let tmp_dir_path = tmp_dir.path(); | ||||
|     compression::from_tar_gz(dump_path, tmp_dir_path)?; | ||||
|  | ||||
|     // the metadata lists the indexes and tells which dump format was used | ||||
|     let metadata = DumpMetadata::from_path(tmp_dir_path)?; | ||||
|     let import_index = match metadata.dump_version { | ||||
|         DumpVersion::V1 => import_index_v1, | ||||
|     }; | ||||
|  | ||||
|     // drop any existing index sharing a `uid` with the dump, then create the empty indexes | ||||
|     let known_uids = data.db.indexes_uids(); | ||||
|     for dumped in &metadata.indexes { | ||||
|         if known_uids.contains(&dumped.uid) { | ||||
|             data.db.delete_index(dumped.uid.clone())?; | ||||
|         } | ||||
|         index::create_index_sync(&data.db, dumped.uid.clone(), dumped.name.clone(), dumped.primary_key.clone())?; | ||||
|     } | ||||
|  | ||||
|     // fill every index within one write closure | ||||
|     data.db.main_write::<_, _, Error>(|mut writer| { | ||||
|         for dumped in metadata.indexes { | ||||
|             import_index(data, tmp_dir_path, &dumped.uid, document_batch_size, &mut writer)?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     })?; | ||||
|  | ||||
|     info!("Dump importation from {:?} succeed", dump_path); | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// State of a dump, serialized in snake_case (e.g. `in_progress`). | ||||
| #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] | ||||
| #[serde(rename_all = "snake_case")] | ||||
| pub enum DumpStatus { | ||||
|     // set by `dump_process` once the compressed dump has been written | ||||
|     Done, | ||||
|     // set by `init_dump_process` when a dump is started | ||||
|     InProgress, | ||||
|     // set by `DumpInfo::with_error` | ||||
|     Failed, | ||||
| } | ||||
|  | ||||
| /// Identifier and state of a dump, as kept in `DUMP_INFO` and returned by `init_dump_process`. | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct DumpInfo { | ||||
|     // identifier of the dump (see `generate_uid`) | ||||
|     pub uid: String, | ||||
|     pub status: DumpStatus, | ||||
|     // JSON form of the error that made the dump fail; omitted when `None` and flattened into | ||||
|     // the surrounding object otherwise | ||||
|     #[serde(skip_serializing_if = "Option::is_none", flatten)] | ||||
|     pub error: Option<serde_json::Value>, | ||||
| } | ||||
|  | ||||
| impl DumpInfo { | ||||
|     /// Create a dump record with the given `uid` and `status` and no error attached. | ||||
|     pub fn new(uid: String, status: DumpStatus) -> Self { | ||||
|         DumpInfo { uid, status, error: None } | ||||
|     } | ||||
|  | ||||
|     /// Consume the record and return it marked as `Failed`, carrying `error` serialized to JSON. | ||||
|     pub fn with_error(self, error: ResponseError) -> Self { | ||||
|         Self { | ||||
|             status: DumpStatus::Failed, | ||||
|             error: Some(json!(error)), | ||||
|             ..self | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Tell whether this record describes a dump that is still in progress. | ||||
|     pub fn dump_already_in_progress(&self) -> bool { | ||||
|         matches!(self.status, DumpStatus::InProgress) | ||||
|     } | ||||
|  | ||||
|     /// Return a copy of the record currently stored in `DUMP_INFO`, if any. | ||||
|     pub fn get_current() -> Option<Self> { | ||||
|         let current = DUMP_INFO.lock().unwrap(); | ||||
|         (*current).clone() | ||||
|     } | ||||
|  | ||||
|     /// Store a copy of this record in `DUMP_INFO`, replacing the previous one. | ||||
|     pub fn set_current(&self) { | ||||
|         // the copy is made before the lock is taken | ||||
|         let snapshot = Some(self.clone()); | ||||
|         *DUMP_INFO.lock().unwrap() = snapshot; | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Generate a dump uid from the current UTC time, formatted with `%Y%m%d-%H%M%S%3f`. | ||||
| fn generate_uid() -> String { | ||||
|     let now = Utc::now(); | ||||
|     now.format("%Y%m%d-%H%M%S%3f").to_string() | ||||
| } | ||||
|  | ||||
| /// Build the path of the compressed dump file of `dump_uid`: `<dumps_dir>/<dump_uid>.dump`. | ||||
| pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf { | ||||
|     let mut file_name = String::with_capacity(dump_uid.len() + ".dump".len()); | ||||
|     file_name.push_str(dump_uid); | ||||
|     file_name.push_str(".dump"); | ||||
|     dumps_dir.join(file_name) | ||||
| } | ||||
|  | ||||
| /// Write the dump metadata (`indexes` plus the database version) in `dir_path`. | ||||
| fn dump_metadata(data: &web::Data<Data>, dir_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> { | ||||
|     let (major, minor, patch) = data.db.version(); | ||||
|     let db_version = format!("{}.{}.{}", major, minor, patch); | ||||
|  | ||||
|     DumpMetadata::new(indexes, db_version).to_path(dir_path) | ||||
| } | ||||
|  | ||||
| /// Export settings of provided index in dump | ||||
| /// | ||||
| /// The settings are fetched with `get_all_sync` using `reader` and written to a | ||||
| /// `settings.json` file in `dir_path`. | ||||
| fn dump_index_settings(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> { | ||||
|     let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?; | ||||
|  | ||||
|     settings_to_path(&settings, dir_path) | ||||
| } | ||||
|  | ||||
| /// Export updates of provided index in dump | ||||
| /// | ||||
| /// Every update status of `index_uid` is written as one JSON value per line in an | ||||
| /// `updates.jsonl` file created in `dir_path`. | ||||
| fn dump_index_updates(data: &web::Data<Data>, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> { | ||||
|     let updates_path = dir_path.join("updates.jsonl"); | ||||
|     let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?; | ||||
|  | ||||
|     // buffer the output: writing straight to the `File` issues a write per serialized fragment | ||||
|     let mut writer = std::io::BufWriter::new(File::create(updates_path)?); | ||||
|  | ||||
|     for update in updates { | ||||
|         serde_json::to_writer(&mut writer, &update)?; | ||||
|         writeln!(writer)?; | ||||
|     } | ||||
|  | ||||
|     // flush explicitly so that a write error is reported instead of being lost on drop | ||||
|     writer.flush()?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Export documents of provided index in dump | ||||
| /// | ||||
| /// Documents are fetched page by page (`data.dump_batch_size` documents per page) and written | ||||
| /// as one JSON value per line in a `documents.jsonl` file created in `dir_path`. | ||||
| fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> { | ||||
|     let documents_path = dir_path.join("documents.jsonl"); | ||||
|     // buffer the output: writing straight to the `File` issues a write per serialized fragment | ||||
|     let mut writer = std::io::BufWriter::new(File::create(documents_path)?); | ||||
|     let dump_batch_size = data.dump_batch_size; | ||||
|  | ||||
|     // NOTE(review): a `dump_batch_size` of 0 presumably yields an empty first page and ends the | ||||
|     // loop without exporting anything; confirm that the value is validated by the caller | ||||
|     let mut offset = 0; | ||||
|     loop { | ||||
|         let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, dump_batch_size, None)?; | ||||
|         // an empty page means every document has been exported | ||||
|         if documents.is_empty() { | ||||
|             break; | ||||
|         } | ||||
|         offset += dump_batch_size; | ||||
|  | ||||
|         for document in documents { | ||||
|             serde_json::to_writer(&mut writer, &document)?; | ||||
|             writeln!(writer)?; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // flush explicitly so that a write error is reported instead of being lost on drop | ||||
|     writer.flush()?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Write error with a context. | ||||
| fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) { | ||||
|         let error_message = format!("{}; {}", context, error); | ||||
|          | ||||
|         error!("Something went wrong during dump process: {}", &error_message); | ||||
|         dump_info.with_error(Error::dump_failed(error_message).into()).set_current(); | ||||
| } | ||||
|  | ||||
| /// Main function of dump. | ||||
| /// | ||||
| /// Exports the metadata, then the settings, documents and updates of every index into a | ||||
| /// temporary directory, and compresses that directory into `{dump_uid}.dump` in `dumps_dir`. | ||||
| /// On the first failing step the error is recorded through `fail_dump_process` and the | ||||
| /// function returns; on success a `Done` record with the same uid becomes the current dump. | ||||
| fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo) { | ||||
|     // open read transaction on Update | ||||
|     let update_reader = match data.db.update_read_txn() { | ||||
|         Ok(r) => r, | ||||
|         Err(e) => { | ||||
|             fail_dump_process(dump_info, "creating RO transaction on updates", e); | ||||
|             return ; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // open read transaction on Main | ||||
|     let main_reader = match data.db.main_read_txn() { | ||||
|         Ok(r) => r, | ||||
|         Err(e) => { | ||||
|             fail_dump_process(dump_info, "creating RO transaction on main", e); | ||||
|             return ; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // create a temporary directory | ||||
|     let tmp_dir = match TempDir::new() { | ||||
|         Ok(tmp_dir) => tmp_dir, | ||||
|         Err(e) => { | ||||
|             fail_dump_process(dump_info, "creating temporary directory", e); | ||||
|             return ; | ||||
|         } | ||||
|     }; | ||||
|     let tmp_dir_path = tmp_dir.path(); | ||||
|  | ||||
|     // fetch indexes | ||||
|     let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) { | ||||
|         Ok(indexes) => indexes, | ||||
|         Err(e) => { | ||||
|             fail_dump_process(dump_info, "listing indexes", e); | ||||
|             return ; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // create metadata (takes a copy of the index list, which is iterated again below) | ||||
|     if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) { | ||||
|         fail_dump_process(dump_info, "generating metadata", e); | ||||
|         return ; | ||||
|     } | ||||
|  | ||||
|     // export settings, updates and documents for each indexes | ||||
|     for index in indexes { | ||||
|         let index_path = tmp_dir_path.join(&index.uid); | ||||
|  | ||||
|         // create index sub-directory | ||||
|         if let Err(e) = create_dir_all(&index_path) { | ||||
|             fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e); | ||||
|             return ; | ||||
|         } | ||||
|  | ||||
|         // export settings | ||||
|         if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) { | ||||
|             fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e); | ||||
|             return ; | ||||
|         } | ||||
|  | ||||
|         // export documents | ||||
|         if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) { | ||||
|             fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e); | ||||
|             return ; | ||||
|         } | ||||
|  | ||||
|         // export updates | ||||
|         if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) { | ||||
|             fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e); | ||||
|             return ; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // compress dump in a file named `{dump_uid}.dump` in `dumps_dir` | ||||
|     if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) { | ||||
|         fail_dump_process(dump_info, "compressing dump", e); | ||||
|         return ; | ||||
|     } | ||||
|  | ||||
|     // update dump info to `done` | ||||
|     let resume = DumpInfo::new( | ||||
|         dump_info.uid, | ||||
|         DumpStatus::Done | ||||
|     ); | ||||
|  | ||||
|     resume.set_current(); | ||||
| } | ||||
|  | ||||
| /// Start a dump in a background thread and return its `DumpInfo`. | ||||
| /// | ||||
| /// Creates `dumps_dir` if needed, returns `Error::dump_conflict()` when the current dump is | ||||
| /// still in progress, and otherwise registers a new `InProgress` dump before spawning | ||||
| /// `dump_process` for it. | ||||
| pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> { | ||||
|     create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?; | ||||
|  | ||||
|     // Check for a dump in progress and register the new one under a single lock acquisition, | ||||
|     // so that two concurrent callers cannot both pass the check and start overlapping dumps. | ||||
|     let info = { | ||||
|         let mut current = DUMP_INFO.lock().unwrap(); | ||||
|         if current.as_ref().map_or(false, DumpInfo::dump_already_in_progress) { | ||||
|             return Err(Error::dump_conflict()); | ||||
|         } | ||||
|  | ||||
|         // generate a new dump info and make it the current one | ||||
|         let info = DumpInfo::new(generate_uid(), DumpStatus::InProgress); | ||||
|         *current = Some(info.clone()); | ||||
|         info | ||||
|     }; | ||||
|  | ||||
|     let data = data.clone(); | ||||
|     let dumps_dir = dumps_dir.to_path_buf(); | ||||
|     let info_cloned = info.clone(); | ||||
|     // run dump process in a new thread | ||||
|     thread::spawn(move || dump_process(data, dumps_dir, info_cloned)); | ||||
|  | ||||
|     Ok(info) | ||||
| } | ||||
							
								
								
									
										297
									
								
								meilisearch-http/src/error.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								meilisearch-http/src/error.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,297 @@ | ||||
| use std::error; | ||||
| use std::fmt; | ||||
|  | ||||
| use actix_http::ResponseBuilder; | ||||
| use actix_web as aweb; | ||||
| use actix_web::error::{JsonPayloadError, QueryPayloadError}; | ||||
| use actix_web::http::StatusCode; | ||||
| use serde::ser::{Serialize, Serializer, SerializeStruct}; | ||||
| use meilisearch_error::{ErrorCode, Code}; | ||||
|  | ||||
|  | ||||
| /// Error type returned to HTTP clients; wraps any boxed `ErrorCode` implementor. | ||||
| #[derive(Debug)] | ||||
| pub struct ResponseError { | ||||
|     // the wrapped error, which supplies both the message and the error code | ||||
|     inner: Box<dyn ErrorCode>, | ||||
| } | ||||
|  | ||||
| // marker impl: every method of `std::error::Error` keeps its default implementation | ||||
| impl error::Error for ResponseError {} | ||||
|  | ||||
| impl ErrorCode for ResponseError { | ||||
|     /// Returns the code of the wrapped error. | ||||
|     fn error_code(&self) -> Code { | ||||
|         self.inner.error_code() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for ResponseError { | ||||
|     /// Formats the wrapped error by delegating to its own `fmt`. | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         self.inner.fmt(f) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO: remove this when implementing actual error handling | ||||
| // Until then every `anyhow::Error` is reported as `Error::NotFound` carrying its message. | ||||
| impl From<anyhow::Error> for ResponseError { | ||||
|     fn from(other: anyhow::Error) -> ResponseError { | ||||
|         ResponseError { inner: Box::new(Error::NotFound(other.to_string())) } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<Error> for ResponseError { | ||||
|     /// Boxes `error` as the inner error of a `ResponseError`. | ||||
|     fn from(error: Error) -> ResponseError { | ||||
|         ResponseError { inner: Box::new(error) } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<FacetCountError> for ResponseError { | ||||
|     /// Boxes `err` as the inner error of a `ResponseError`. | ||||
|     fn from(err: FacetCountError) -> ResponseError { | ||||
|         ResponseError { inner: Box::new(err) } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Serialize for ResponseError { | ||||
|     /// Serializes the error as a four-field struct: `message`, `errorCode`, `errorType` and | ||||
|     /// `errorLink`. | ||||
|     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: Serializer, | ||||
|     { | ||||
|         let mut fields = serializer.serialize_struct("ResponseError", 4)?; | ||||
|  | ||||
|         fields.serialize_field("message", &self.to_string())?; | ||||
|         fields.serialize_field("errorCode", &self.error_name())?; | ||||
|         fields.serialize_field("errorType", &self.error_type())?; | ||||
|         fields.serialize_field("errorLink", &self.error_url())?; | ||||
|  | ||||
|         fields.end() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl aweb::error::ResponseError for ResponseError { | ||||
|     /// Builds the HTTP response: the error serialized as JSON, with `status_code()` as status. | ||||
|     fn error_response(&self) -> aweb::HttpResponse { | ||||
|         ResponseBuilder::new(self.status_code()).json(&self) | ||||
|     } | ||||
|  | ||||
|     /// Returns the HTTP status given by `http_status()`. | ||||
|     fn status_code(&self) -> StatusCode { | ||||
|         self.http_status() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Errors raised by the HTTP layer. | ||||
| /// | ||||
| /// `String` payloads hold the text inserted in the message by the `Display` impl; each variant | ||||
| /// is mapped to a `Code` by the `ErrorCode` impl. | ||||
| #[derive(Debug)] | ||||
| pub enum Error { | ||||
|     // (parameter, error description) | ||||
|     BadParameter(String, String), | ||||
|     BadRequest(String), | ||||
|     CreateIndex(String), | ||||
|     DocumentNotFound(String), | ||||
|     IndexNotFound(String), | ||||
|     IndexAlreadyExists(String), | ||||
|     Internal(String), | ||||
|     InvalidIndexUid, | ||||
|     InvalidToken(String), | ||||
|     MissingAuthorizationHeader, | ||||
|     NotFound(String), | ||||
|     OpenIndex(String), | ||||
|     // (document id, error description) | ||||
|     RetrieveDocument(u32, String), | ||||
|     SearchDocuments(String), | ||||
|     PayloadTooLarge, | ||||
|     UnsupportedMediaType, | ||||
|     DumpAlreadyInProgress, | ||||
|     DumpProcessFailed(String), | ||||
| } | ||||
|  | ||||
| // marker impl: every method of `std::error::Error` keeps its default implementation | ||||
| impl error::Error for Error {} | ||||
|  | ||||
| impl ErrorCode for Error { | ||||
|     /// Maps each variant to the `Code` reported to clients. | ||||
|     fn error_code(&self) -> Code { | ||||
|         use Error::*; | ||||
|         match self { | ||||
|             BadParameter(_, _) => Code::BadParameter, | ||||
|             BadRequest(_) => Code::BadRequest, | ||||
|             CreateIndex(_) => Code::CreateIndex, | ||||
|             DocumentNotFound(_) => Code::DocumentNotFound, | ||||
|             IndexNotFound(_) => Code::IndexNotFound, | ||||
|             IndexAlreadyExists(_) => Code::IndexAlreadyExists, | ||||
|             Internal(_) => Code::Internal, | ||||
|             InvalidIndexUid => Code::InvalidIndexUid, | ||||
|             InvalidToken(_) => Code::InvalidToken, | ||||
|             MissingAuthorizationHeader => Code::MissingAuthorizationHeader, | ||||
|             NotFound(_) => Code::NotFound, | ||||
|             OpenIndex(_) => Code::OpenIndex, | ||||
|             RetrieveDocument(_, _) => Code::RetrieveDocument, | ||||
|             SearchDocuments(_) => Code::SearchDocuments, | ||||
|             PayloadTooLarge => Code::PayloadTooLarge, | ||||
|             UnsupportedMediaType => Code::UnsupportedMediaType, | ||||
|             // The dump variants are really constructed (`dump_conflict()`, `dump_failed()`), | ||||
|             // so they must map to a code: a catch-all `unreachable!()` panics as soon as such | ||||
|             // an error is serialized. Existing codes are used as stand-ins. | ||||
|             // TODO: use dedicated dump codes once `meilisearch_error::Code` provides them. | ||||
|             DumpAlreadyInProgress => Code::BadRequest, | ||||
|             DumpProcessFailed(_) => Code::Internal, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Errors related to facet counts; all of them are reported with `Code::BadRequest`. | ||||
| #[derive(Debug)] | ||||
| pub enum FacetCountError { | ||||
|     // holds the name of the attribute that is not set as facet | ||||
|     AttributeNotSet(String), | ||||
|     SyntaxError(String), | ||||
|     // `found` is the offending token, `expected` the list shown in the message | ||||
|     UnexpectedToken { found: String, expected: &'static [&'static str] }, | ||||
|     NoFacetSet, | ||||
| } | ||||
|  | ||||
| // marker impl: every method of `std::error::Error` keeps its default implementation | ||||
| impl error::Error for FacetCountError {} | ||||
|  | ||||
| impl ErrorCode for FacetCountError { | ||||
|     /// Every facet count error is reported as `Code::BadRequest`. | ||||
|     fn error_code(&self) -> Code { | ||||
|         Code::BadRequest | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl FacetCountError { | ||||
|     /// Builds an `UnexpectedToken` error from the string form of `found` and the `expected` list. | ||||
|     pub fn unexpected_token(found: impl ToString, expected: &'static [&'static str]) -> FacetCountError { | ||||
|         Self::UnexpectedToken { | ||||
|             found: found.to_string(), | ||||
|             expected, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<serde_json::error::Error> for FacetCountError { | ||||
|     /// Turns a JSON error into a `SyntaxError` carrying its message. | ||||
|     fn from(other: serde_json::error::Error) -> FacetCountError { | ||||
|         FacetCountError::SyntaxError(other.to_string()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for FacetCountError { | ||||
|     /// Writes the user-facing message of the error. | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         match self { | ||||
|             Self::AttributeNotSet(attr) => write!(f, "Attribute {} is not set as facet", attr), | ||||
|             Self::SyntaxError(msg) => write!(f, "Syntax error: {}", msg), | ||||
|             Self::UnexpectedToken { expected, found } => write!(f, "Unexpected {} found, expected {:?}", found, expected), | ||||
|             Self::NoFacetSet => f.write_str("Can't perform facet count, as no facet is set"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Error { | ||||
|     /// Builds an `Internal` error from the string form of `err`. | ||||
|     pub fn internal(err: impl fmt::Display) -> Self { | ||||
|         Self::Internal(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `BadRequest` error from the string form of `err`. | ||||
|     pub fn bad_request(err: impl fmt::Display) -> Self { | ||||
|         Self::BadRequest(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `MissingAuthorizationHeader` error. | ||||
|     pub fn missing_authorization_header() -> Self { | ||||
|         Self::MissingAuthorizationHeader | ||||
|     } | ||||
|  | ||||
|     /// Builds an `InvalidToken` error from the string form of `err`. | ||||
|     pub fn invalid_token(err: impl fmt::Display) -> Self { | ||||
|         Self::InvalidToken(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `NotFound` error from the string form of `err`. | ||||
|     pub fn not_found(err: impl fmt::Display) -> Self { | ||||
|         Self::NotFound(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds an `IndexNotFound` error from the string form of `err`. | ||||
|     pub fn index_not_found(err: impl fmt::Display) -> Self { | ||||
|         Self::IndexNotFound(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `DocumentNotFound` error from the string form of `err`. | ||||
|     pub fn document_not_found(err: impl fmt::Display) -> Self { | ||||
|         Self::DocumentNotFound(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `BadParameter` error from the string forms of `param` and `err`. | ||||
|     pub fn bad_parameter(param: impl fmt::Display, err: impl fmt::Display) -> Self { | ||||
|         Self::BadParameter(param.to_string(), err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds an `OpenIndex` error from the string form of `err`. | ||||
|     pub fn open_index(err: impl fmt::Display) -> Self { | ||||
|         Self::OpenIndex(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `CreateIndex` error from the string form of `err`. | ||||
|     pub fn create_index(err: impl fmt::Display) -> Self { | ||||
|         Self::CreateIndex(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds an `InvalidIndexUid` error. | ||||
|     pub fn invalid_index_uid() -> Self { | ||||
|         Self::InvalidIndexUid | ||||
|     } | ||||
|  | ||||
|     /// Builds a `RetrieveDocument` error for `doc_id` from the string form of `err`. | ||||
|     pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> Self { | ||||
|         Self::RetrieveDocument(doc_id, err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `SearchDocuments` error from the string form of `err`. | ||||
|     pub fn search_documents(err: impl fmt::Display) -> Self { | ||||
|         Self::SearchDocuments(err.to_string()) | ||||
|     } | ||||
|  | ||||
|     /// Builds a `DumpAlreadyInProgress` error. | ||||
|     pub fn dump_conflict() -> Self { | ||||
|         Self::DumpAlreadyInProgress | ||||
|     } | ||||
|  | ||||
|     /// Builds a `DumpProcessFailed` error carrying `message`. | ||||
|     pub fn dump_failed(message: String) -> Self { | ||||
|         Self::DumpProcessFailed(message) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for Error { | ||||
|     /// Writes the user-facing message of the error; variant payloads are inserted in the text. | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         match self { | ||||
|             Self::BadParameter(param, err) => write!(f, "Url parameter {} error: {}", param, err), | ||||
|             // `BadRequest` and `Internal` print their payload as is, without any prefix | ||||
|             Self::BadRequest(err) => f.write_str(err), | ||||
|             Self::CreateIndex(err) => write!(f, "Impossible to create index; {}", err), | ||||
|             Self::DocumentNotFound(document_id) => write!(f, "Document with id {} not found", document_id), | ||||
|             Self::IndexNotFound(index_uid) => write!(f, "Index {} not found", index_uid), | ||||
|             Self::IndexAlreadyExists(index_uid) => write!(f, "Index {} already exists", index_uid), | ||||
|             Self::Internal(err) => f.write_str(err), | ||||
|             Self::InvalidIndexUid => f.write_str("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_)."), | ||||
|             Self::InvalidToken(err) => write!(f, "Invalid API key: {}", err), | ||||
|             Self::MissingAuthorizationHeader => f.write_str("You must have an authorization token"), | ||||
|             Self::NotFound(err) => write!(f, "{} not found", err), | ||||
|             Self::OpenIndex(err) => write!(f, "Impossible to open index; {}", err), | ||||
|             Self::RetrieveDocument(id, err) => write!(f, "Impossible to retrieve the document with id: {}; {}", id, err), | ||||
|             Self::SearchDocuments(err) => write!(f, "Impossible to search documents; {}", err), | ||||
|             Self::PayloadTooLarge => f.write_str("Payload too large"), | ||||
|             Self::UnsupportedMediaType => f.write_str("Unsupported media type"), | ||||
|             Self::DumpAlreadyInProgress => f.write_str("Another dump is already in progress"), | ||||
|             Self::DumpProcessFailed(message) => write!(f, "Dump process failed: {}", message), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<std::io::Error> for Error { | ||||
|     /// Reports an I/O error as an `Internal` error carrying its message. | ||||
|     fn from(err: std::io::Error) -> Error { | ||||
|         Self::internal(err) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<actix_http::Error> for Error { | ||||
|     /// Reports an `actix_http` error as an `Internal` error carrying its message. | ||||
|     fn from(err: actix_http::Error) -> Error { | ||||
|         Self::internal(err) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<serde_json::error::Error> for Error { | ||||
|     /// Reports a JSON error as an `Internal` error carrying its message. | ||||
|     fn from(err: serde_json::error::Error) -> Error { | ||||
|         Self::internal(err) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<JsonPayloadError> for Error { | ||||
|     /// Translates a JSON payload error: deserialization and payload problems become | ||||
|     /// `BadRequest`, overflow becomes `PayloadTooLarge` and a wrong content type becomes | ||||
|     /// `UnsupportedMediaType`. | ||||
|     fn from(err: JsonPayloadError) -> Error { | ||||
|         match err { | ||||
|             JsonPayloadError::Deserialize(cause) => Self::BadRequest(format!("Invalid JSON: {}", cause)), | ||||
|             JsonPayloadError::Overflow => Self::PayloadTooLarge, | ||||
|             JsonPayloadError::ContentType => Self::UnsupportedMediaType, | ||||
|             JsonPayloadError::Payload(cause) => Self::BadRequest(format!("Problem while decoding the request: {}", cause)), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<QueryPayloadError> for Error { | ||||
|     /// Translates a query deserialization error into a `BadRequest`. | ||||
|     fn from(err: QueryPayloadError) -> Error { | ||||
|         match err { | ||||
|             QueryPayloadError::Deserialize(cause) => Self::BadRequest(format!("Invalid query parameters: {}", cause)), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn payload_error_handler<E: Into<Error>>(err: E) -> ResponseError { | ||||
|     let error: Error = err.into(); | ||||
|     error.into() | ||||
| } | ||||
							
								
								
									
										103
									
								
								meilisearch-http/src/helpers/authentication.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								meilisearch-http/src/helpers/authentication.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| use std::cell::RefCell; | ||||
| use std::pin::Pin; | ||||
| use std::rc::Rc; | ||||
| use std::task::{Context, Poll}; | ||||
|  | ||||
| use actix_service::{Service, Transform}; | ||||
| use actix_web::{dev::ServiceRequest, dev::ServiceResponse, web}; | ||||
| use futures::future::{err, ok, Future, Ready}; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::Data; | ||||
|  | ||||
| /// Access level required by a route, checked against the `X-Meili-API-Key` header. | ||||
| /// | ||||
| /// Each level also accepts the keys of the more privileged levels. | ||||
| #[derive(Clone)] | ||||
| pub enum Authentication { | ||||
|     /// Accepts the master, private or public key. | ||||
|     Public, | ||||
|     /// Accepts the master or private key. | ||||
|     Private, | ||||
|     /// Accepts the master key only. | ||||
|     Admin, | ||||
| } | ||||
|  | ||||
| impl<S: 'static, B> Transform<S> for Authentication | ||||
| where | ||||
|     S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>, | ||||
|     S::Future: 'static, | ||||
|     B: 'static, | ||||
| { | ||||
|     type Request = ServiceRequest; | ||||
|     type Response = ServiceResponse<B>; | ||||
|     type Error = actix_web::Error; | ||||
|     type InitError = (); | ||||
|     type Transform = LoggingMiddleware<S>; | ||||
|     type Future = Ready<Result<Self::Transform, Self::InitError>>; | ||||
|  | ||||
|     /// Wraps `service` in a middleware enforcing this authentication level. | ||||
|     fn new_transform(&self, service: S) -> Self::Future { | ||||
|         let required_level = self.clone(); | ||||
|         let shared_service = Rc::new(RefCell::new(service)); | ||||
|  | ||||
|         ok(LoggingMiddleware { | ||||
|             acl: required_level, | ||||
|             service: shared_service, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Middleware built by [`Authentication`]; despite its name it checks API keys. | ||||
| pub struct LoggingMiddleware<S> { | ||||
|     // Access level a request's key must grant. | ||||
|     acl: Authentication, | ||||
|     // Wrapped service, shared so `call` can clone a handle to it. | ||||
|     service: Rc<RefCell<S>>, | ||||
| } | ||||
|  | ||||
| // Checks the `X-Meili-API-Key` header of each request against the configured API keys and | ||||
| // only forwards the request to the wrapped service when the key grants the required level. | ||||
| #[allow(clippy::type_complexity)] | ||||
| impl<S, B> Service for LoggingMiddleware<S> | ||||
| where | ||||
|     S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static, | ||||
|     S::Future: 'static, | ||||
|     B: 'static, | ||||
| { | ||||
|     type Request = ServiceRequest; | ||||
|     type Response = ServiceResponse<B>; | ||||
|     type Error = actix_web::Error; | ||||
|     type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>; | ||||
|  | ||||
|     // Readiness is delegated to the wrapped service. | ||||
|     fn poll_ready(&mut self, cx: &mut Context) -> Poll<Result<(), Self::Error>> { | ||||
|         self.service.poll_ready(cx) | ||||
|     } | ||||
|  | ||||
|     // Forwards the request when it is authorized, otherwise resolves to an error response. | ||||
|     fn call(&mut self, req: ServiceRequest) -> Self::Future { | ||||
|         let mut svc = self.service.clone(); | ||||
|         // This unwrap is left because this error should never appear. If that's the case, then | ||||
|         // it means that actix-web has an issue or someone changes the type `Data`. | ||||
|         let data = req.app_data::<web::Data<Data>>().unwrap(); | ||||
|  | ||||
|         // Without a master key no authentication is enforced: every request is forwarded. | ||||
|         if data.api_keys().master.is_none() { | ||||
|             return Box::pin(svc.call(req)); | ||||
|         } | ||||
|  | ||||
|         // A header that is absent or cannot be read as a string is reported the same way, | ||||
|         // as a missing authorization header. | ||||
|         let auth_header = match req.headers().get("X-Meili-API-Key") { | ||||
|             Some(auth) => match auth.to_str() { | ||||
|                 Ok(auth) => auth, | ||||
|                 Err(_) => return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into())), | ||||
|             }, | ||||
|             None => { | ||||
|                 return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into())); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         // Each level accepts its own key and the keys of the more privileged levels. | ||||
|         let authenticated = match self.acl { | ||||
|             Authentication::Admin => data.api_keys().master.as_deref() == Some(auth_header), | ||||
|             Authentication::Private => { | ||||
|                 data.api_keys().master.as_deref() == Some(auth_header) | ||||
|                     || data.api_keys().private.as_deref() == Some(auth_header) | ||||
|             } | ||||
|             Authentication::Public => { | ||||
|                 data.api_keys().master.as_deref() == Some(auth_header) | ||||
|                     || data.api_keys().private.as_deref() == Some(auth_header) | ||||
|                     || data.api_keys().public.as_deref() == Some(auth_header) | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         if authenticated { | ||||
|             Box::pin(svc.call(req)) | ||||
|         } else { | ||||
|             // The rejected key is carried inside the `InvalidToken` error. | ||||
|             Box::pin(err( | ||||
|                 ResponseError::from(Error::InvalidToken(auth_header.to_string())).into() | ||||
|             )) | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										27
									
								
								meilisearch-http/src/helpers/compression.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								meilisearch-http/src/helpers/compression.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| use flate2::Compression; | ||||
| use flate2::read::GzDecoder; | ||||
| use flate2::write::GzEncoder; | ||||
| use std::fs::{create_dir_all, File}; | ||||
| use std::path::Path; | ||||
| use tar::{Builder, Archive}; | ||||
|  | ||||
| use crate::error::Error; | ||||
|  | ||||
| /// Packs the content of the `src` directory into a gzip-compressed tar archive at `dest`. | ||||
| pub fn to_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> { | ||||
|     let archive_file = File::create(dest)?; | ||||
|     let mut archive = Builder::new(GzEncoder::new(archive_file, Compression::default())); | ||||
|     archive.append_dir_all(".", src)?; | ||||
|     // Unwrap the tar builder first, then finish the gzip stream it was writing to. | ||||
|     archive.into_inner()?.finish()?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Unpacks the gzip-compressed tar archive `src` into `dest`, creating `dest` if needed. | ||||
| pub fn from_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> { | ||||
|     let mut archive = Archive::new(GzDecoder::new(File::open(src)?)); | ||||
|     create_dir_all(dest)?; | ||||
|     archive.unpack(dest)?; | ||||
|     Ok(()) | ||||
| } | ||||
							
								
								
									
										6
									
								
								meilisearch-http/src/helpers/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								meilisearch-http/src/helpers/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| pub mod authentication; | ||||
| pub mod normalize_path; | ||||
| pub mod compression; | ||||
|  | ||||
| pub use authentication::Authentication; | ||||
| pub use normalize_path::NormalizePath; | ||||
							
								
								
									
										86
									
								
								meilisearch-http/src/helpers/normalize_path.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								meilisearch-http/src/helpers/normalize_path.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| /// From https://docs.rs/actix-web/3.0.0-alpha.2/src/actix_web/middleware/normalize.rs.html#34 | ||||
| use actix_http::Error; | ||||
| use actix_service::{Service, Transform}; | ||||
| use actix_web::{ | ||||
|     dev::ServiceRequest, | ||||
|     dev::ServiceResponse, | ||||
|     http::uri::{PathAndQuery, Uri}, | ||||
| }; | ||||
| use futures::future::{ok, Ready}; | ||||
| use regex::Regex; | ||||
| use std::task::{Context, Poll}; | ||||
| /// Middleware factory that wraps a service in a `NormalizePathNormalization`. | ||||
| pub struct NormalizePath; | ||||
|  | ||||
| impl<S, B> Transform<S> for NormalizePath | ||||
| where | ||||
|     S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>, | ||||
|     S::Future: 'static, | ||||
| { | ||||
|     type Request = ServiceRequest; | ||||
|     type Response = ServiceResponse<B>; | ||||
|     type Error = Error; | ||||
|     type InitError = (); | ||||
|     type Transform = NormalizePathNormalization<S>; | ||||
|     type Future = Ready<Result<Self::Transform, Self::InitError>>; | ||||
|  | ||||
|     /// Builds the normalizing service around `service`. | ||||
|     fn new_transform(&self, service: S) -> Self::Future { | ||||
|         // Matches any run of two or more consecutive slashes. | ||||
|         let merge_slash = Regex::new("//+").unwrap(); | ||||
|  | ||||
|         ok(NormalizePathNormalization { service, merge_slash }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Service that rewrites the request path before calling the wrapped service. | ||||
| pub struct NormalizePathNormalization<S> { | ||||
|     // Wrapped service receiving the request once its path has been rewritten. | ||||
|     service: S, | ||||
|     // Regex matching runs of two or more slashes (built from `"//+"`). | ||||
|     merge_slash: Regex, | ||||
| } | ||||
|  | ||||
| // Collapses repeated slashes and removes trailing slashes from the request path, then | ||||
| // forwards the request to the wrapped service. | ||||
| impl<S, B> Service for NormalizePathNormalization<S> | ||||
| where | ||||
|     S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>, | ||||
|     S::Future: 'static, | ||||
| { | ||||
|     type Request = ServiceRequest; | ||||
|     type Response = ServiceResponse<B>; | ||||
|     type Error = Error; | ||||
|     type Future = S::Future; | ||||
|  | ||||
|     // Readiness is delegated to the wrapped service. | ||||
|     fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { | ||||
|         self.service.poll_ready(cx) | ||||
|     } | ||||
|  | ||||
|     fn call(&mut self, mut req: ServiceRequest) -> Self::Future { | ||||
|         let head = req.head_mut(); | ||||
|  | ||||
|         // always add trailing slash, might be an extra one | ||||
|         let path = head.uri.path().to_string() + "/"; | ||||
|  | ||||
|         // Because a slash was appended, a path that already ended with `/` now contains `//` | ||||
|         // and matches too; paths without repeated or trailing slashes are left untouched. | ||||
|         if self.merge_slash.find(&path).is_some() { | ||||
|             // normalize multiple /'s to one / | ||||
|             let path = self.merge_slash.replace_all(&path, "/"); | ||||
|  | ||||
|             // Keep a lone `/` (the root path) as is, otherwise drop the trailing slashes. | ||||
|             let path = if path.len() > 1 { | ||||
|                 path.trim_end_matches('/') | ||||
|             } else { | ||||
|                 &path | ||||
|             }; | ||||
|  | ||||
|             let mut parts = head.uri.clone().into_parts(); | ||||
|             // NOTE(review): assumes the request URI always has a path-and-query part; confirm. | ||||
|             let pq = parts.path_and_query.as_ref().unwrap(); | ||||
|  | ||||
|             // Re-attach the original query string, if any, to the rewritten path. | ||||
|             let path = if let Some(q) = pq.query() { | ||||
|                 bytes::Bytes::from(format!("{}?{}", path, q)) | ||||
|             } else { | ||||
|                 bytes::Bytes::copy_from_slice(path.as_bytes()) | ||||
|             }; | ||||
|             parts.path_and_query = Some(PathAndQuery::from_maybe_shared(path).unwrap()); | ||||
|  | ||||
|             // Update both the routing information and the request head with the new URI. | ||||
|             let uri = Uri::from_parts(parts).unwrap(); | ||||
|             req.match_info_mut().get_mut().update(&uri); | ||||
|             req.head_mut().uri = uri; | ||||
|         } | ||||
|  | ||||
|         self.service.call(req) | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,260 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::io; | ||||
| use std::fs::File; | ||||
|  | ||||
| use anyhow::Result; | ||||
| use flate2::read::GzDecoder; | ||||
| use grenad::CompressionType; | ||||
| use log::info; | ||||
| use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; | ||||
| use milli::Index; | ||||
| use rayon::ThreadPool; | ||||
|  | ||||
| use crate::index_controller::updates::{Failed, Processed, Processing}; | ||||
| use crate::index_controller::{Facets, Settings, UpdateMeta, UpdateResult}; | ||||
| use crate::option::IndexerOpts; | ||||
|  | ||||
| /// Holds the indexing options and the thread pool used to build and run index updates. | ||||
| /// | ||||
| /// Every field is forwarded to an `UpdateBuilder` each time an update is handled. | ||||
| pub struct UpdateHandler { | ||||
|     // Only forwarded to the builder when set. | ||||
|     max_nb_chunks: Option<usize>, | ||||
|     // Only forwarded to the builder when set. | ||||
|     chunk_compression_level: Option<u32>, | ||||
|     // Pool built once in `new` and lent to every update builder. | ||||
|     thread_pool: ThreadPool, | ||||
|     // Passed to the builder's `log_every_n`. | ||||
|     log_frequency: usize, | ||||
|     // In bytes, converted from the `max_memory` option. | ||||
|     max_memory: usize, | ||||
|     linked_hash_map_size: usize, | ||||
|     chunk_compression_type: CompressionType, | ||||
|     // In bytes, converted from the `chunk_fusing_shrink_size` option. | ||||
|     chunk_fusing_shrink_size: u64, | ||||
| } | ||||
|  | ||||
| impl UpdateHandler { | ||||
|     /// Creates a handler from the indexer options, building its dedicated thread pool. | ||||
|     /// | ||||
|     /// Fails when the thread pool cannot be built. | ||||
|     pub fn new( | ||||
|         opt: &IndexerOpts, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         // When no job count is configured, `0` is handed to rayon. | ||||
|         let jobs = opt.indexing_jobs.unwrap_or(0); | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new().num_threads(jobs).build()?; | ||||
|         let max_memory = opt.max_memory.get_bytes() as usize; | ||||
|         let chunk_fusing_shrink_size = opt.chunk_fusing_shrink_size.get_bytes(); | ||||
|  | ||||
|         let handler = Self { | ||||
|             max_nb_chunks: opt.max_nb_chunks, | ||||
|             chunk_compression_level: opt.chunk_compression_level, | ||||
|             thread_pool, | ||||
|             log_frequency: opt.log_every_n, | ||||
|             max_memory, | ||||
|             linked_hash_map_size: opt.linked_hash_map_size, | ||||
|             chunk_compression_type: opt.chunk_compression_type, | ||||
|             chunk_fusing_shrink_size, | ||||
|         }; | ||||
|         Ok(handler) | ||||
|     } | ||||
|  | ||||
|     /// Creates the `UpdateBuilder` for `update_id`, configured with this handler's options. | ||||
|     fn update_buidler(&self, update_id: u64) -> UpdateBuilder { | ||||
|         let mut builder = UpdateBuilder::new(update_id); | ||||
|  | ||||
|         // Optional settings are only applied when they were provided. | ||||
|         if let Some(nb_chunks) = self.max_nb_chunks { | ||||
|             builder.max_nb_chunks(nb_chunks); | ||||
|         } | ||||
|         if let Some(level) = self.chunk_compression_level { | ||||
|             builder.chunk_compression_level(level); | ||||
|         } | ||||
|  | ||||
|         // The remaining settings are always applied. | ||||
|         builder.thread_pool(&self.thread_pool); | ||||
|         builder.log_every_n(self.log_frequency); | ||||
|         builder.max_memory(self.max_memory); | ||||
|         builder.linked_hash_map_size(self.linked_hash_map_size); | ||||
|         builder.chunk_compression_type(self.chunk_compression_type); | ||||
|         builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size); | ||||
|  | ||||
|         builder | ||||
|     } | ||||
|  | ||||
|     /// Indexes the documents read from `content` into `index` within one write transaction. | ||||
|     /// | ||||
|     /// `primary_key` is only stored when the index has no primary key yet. The transaction | ||||
|     /// is committed only when the indexing succeeded. | ||||
|     fn update_documents( | ||||
|         &self, | ||||
|         format: UpdateFormat, | ||||
|         method: IndexDocumentsMethod, | ||||
|         content: File, | ||||
|         update_builder: UpdateBuilder, | ||||
|         primary_key: Option<&str>, | ||||
|         index: &Index, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         info!("performing document addition"); | ||||
|         // The whole update runs inside a single write transaction. | ||||
|         let mut wtxn = index.write_txn()?; | ||||
|  | ||||
|         // Store the provided primary key only if the index does not have one yet. | ||||
|         if let (None, Some(primary_key)) = (index.primary_key(&wtxn)?, primary_key) { | ||||
|             index.put_primary_key(&mut wtxn, primary_key)?; | ||||
|         } | ||||
|  | ||||
|         let mut builder = update_builder.index_documents(&mut wtxn, index); | ||||
|         builder.update_format(format); | ||||
|         builder.index_documents_method(method); | ||||
|  | ||||
|         // The content is currently never treated as gzipped. | ||||
|         let gzipped = false; | ||||
|         let reader: Box<dyn io::Read> = if gzipped { | ||||
|             Box::new(GzDecoder::new(content)) | ||||
|         } else { | ||||
|             Box::new(content) | ||||
|         }; | ||||
|  | ||||
|         let result = builder.execute(reader, |indexing_step, update_id| { | ||||
|             info!("update {}: {:?}", update_id, indexing_step) | ||||
|         }); | ||||
|  | ||||
|         info!("document addition done: {:?}", result); | ||||
|  | ||||
|         let addition_result = result?; | ||||
|         wtxn.commit()?; | ||||
|         Ok(UpdateResult::DocumentsAddition(addition_result)) | ||||
|     } | ||||
|  | ||||
|     /// Removes every document from `index`, committing only when the deletion succeeded. | ||||
|     fn clear_documents(&self, update_builder: UpdateBuilder, index: &Index) -> anyhow::Result<UpdateResult> { | ||||
|         // The whole update runs inside a single write transaction. | ||||
|         let mut wtxn = index.write_txn()?; | ||||
|  | ||||
|         let _count = update_builder.clear_documents(&mut wtxn, index).execute()?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(UpdateResult::Other) | ||||
|     } | ||||
|  | ||||
|     /// Applies `settings` to `index`, committing only when the update succeeded. | ||||
|     /// | ||||
|     /// For each setting, an absent value leaves it untouched, an explicit `None` resets it | ||||
|     /// (faceted attributes are set to an empty map) and a value replaces it. | ||||
|     fn update_settings( | ||||
|         &self, | ||||
|         settings: &Settings, | ||||
|         update_builder: UpdateBuilder, | ||||
|         index: &Index, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         // The whole update runs inside a single write transaction. | ||||
|         let mut wtxn = index.write_txn()?; | ||||
|         let mut builder = update_builder.settings(&mut wtxn, index); | ||||
|  | ||||
|         match &settings.searchable_attributes { | ||||
|             Some(Some(names)) => builder.set_searchable_fields(names.clone()), | ||||
|             Some(None) => builder.reset_searchable_fields(), | ||||
|             None => (), | ||||
|         } | ||||
|  | ||||
|         match &settings.displayed_attributes { | ||||
|             Some(Some(names)) => builder.set_displayed_fields(names.clone()), | ||||
|             Some(None) => builder.reset_displayed_fields(), | ||||
|             None => (), | ||||
|         } | ||||
|  | ||||
|         if let Some(facet_types) = &settings.faceted_attributes { | ||||
|             builder.set_faceted_fields(facet_types.clone().unwrap_or_else(HashMap::new)); | ||||
|         } | ||||
|  | ||||
|         match &settings.criteria { | ||||
|             Some(Some(criteria)) => builder.set_criteria(criteria.clone()), | ||||
|             Some(None) => builder.reset_criteria(), | ||||
|             None => (), | ||||
|         } | ||||
|  | ||||
|         builder.execute(|indexing_step, update_id| { | ||||
|             info!("update {}: {:?}", update_id, indexing_step) | ||||
|         })?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(UpdateResult::Other) | ||||
|     } | ||||
|  | ||||
|     /// Rebuilds the facets of `index` with the optional sizes found in `levels`, | ||||
|     /// committing only when the update succeeded. | ||||
|     fn update_facets( | ||||
|         &self, | ||||
|         levels: &Facets, | ||||
|         update_builder: UpdateBuilder, | ||||
|         index: &Index, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         // The whole update runs inside a single write transaction. | ||||
|         let mut wtxn = index.write_txn()?; | ||||
|         let mut builder = update_builder.facets(&mut wtxn, index); | ||||
|  | ||||
|         // Sizes are only overridden when they were provided. | ||||
|         if let Some(group_size) = levels.level_group_size { | ||||
|             builder.level_group_size(group_size); | ||||
|         } | ||||
|         if let Some(min_size) = levels.min_level_size { | ||||
|             builder.min_level_size(min_size); | ||||
|         } | ||||
|  | ||||
|         builder.execute()?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(UpdateResult::Other) | ||||
|     } | ||||
|  | ||||
|     /// Deletes from `index` the documents whose external ids are listed, as a JSON array | ||||
|     /// of strings, in the `document_ids` file. Commits only when the deletion succeeded. | ||||
|     fn delete_documents( | ||||
|         &self, | ||||
|         document_ids: File, | ||||
|         update_builder: UpdateBuilder, | ||||
|         index: &Index, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         let ids: Vec<String> = serde_json::from_reader(document_ids)?; | ||||
|         let mut txn = index.write_txn()?; | ||||
|         let mut builder = update_builder.delete_documents(&mut txn, index)?; | ||||
|  | ||||
|         // The return value is discarded: ids that do not exist are simply ignored. | ||||
|         for id in &ids { | ||||
|             builder.delete_external_id(id); | ||||
|         } | ||||
|  | ||||
|         let deleted = builder.execute()?; | ||||
|         txn.commit()?; | ||||
|  | ||||
|         Ok(UpdateResult::DocumentDeletion { deleted }) | ||||
|     } | ||||
|  | ||||
|     /// Runs the update described by `meta` on `index`, reading its payload from `content`. | ||||
|     /// | ||||
|     /// Returns the processed update on success, or the failed update carrying the error | ||||
|     /// message otherwise. | ||||
|     pub fn handle_update( | ||||
|         &self, | ||||
|         meta: Processing<UpdateMeta>, | ||||
|         content: File, | ||||
|         index: &Index, | ||||
|     ) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> { | ||||
|         let update_builder = self.update_buidler(meta.id()); | ||||
|  | ||||
|         // Dispatch on the kind of update; only some kinds read the content file. | ||||
|         let outcome = match meta.meta() { | ||||
|             UpdateMeta::DocumentsAddition { | ||||
|                 method, | ||||
|                 format, | ||||
|                 primary_key, | ||||
|             } => self.update_documents( | ||||
|                 *format, | ||||
|                 *method, | ||||
|                 content, | ||||
|                 update_builder, | ||||
|                 primary_key.as_deref(), | ||||
|                 index, | ||||
|             ), | ||||
|             UpdateMeta::ClearDocuments => self.clear_documents(update_builder, index), | ||||
|             UpdateMeta::DeleteDocuments => self.delete_documents(content, update_builder, index), | ||||
|             UpdateMeta::Settings(settings) => self.update_settings(settings, update_builder, index), | ||||
|             UpdateMeta::Facets(levels) => self.update_facets(levels, update_builder, index), | ||||
|         }; | ||||
|  | ||||
|         match outcome { | ||||
|             Ok(success) => Ok(meta.process(success)), | ||||
|             Err(error) => Err(meta.fail(error.to_string())), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,423 @@ | ||||
| use std::path::Path; | ||||
| use std::sync::{Arc, RwLock}; | ||||
| use std::io::{Cursor, SeekFrom, Seek}; | ||||
|  | ||||
| use crossbeam_channel::Sender; | ||||
| use heed::types::{OwnedType, DecodeIgnore, SerdeJson, ByteSlice}; | ||||
| use heed::{EnvOpenOptions, Env, Database}; | ||||
| use serde::{Serialize, Deserialize}; | ||||
| use std::fs::File; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index_controller::updates::*; | ||||
|  | ||||
| type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>; | ||||
|  | ||||
| /// Store of updates, keyed by update id, with one database per update state. | ||||
| /// | ||||
| /// `M` is the update metadata, `N` the result of a processed update and `E` the error of | ||||
| /// a failed one. | ||||
| #[derive(Clone)] | ||||
| pub struct UpdateStore<M, N, E> { | ||||
|     env: Env, | ||||
|     // Metadata of the updates waiting to be processed. | ||||
|     pending_meta: Database<OwnedType<BEU64>, SerdeJson<Pending<M>>>, | ||||
|     // Raw content of the updates waiting to be processed. | ||||
|     pending: Database<OwnedType<BEU64>, ByteSlice>, | ||||
|     // Metadata of the updates that were processed successfully. | ||||
|     processed_meta: Database<OwnedType<BEU64>, SerdeJson<Processed<M, N>>>, | ||||
|     // Metadata of the updates whose processing failed. | ||||
|     failed_meta: Database<OwnedType<BEU64>, SerdeJson<Failed<M, E>>>, | ||||
|     aborted_meta: Database<OwnedType<BEU64>, SerdeJson<Aborted<M>>>, | ||||
|     // Update currently being processed, if any; kept in memory only. | ||||
|     processing: Arc<RwLock<Option<Processing<M>>>>, | ||||
|     // Wakes up the processing thread when an update is registered. | ||||
|     notification_sender: Sender<()>, | ||||
| } | ||||
|  | ||||
| /// Processes one update: receives its metadata and a file holding its content, and | ||||
| /// returns either the processed or the failed update. | ||||
| pub trait HandleUpdate<M, N, E> { | ||||
|     fn handle_update(&mut self, meta: Processing<M>, content: File) -> Result<Processed<M, N>, Failed<M, E>>; | ||||
| } | ||||
|  | ||||
| /// Lets any matching closure or function be used as an update handler. | ||||
| impl<M, N, E, F> HandleUpdate<M, N, E> for F | ||||
| where | ||||
|     F: FnMut(Processing<M>, File) -> Result<Processed<M, N>, Failed<M, E>>, | ||||
| { | ||||
|     fn handle_update( | ||||
|         &mut self, | ||||
|         meta: Processing<M>, | ||||
|         content: File, | ||||
|     ) -> Result<Processed<M, N>, Failed<M, E>> { | ||||
|         (self)(meta, content) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<M, N, E> UpdateStore<M, N, E> | ||||
| where | ||||
|     M: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync + Clone, | ||||
|     N: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync, | ||||
|     E: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync, | ||||
| { | ||||
|     /// Opens (or creates) the update store at `path` and spawns the thread that processes | ||||
|     /// pending updates with `update_handler`. | ||||
|     /// | ||||
|     /// The thread only holds a weak reference to the store and exits once the store has | ||||
|     /// been dropped or the notification channel is closed. | ||||
|     pub fn open<P, U>( | ||||
|         mut options: EnvOpenOptions, | ||||
|         path: P, | ||||
|         mut update_handler: U, | ||||
|     ) -> heed::Result<Arc<Self>> | ||||
|     where | ||||
|         P: AsRef<Path>, | ||||
|         U: HandleUpdate<M, N, E> + Send + 'static, | ||||
|     { | ||||
|         // One named database per update state, five in total. | ||||
|         options.max_dbs(5); | ||||
|  | ||||
|         let env = options.open(path)?; | ||||
|         let pending_meta = env.create_database(Some("pending-meta"))?; | ||||
|         let pending = env.create_database(Some("pending"))?; | ||||
|         let processed_meta = env.create_database(Some("processed-meta"))?; | ||||
|         let aborted_meta = env.create_database(Some("aborted-meta"))?; | ||||
|         let failed_meta = env.create_database(Some("failed-meta"))?; | ||||
|         let processing = Arc::new(RwLock::new(None)); | ||||
|  | ||||
|         // A capacity of one is enough: a notification only means "there may be work". | ||||
|         let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1); | ||||
|         // Send a first notification to trigger the process. | ||||
|         let _ = notification_sender.send(()); | ||||
|  | ||||
|         let update_store = Arc::new(UpdateStore { | ||||
|             env, | ||||
|             pending, | ||||
|             pending_meta, | ||||
|             processed_meta, | ||||
|             aborted_meta, | ||||
|             notification_sender, | ||||
|             failed_meta, | ||||
|             processing, | ||||
|         }); | ||||
|  | ||||
|         // We need a weak reference so we can take ownership on the arc later when we | ||||
|         // want to close the index. | ||||
|         let update_store_weak = Arc::downgrade(&update_store); | ||||
|         std::thread::spawn(move || { | ||||
|             // Block and wait for something to process. | ||||
|             'outer: for _ in notification_receiver { | ||||
|                 // Drain the pending updates until none is left. | ||||
|                 loop { | ||||
|                     match update_store_weak.upgrade() { | ||||
|                         Some(update_store) => { | ||||
|                             match update_store.process_pending_update(&mut update_handler) { | ||||
|                                 Ok(Some(_)) => (), | ||||
|                                 Ok(None) => break, | ||||
|                                 // NOTE(review): after an error the loop retries immediately, so | ||||
|                                 // a persistent error would spin here; confirm this is intended. | ||||
|                                 Err(e) => eprintln!("error while processing update: {}", e), | ||||
|                             } | ||||
|                         } | ||||
|                         // the ownership on the arc has been taken, we need to exit. | ||||
|                         None => break 'outer, | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         }); | ||||
|  | ||||
|         Ok(update_store) | ||||
|     } | ||||
|  | ||||
|     /// Consumes the store and returns the closing event of its underlying environment. | ||||
|     pub fn prepare_for_closing(self) -> heed::EnvClosingEvent { | ||||
|         self.env.prepare_for_closing() | ||||
|     } | ||||
|  | ||||
|     /// Returns the new biggest id to use to store the new update. | ||||
|     /// | ||||
|     /// The id is one more than the highest id found in the pending, processed, aborted | ||||
|     /// and failed stores, or `0` when all of them are empty. | ||||
|     fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> { | ||||
|         let last_pending = self.pending_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_processed = self.processed_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_aborted = self.aborted_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         // Failed updates keep their id too: ignoring them would hand out an id that is | ||||
|         // already used whenever the most recent update is a failed one. | ||||
|         let last_failed = self.failed_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_update_id = [last_pending, last_processed, last_aborted, last_failed] | ||||
|             .iter() | ||||
|             .copied() | ||||
|             .flatten() | ||||
|             .max(); | ||||
|  | ||||
|         Ok(last_update_id.map_or(0, |last_id| last_id + 1)) | ||||
|     } | ||||
|  | ||||
|     /// Stores `content` in the pending store and its meta in the pending-meta store, | ||||
|     /// then notifies the processing thread. Returns the pending update, which carries | ||||
|     /// the new unique update id. | ||||
|     /// | ||||
|     /// Panics if the notification channel is disconnected. | ||||
|     pub fn register_update( | ||||
|         &self, | ||||
|         meta: M, | ||||
|         content: &[u8], | ||||
|         index_uuid: Uuid, | ||||
|     ) -> heed::Result<Pending<M>> { | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|  | ||||
|         // The id is computed while holding the write txn, so no other registration can | ||||
|         // pick the same id: it has to wait for its own write txn first. | ||||
|         let update_id = self.new_update_id(&wtxn)?; | ||||
|         let key = BEU64::new(update_id); | ||||
|         let pending = Pending::new(meta, update_id, index_uuid); | ||||
|  | ||||
|         self.pending_meta.put(&mut wtxn, &key, &pending)?; | ||||
|         self.pending.put(&mut wtxn, &key, content)?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         // A full channel is fine (a notification is already queued); a disconnected one | ||||
|         // is not. | ||||
|         let disconnected = self | ||||
|             .notification_sender | ||||
|             .try_send(()) | ||||
|             .err() | ||||
|             .map_or(false, |e| e.is_disconnected()); | ||||
|         assert!(!disconnected, "update notification channel is disconnected"); | ||||
|  | ||||
|         Ok(pending) | ||||
|     } | ||||
|     /// Executes the user provided handler on the next pending update (the one with the lowest id). | ||||
|     /// The handler runs while only a read-only txn is held; the resulting meta is written | ||||
|     /// to the processed-meta or failed-meta store *after* the update has been handled. | ||||
|     /// | ||||
|     /// Returns `Ok(Some(()))` when an update was handled and `Ok(None)` when no update is pending. | ||||
|     fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<()>> | ||||
|     where | ||||
|         U: HandleUpdate<M, N, E> + Send + 'static, | ||||
|     { | ||||
|         // Create a read transaction to be able to retrieve the pending update in order. | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|         let first_meta = self.pending_meta.first(&rtxn)?; | ||||
|  | ||||
|         // If there is a pending update we process and only keep | ||||
|         // a reader while processing it, not a writer. | ||||
|         match first_meta { | ||||
|             Some((first_id, pending)) => { | ||||
|                 let first_content = self.pending | ||||
|                     .get(&rtxn, &first_id)? | ||||
|                     .expect("associated update content"); | ||||
|  | ||||
|                 // We change the state of the update from pending to processing before we pass it | ||||
|                 // to the update handler. The processing state is not persisted, to be able to | ||||
|                 // recover from a failure. | ||||
|                 // NOTE(review): if one of the `?` below returns early, `processing` is not | ||||
|                 // cleared by this call; confirm this is acceptable. | ||||
|                 let processing = pending.processing(); | ||||
|                 self.processing | ||||
|                     .write() | ||||
|                     .unwrap() | ||||
|                     .replace(processing.clone()); | ||||
|                 // Copy the content into a temporary file and rewind it for the handler. | ||||
|                 let mut cursor = Cursor::new(first_content); | ||||
|                 let mut file = tempfile::tempfile()?; | ||||
|                 std::io::copy(&mut cursor, &mut file)?; | ||||
|                 file.seek(SeekFrom::Start(0))?; | ||||
|                 // Process the pending update using the provided user function. | ||||
|                 let result = handler.handle_update(processing, file); | ||||
|                 drop(rtxn); | ||||
|  | ||||
|                 // Once the pending update has been handled, successfully or not, | ||||
|                 // we must remove the content from the pending and processing stores and | ||||
|                 // write the *new* meta to the processed-meta or failed-meta store and commit. | ||||
|                 let mut wtxn = self.env.write_txn()?; | ||||
|                 self.processing | ||||
|                     .write() | ||||
|                     .unwrap() | ||||
|                     .take(); | ||||
|                 self.pending_meta.delete(&mut wtxn, &first_id)?; | ||||
|                 self.pending.delete(&mut wtxn, &first_id)?; | ||||
|                 match result { | ||||
|                     Ok(processed) => self.processed_meta.put(&mut wtxn, &first_id, &processed)?, | ||||
|                     Err(failed) => self.failed_meta.put(&mut wtxn, &first_id, &failed)?, | ||||
|                 } | ||||
|                 wtxn.commit()?; | ||||
|  | ||||
|                 Ok(Some(())) | ||||
|             }, | ||||
|             None => Ok(None) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Runs the user defined function over the update metas. | ||||
|     /// | ||||
|     /// The function receives, in this order: the update currently being processed | ||||
|     /// (if any), followed by the *processed*, *aborted*, *pending* and *failed* | ||||
|     /// meta-store iterators, all opened on the same read transaction. | ||||
|     pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T> | ||||
|     where | ||||
|         F: for<'a> FnMut( | ||||
|             Option<Processing<M>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Processed<M, N>>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Aborted<M>>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Pending<M>>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Failed<M, E>>>, | ||||
|         ) -> heed::Result<T>, | ||||
|     { | ||||
|         let txn = self.env.read_txn()?; | ||||
|  | ||||
|         // One iterator per persistent meta store. | ||||
|         let processed = self.processed_meta.iter(&txn)?; | ||||
|         let aborted = self.aborted_meta.iter(&txn)?; | ||||
|         let pending = self.pending_meta.iter(&txn)?; | ||||
|         // The update being processed is kept in memory only, so it is cloned | ||||
|         // out of the shared slot instead of being read from the environment. | ||||
|         let in_flight = self.processing.read().unwrap().clone(); | ||||
|         let failed = self.failed_meta.iter(&txn)?; | ||||
|  | ||||
|         // Hand everything over to the user defined function. | ||||
|         f(in_flight, processed, aborted, pending, failed) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of the update identified by `update_id`, or `None` | ||||
|     /// when none of the stores knows about it. | ||||
|     /// | ||||
|     /// The update currently being processed is looked at first, then the | ||||
|     /// pending, processed, aborted and failed meta stores, in that order. | ||||
|     pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatus<M, N, E>>> { | ||||
|         let txn = self.env.read_txn()?; | ||||
|         let key = BEU64::new(update_id); | ||||
|  | ||||
|         // The in-memory processing slot takes precedence over the stores. | ||||
|         { | ||||
|             let in_flight = self.processing.read().unwrap(); | ||||
|             match *in_flight { | ||||
|                 Some(ref meta) if meta.id() == update_id => { | ||||
|                     return Ok(Some(UpdateStatus::Processing(meta.clone()))); | ||||
|                 } | ||||
|                 _ => (), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Each store is only queried when the previous ones did not match. | ||||
|         let status = if let Some(meta) = self.pending_meta.get(&txn, &key)? { | ||||
|             Some(UpdateStatus::Pending(meta)) | ||||
|         } else if let Some(meta) = self.processed_meta.get(&txn, &key)? { | ||||
|             Some(UpdateStatus::Processed(meta)) | ||||
|         } else if let Some(meta) = self.aborted_meta.get(&txn, &key)? { | ||||
|             Some(UpdateStatus::Aborted(meta)) | ||||
|         } else if let Some(meta) = self.failed_meta.get(&txn, &key)? { | ||||
|             Some(UpdateStatus::Failed(meta)) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|  | ||||
|         Ok(status) | ||||
|     } | ||||
|  | ||||
|     /// Aborts a pending update: its content is deleted and its meta is moved | ||||
|     /// into the aborted updates database. | ||||
|     /// | ||||
|     /// Returns `None`, without touching anything, when the update is the first | ||||
|     /// pending one (it is considered to be the one being processed), when it has | ||||
|     /// already been processed, or when it does not exist. | ||||
|     #[allow(dead_code)] | ||||
|     pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<Aborted<M>>> { | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|         let key = BEU64::new(update_id); | ||||
|  | ||||
|         // The head of the pending queue is treated as being processed and | ||||
|         // therefore cannot be aborted. | ||||
|         let is_queue_head = match self.pending_meta.first(&wtxn)? { | ||||
|             Some((head, _)) => head.get() == update_id, | ||||
|             None => false, | ||||
|         }; | ||||
|         if is_queue_head { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         let aborted = match self.pending_meta.get(&wtxn, &key)? { | ||||
|             Some(pending) => pending.abort(), | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         // Move the meta to the aborted store, then drop the pending entries. | ||||
|         self.aborted_meta.put(&mut wtxn, &key, &aborted)?; | ||||
|         self.pending_meta.delete(&mut wtxn, &key)?; | ||||
|         self.pending.delete(&mut wtxn, &key)?; | ||||
|  | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(Some(aborted)) | ||||
|     } | ||||
|  | ||||
|     /// Aborts every pending update except the first one, which is considered | ||||
|     /// to be the update currently being processed. | ||||
|     /// | ||||
|     /// Returns the ids and metas of the updates that were aborted. | ||||
|     #[allow(dead_code)] | ||||
|     pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, Aborted<M>)>> { | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|  | ||||
|         // Gather the aborted metas first: the stores cannot be modified while | ||||
|         // the pending iterator is alive. The head of the queue is skipped. | ||||
|         let aborted_updates = self | ||||
|             .pending_meta | ||||
|             .iter(&wtxn)? | ||||
|             .skip(1) | ||||
|             .map(|entry| entry.map(|(key, pending)| (key.get(), pending.abort()))) | ||||
|             .collect::<Result<Vec<_>, _>>()?; | ||||
|  | ||||
|         for (id, aborted) in aborted_updates.iter() { | ||||
|             let key = BEU64::new(*id); | ||||
|             self.aborted_meta.put(&mut wtxn, &key, aborted)?; | ||||
|             self.pending_meta.delete(&mut wtxn, &key)?; | ||||
|             self.pending.delete(&mut wtxn, &key)?; | ||||
|         } | ||||
|  | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(aborted_updates) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|     use std::thread; | ||||
|     use std::time::{Duration, Instant}; | ||||
|  | ||||
|     // Lets plain closures be used as update handlers in the tests below. | ||||
|     impl<M, N, F, E> HandleUpdate<M, N, E> for F | ||||
|     where | ||||
|         F: FnMut(Processing<M>, &[u8]) -> Result<Processed<M, N>, Failed<M, E>> + Send + 'static, | ||||
|     { | ||||
|         fn handle_update( | ||||
|             &mut self, | ||||
|             meta: Processing<M>, | ||||
|             content: &[u8], | ||||
|         ) -> Result<Processed<M, N>, Failed<M, E>> { | ||||
|             self(meta, content) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// A registered update ends up in the processed store with the meta | ||||
|     /// produced by the handler. | ||||
|     #[test] | ||||
|     fn simple() { | ||||
|         let dir = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(4096 * 100); | ||||
|         let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> { | ||||
|             let new_meta = meta.meta().to_string() + " processed"; | ||||
|             let processed = meta.process(new_meta); | ||||
|             Ok(processed) | ||||
|         }).unwrap(); | ||||
|  | ||||
|         let meta = String::from("kiki"); | ||||
|         let update = update_store.register_update(meta, &[]).unwrap(); | ||||
|         // Leave some time for the update to be picked up and processed. | ||||
|         thread::sleep(Duration::from_millis(100)); | ||||
|         let meta = update_store.meta(update.id()).unwrap().unwrap(); | ||||
|         if let UpdateStatus::Processed(Processed { success, .. }) = meta { | ||||
|             assert_eq!(success, "kiki processed"); | ||||
|         } else { | ||||
|             panic!() | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Registering updates must not wait for a slow handler, and every | ||||
|     /// registered update must eventually be processed. | ||||
|     #[test] | ||||
|     #[ignore] | ||||
|     fn long_running_update() { | ||||
|         let dir = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(4096 * 100); | ||||
|         let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content:&_| -> Result<_, Failed<_, ()>> { | ||||
|             thread::sleep(Duration::from_millis(400)); | ||||
|             // The leading space matters: the assertions below expect | ||||
|             // "<name> processed", exactly like in `simple`. | ||||
|             let new_meta = meta.meta().to_string() + " processed"; | ||||
|             let processed = meta.process(new_meta); | ||||
|             Ok(processed) | ||||
|         }).unwrap(); | ||||
|  | ||||
|         let before_register = Instant::now(); | ||||
|  | ||||
|         let meta = String::from("kiki"); | ||||
|         let update_kiki = update_store.register_update(meta, &[]).unwrap(); | ||||
|         assert!(before_register.elapsed() < Duration::from_millis(200)); | ||||
|  | ||||
|         let meta = String::from("coco"); | ||||
|         let update_coco = update_store.register_update(meta, &[]).unwrap(); | ||||
|         assert!(before_register.elapsed() < Duration::from_millis(200)); | ||||
|  | ||||
|         let meta = String::from("cucu"); | ||||
|         let update_cucu = update_store.register_update(meta, &[]).unwrap(); | ||||
|         assert!(before_register.elapsed() < Duration::from_millis(200)); | ||||
|  | ||||
|         // Three updates of 400ms each, plus some slack. | ||||
|         thread::sleep(Duration::from_millis(400 * 3 + 100)); | ||||
|  | ||||
|         let expectations = [ | ||||
|             (update_kiki.id(), "kiki processed"), | ||||
|             (update_coco.id(), "coco processed"), | ||||
|             (update_cucu.id(), "cucu processed"), | ||||
|         ]; | ||||
|         for (id, expected) in expectations.iter() { | ||||
|             match update_store.meta(*id).unwrap().unwrap() { | ||||
|                 UpdateStatus::Processed(Processed { success, .. }) => { | ||||
|                     assert_eq!(success, *expected) | ||||
|                 } | ||||
|                 _ => panic!("update {} has not been processed", id), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,607 @@ | ||||
| use std::fs::{create_dir_all, remove_dir_all}; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use anyhow::{bail, Context}; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use dashmap::{mapref::entry::Entry, DashMap}; | ||||
| use heed::{ | ||||
|     types::{ByteSlice, SerdeJson, Str}, | ||||
|     Database, Env, EnvOpenOptions, RoTxn, RwTxn, | ||||
| }; | ||||
| use log::{error, info}; | ||||
| use milli::Index; | ||||
| use rayon::ThreadPool; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::update_handler::UpdateHandler; | ||||
| use super::{UpdateMeta, UpdateResult}; | ||||
| use crate::option::IndexerOpts; | ||||
|  | ||||
| /// The generic update store instantiated with `UpdateMeta`, `UpdateResult` and | ||||
| /// `String` as its three type parameters. | ||||
| type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>; | ||||
|  | ||||
| /// Metadata stored (as JSON) for every index of the store. It carries what | ||||
| /// is needed to open the index and its update store from disk. | ||||
| #[derive(Serialize, Deserialize, Debug, PartialEq)] | ||||
| pub struct IndexMeta { | ||||
|     /// Map size given to the update store environment when it is opened. | ||||
|     update_store_size: u64, | ||||
|     /// Map size given to the index environment when it is opened. | ||||
|     index_store_size: u64, | ||||
|     /// Identifier from which the on-disk directory names are derived. | ||||
|     pub uuid: Uuid, | ||||
|     /// Creation time of the index. | ||||
|     pub created_at: DateTime<Utc>, | ||||
|     /// Time of the last successful `update_index` call (creation time at first). | ||||
|     pub updated_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl IndexMeta { | ||||
|     /// Opens the index and the update store described by this meta, creating | ||||
|     /// their directories under `path` when they do not exist yet. | ||||
|     /// | ||||
|     /// The update store is wired to an `UpdateHandler` working on the opened | ||||
|     /// index with the given thread pool and indexer options. | ||||
|     fn open( | ||||
|         &self, | ||||
|         path: impl AsRef<Path>, | ||||
|         thread_pool: Arc<ThreadPool>, | ||||
|         indexer_options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> { | ||||
|         let update_dir = make_update_db_path(&path, &self.uuid); | ||||
|         let index_dir = make_index_db_path(&path, &self.uuid); | ||||
|  | ||||
|         create_dir_all(&update_dir)?; | ||||
|         create_dir_all(&index_dir)?; | ||||
|  | ||||
|         // The index comes first: the update handler needs a handle to it. | ||||
|         let index = { | ||||
|             let mut env_options = EnvOpenOptions::new(); | ||||
|             env_options.map_size(self.index_store_size as usize); | ||||
|             Arc::new(Index::new(env_options, index_dir)?) | ||||
|         }; | ||||
|  | ||||
|         let update_store = { | ||||
|             let mut env_options = EnvOpenOptions::new(); | ||||
|             env_options.map_size(self.update_store_size as usize); | ||||
|             let handler = UpdateHandler::new(indexer_options, Arc::clone(&index), thread_pool)?; | ||||
|             UpdateStore::open(env_options, update_dir, handler)? | ||||
|         }; | ||||
|  | ||||
|         Ok((index, update_store)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Registry of the indexes: persists the name -> uuid -> meta mappings in a | ||||
| /// heed environment and caches the handles of the indexes already opened. | ||||
| pub struct IndexStore { | ||||
|     /// Environment holding the two databases below. | ||||
|     env: Env, | ||||
|     /// Index name -> raw uuid bytes. | ||||
|     name_to_uuid: Database<Str, ByteSlice>, | ||||
|     /// In-memory cache of the opened indexes and their update stores. | ||||
|     uuid_to_index: DashMap<Uuid, (Arc<Index>, Arc<UpdateStore>)>, | ||||
|     /// Raw uuid bytes -> JSON encoded `IndexMeta`. | ||||
|     uuid_to_index_meta: Database<ByteSlice, SerdeJson<IndexMeta>>, | ||||
|  | ||||
|     /// Thread pool handed to the update handler of every opened index. | ||||
|     thread_pool: Arc<ThreadPool>, | ||||
|     /// Options handed to the update handler of every opened index. | ||||
|     indexer_options: IndexerOpts, | ||||
| } | ||||
|  | ||||
| impl IndexStore { | ||||
|     /// Opens the index store located at `path`, creating its two databases | ||||
|     /// when they do not exist yet, and builds the indexing thread pool. | ||||
|     /// | ||||
|     /// No index is opened here: the cache of opened indexes starts empty. | ||||
|     pub fn new(path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<Self> { | ||||
|         let mut env_options = EnvOpenOptions::new(); | ||||
|         env_options.map_size(4096 * 100); | ||||
|         env_options.max_dbs(2); | ||||
|         let env = env_options.open(path)?; | ||||
|  | ||||
|         let name_to_uuid = open_or_create_database(&env, Some("name_to_uid"))?; | ||||
|         let uuid_to_index_meta = open_or_create_database(&env, Some("uid_to_index_db"))?; | ||||
|  | ||||
|         // When no number of jobs is configured, 0 is passed to the builder. | ||||
|         let indexing_jobs = indexer_options.indexing_jobs.unwrap_or(0); | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new() | ||||
|             .num_threads(indexing_jobs) | ||||
|             .build() | ||||
|             .map(Arc::new)?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             env, | ||||
|             name_to_uuid, | ||||
|             uuid_to_index: DashMap::new(), | ||||
|             uuid_to_index_meta, | ||||
|  | ||||
|             thread_pool, | ||||
|             indexer_options, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the index named `index_uid`. | ||||
|     /// | ||||
|     /// The name and meta entries are removed from the store right away. Closing | ||||
|     /// the index (when it was loaded) and removing its files happen on a | ||||
|     /// background thread, so this function returns before the files are gone. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails when the index does not exist or when the store cannot be updated. | ||||
|     /// Errors met while closing the index or removing its files are only logged. | ||||
|     pub fn delete(&self, index_uid: impl AsRef<str>) -> anyhow::Result<()> { | ||||
|         // we remove the references to the index from the index map so it is not accessible anymore | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|         let uuid = self | ||||
|             .index_uuid(&txn, &index_uid)? | ||||
|             .with_context(|| format!("Index {:?} doesn't exist", index_uid.as_ref()))?; | ||||
|         self.name_to_uuid.delete(&mut txn, index_uid.as_ref())?; | ||||
|         self.uuid_to_index_meta.delete(&mut txn, uuid.as_bytes())?; | ||||
|         txn.commit()?; | ||||
|         // If the index was loaded (i.e it is present in the uuid_to_index map), then we need to | ||||
|         // close it. The process goes as follow: | ||||
|         // | ||||
|         // 1) We want to remove any pending updates from the store. | ||||
|         // 2) We try to get ownership on the update store so we can close it. It may take a | ||||
|         // couple of tries, but since the update store event loop only has a weak reference to | ||||
|         // itself, and we are the only other function holding a reference to it otherwise, we will | ||||
|         // get it eventually. | ||||
|         // 3) We request a closing of the update store. | ||||
|         // 4) We can take ownership on the index, and close it. | ||||
|         // 5) We remove all the files from the file system. | ||||
|         // | ||||
|         // Step 5 must also happen when the index was never loaded, otherwise its | ||||
|         // directories would stay on disk with nothing referencing them anymore. | ||||
|         let index_uid = index_uid.as_ref().to_string(); | ||||
|         let path = self.env.path().to_owned(); | ||||
|         let loaded = self.uuid_to_index.remove(&uuid); | ||||
|         std::thread::spawn(move || { | ||||
|             info!("Preparing for {:?} deletion.", index_uid); | ||||
|             if let Some((_, (index, updates))) = loaded { | ||||
|                 // this error is non fatal, but may delay the deletion. | ||||
|                 if let Err(e) = updates.abort_pendings() { | ||||
|                     error!( | ||||
|                         "error aborting pending updates when deleting index {:?}: {}", | ||||
|                         index_uid, e | ||||
|                     ); | ||||
|                 } | ||||
|                 let updates = get_arc_ownership_blocking(updates); | ||||
|                 let close_event = updates.prepare_for_closing(); | ||||
|                 close_event.wait(); | ||||
|                 info!("closed update store for {:?}", index_uid); | ||||
|  | ||||
|                 let index = get_arc_ownership_blocking(index); | ||||
|                 let close_event = index.prepare_for_closing(); | ||||
|                 close_event.wait(); | ||||
|             } | ||||
|  | ||||
|             Self::remove_index_files(&path, &uuid, &index_uid); | ||||
|  | ||||
|             info!("index {:?} deleted.", index_uid); | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Removes the index and update store directories of `uuid` located under | ||||
|     /// `path`, logging (but otherwise ignoring) any error. | ||||
|     fn remove_index_files(path: &Path, uuid: &Uuid, index_uid: &str) { | ||||
|         let update_path = make_update_db_path(path, uuid); | ||||
|         let index_path = make_index_db_path(path, uuid); | ||||
|  | ||||
|         if let Err(e) = remove_dir_all(index_path) { | ||||
|             error!("error removing index {:?}: {}", index_uid, e); | ||||
|         } | ||||
|  | ||||
|         if let Err(e) = remove_dir_all(update_path) { | ||||
|             error!("error removing index {:?}: {}", index_uid, e); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the uuid registered for the index `name`, or `None` when the | ||||
|     /// name is unknown. Fails when the stored bytes are not a valid uuid. | ||||
|     fn index_uuid(&self, txn: &RoTxn, name: impl AsRef<str>) -> anyhow::Result<Option<Uuid>> { | ||||
|         let raw = self.name_to_uuid.get(txn, name.as_ref())?; | ||||
|         let uuid = raw.map(Uuid::from_slice).transpose()?; | ||||
|         Ok(uuid) | ||||
|     } | ||||
|  | ||||
|     /// Returns the handles of the index identified by `uid`. | ||||
|     /// | ||||
|     /// Handles already present in the cache are cloned. Otherwise the index is | ||||
|     /// opened from its stored meta and cached, and `None` is returned when no | ||||
|     /// meta exists for `uid`. The vacant cache entry is kept until the freshly | ||||
|     /// opened handles have been inserted. | ||||
|     fn retrieve_index( | ||||
|         &self, | ||||
|         txn: &RoTxn, | ||||
|         uid: Uuid, | ||||
|     ) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> { | ||||
|         let vacant = match self.uuid_to_index.entry(uid) { | ||||
|             Entry::Occupied(cached) => { | ||||
|                 let (index, updates) = cached.get(); | ||||
|                 return Ok(Some((index.clone(), updates.clone()))); | ||||
|             } | ||||
|             Entry::Vacant(vacant) => vacant, | ||||
|         }; | ||||
|  | ||||
|         let meta = match self.uuid_to_index_meta.get(txn, uid.as_bytes())? { | ||||
|             Some(meta) => meta, | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         let (index, updates) = meta.open( | ||||
|             self.env.path(), | ||||
|             self.thread_pool.clone(), | ||||
|             &self.indexer_options, | ||||
|         )?; | ||||
|         vacant.insert((index.clone(), updates.clone())); | ||||
|  | ||||
|         Ok(Some((index, updates))) | ||||
|     } | ||||
|  | ||||
|     /// Resolves `name` to its uuid and returns the matching index handles, | ||||
|     /// all within the given transaction. `None` means the name is unknown. | ||||
|     fn get_index_txn( | ||||
|         &self, | ||||
|         txn: &RoTxn, | ||||
|         name: impl AsRef<str>, | ||||
|     ) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> { | ||||
|         if let Some(uid) = self.index_uuid(txn, name)? { | ||||
|             self.retrieve_index(txn, uid) | ||||
|         } else { | ||||
|             Ok(None) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns the handles of the index named `name`, opening the index when | ||||
|     /// it is not loaded yet. `None` is returned for an unknown name. | ||||
|     pub fn index( | ||||
|         &self, | ||||
|         name: impl AsRef<str>, | ||||
|     ) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> { | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|         match self.index_uuid(&rtxn, name)? { | ||||
|             Some(uid) => self.retrieve_index(&rtxn, uid), | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Runs `f` on the index named `name` and, when it succeeds, refreshes the | ||||
|     /// `updated_at` field of the index meta. | ||||
|     /// | ||||
|     /// A write transaction on the store is held for the whole call. It is only | ||||
|     /// committed when `f` succeeds; the error of `f` is returned as is otherwise. | ||||
|     pub fn update_index<F, T>(&self, name: impl AsRef<str>, f: F) -> anyhow::Result<(T, IndexMeta)> | ||||
|     where | ||||
|         F: FnOnce(&Index) -> anyhow::Result<T>, | ||||
|     { | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|         let (index, _) = self | ||||
|             .get_index_txn(&txn, &name)? | ||||
|             .with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?; | ||||
|  | ||||
|         let ret = f(&*index)?; | ||||
|  | ||||
|         let meta = self.update_meta(&mut txn, name, |meta| meta.updated_at = Utc::now())?; | ||||
|         txn.commit()?; | ||||
|  | ||||
|         Ok((ret, meta)) | ||||
|     } | ||||
|  | ||||
|     /// Returns the index named `name` together with its stored meta, or `None` | ||||
|     /// when the name is unknown. | ||||
|     /// | ||||
|     /// A known name whose meta or index cannot be retrieved is an error. | ||||
|     pub fn index_with_meta( | ||||
|         &self, | ||||
|         name: impl AsRef<str>, | ||||
|     ) -> anyhow::Result<Option<(Arc<Index>, IndexMeta)>> { | ||||
|         let txn = self.env.read_txn()?; | ||||
|         let uuid = match self.index_uuid(&txn, &name)? { | ||||
|             Some(uuid) => uuid, | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         let meta = self | ||||
|             .uuid_to_index_meta | ||||
|             .get(&txn, uuid.as_bytes())? | ||||
|             .with_context(|| { | ||||
|                 format!("unable to retrieve metadata for index {:?}", name.as_ref()) | ||||
|             })?; | ||||
|         let (index, _) = self | ||||
|             .retrieve_index(&txn, uuid)? | ||||
|             .with_context(|| format!("unable to retrieve index {:?}", name.as_ref()))?; | ||||
|  | ||||
|         Ok(Some((index, meta))) | ||||
|     } | ||||
|  | ||||
|     /// Applies `f` to the stored meta of the index named `name`, writes the | ||||
|     /// result back within `txn` and returns it. | ||||
|     /// | ||||
|     /// The transaction is not committed here; this is left to the caller. | ||||
|     fn update_meta<F>( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         name: impl AsRef<str>, | ||||
|         f: F, | ||||
|     ) -> anyhow::Result<IndexMeta> | ||||
|     where | ||||
|         F: FnOnce(&mut IndexMeta), | ||||
|     { | ||||
|         let index_name = name.as_ref(); | ||||
|  | ||||
|         let uuid = self | ||||
|             .index_uuid(txn, index_name)? | ||||
|             .with_context(|| format!("Index {:?} doesn't exist", index_name))?; | ||||
|         let key = uuid.as_bytes(); | ||||
|  | ||||
|         let mut meta = self | ||||
|             .uuid_to_index_meta | ||||
|             .get(txn, key)? | ||||
|             .with_context(|| format!("couldn't retrieve metadata for index {:?}", index_name))?; | ||||
|         f(&mut meta); | ||||
|         self.uuid_to_index_meta.put(txn, key, &meta)?; | ||||
|  | ||||
|         Ok(meta) | ||||
|     } | ||||
|  | ||||
|     /// Returns the handles of the index named `name`, creating the index with | ||||
|     /// the given store sizes when it does not exist yet. | ||||
|     /// | ||||
|     /// The sizes are only used when the index is created. | ||||
|     pub fn get_or_create_index( | ||||
|         &self, | ||||
|         name: impl AsRef<str>, | ||||
|         update_size: u64, | ||||
|         index_size: u64, | ||||
|     ) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> { | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|  | ||||
|         if let Some(existing) = self.get_index_txn(&txn, name.as_ref())? { | ||||
|             return Ok(existing); | ||||
|         } | ||||
|  | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         let (index, updates, _) = | ||||
|             self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?; | ||||
|  | ||||
|         // A failed commit leaves freshly created files behind: they are | ||||
|         // removed from the file-system before the error is reported. | ||||
|         txn.commit().map_err(|e| { | ||||
|             self.clean_db(uuid); | ||||
|             e | ||||
|         })?; | ||||
|  | ||||
|         Ok((index, updates)) | ||||
|     } | ||||
|  | ||||
|     /// Removes all the files and data associated with a db uuid. | ||||
|     /// | ||||
|     /// This runs on error paths, so it is best-effort: a directory that does | ||||
|     /// not exist is not an error, any other failure is logged, and the cached | ||||
|     /// handles are always evicted. It never panics, so that the error which | ||||
|     /// triggered the cleanup can still be reported to the caller. | ||||
|     fn clean_db(&self, uuid: Uuid) { | ||||
|         let update_db_path = make_update_db_path(self.env.path(), &uuid); | ||||
|         let index_db_path = make_index_db_path(self.env.path(), &uuid); | ||||
|  | ||||
|         for db_path in [update_db_path, index_db_path].iter() { | ||||
|             match remove_dir_all(db_path) { | ||||
|                 Ok(()) => (), | ||||
|                 // Nothing was created at this location, nothing to clean. | ||||
|                 Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), | ||||
|                 Err(e) => error!("Failed to clean database {:?}: {}", db_path, e), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         self.uuid_to_index.remove(&uuid); | ||||
|     } | ||||
|  | ||||
|     /// Registers a new index named `name` under `uuid` within `txn`, opens it | ||||
|     /// and caches its handles. | ||||
|     /// | ||||
|     /// The transaction is left uncommitted. When the index cannot be opened, | ||||
|     /// the files possibly created for it are cleaned before the error is returned. | ||||
|     fn create_index_txn( | ||||
|         &self, | ||||
|         txn: &mut RwTxn, | ||||
|         uuid: Uuid, | ||||
|         name: impl AsRef<str>, | ||||
|         update_store_size: u64, | ||||
|         index_store_size: u64, | ||||
|     ) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> { | ||||
|         // A new index has never been updated: both dates are the same. | ||||
|         let now = Utc::now(); | ||||
|         let meta = IndexMeta { | ||||
|             update_store_size, | ||||
|             index_store_size, | ||||
|             uuid, | ||||
|             created_at: now, | ||||
|             updated_at: now, | ||||
|         }; | ||||
|  | ||||
|         let key = uuid.as_bytes(); | ||||
|         self.name_to_uuid.put(txn, name.as_ref(), key)?; | ||||
|         self.uuid_to_index_meta.put(txn, key, &meta)?; | ||||
|  | ||||
|         let (index, update_store) = meta | ||||
|             .open(self.env.path(), self.thread_pool.clone(), &self.indexer_options) | ||||
|             .map_err(|e| { | ||||
|                 self.clean_db(uuid); | ||||
|                 e | ||||
|             })?; | ||||
|  | ||||
|         let cached = (index.clone(), update_store.clone()); | ||||
|         self.uuid_to_index.insert(uuid, cached); | ||||
|  | ||||
|         Ok((index, update_store, meta)) | ||||
|     } | ||||
|  | ||||
|     /// Same as `get_or_create`, but returns an error if the index already exists. | ||||
|     pub fn create_index( | ||||
|         &self, | ||||
|         name: impl AsRef<str>, | ||||
|         update_size: u64, | ||||
|         index_size: u64, | ||||
|     ) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> { | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|  | ||||
|         if self.name_to_uuid.get(&txn, name.as_ref())?.is_some() { | ||||
|             bail!("index {:?} already exists", name.as_ref()) | ||||
|         } | ||||
|  | ||||
|         let result = self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?; | ||||
|         // If we fail to commit the transaction, we must delete the database from the | ||||
|         // file-system. | ||||
|         if let Err(e) = txn.commit() { | ||||
|             self.clean_db(uuid); | ||||
|             return Err(e)?; | ||||
|         } | ||||
|         Ok(result) | ||||
|     } | ||||
|  | ||||
|     /// Lists every index as `(index_name, IndexMeta, primary_key)`. | ||||
|     /// | ||||
|     /// Entries of the name database that cannot be decoded are logged and | ||||
|     /// skipped. Every listed index is loaded, as its primary key is read from | ||||
|     /// the index itself. | ||||
|     pub fn list_indexes(&self) -> anyhow::Result<Vec<(String, IndexMeta, Option<String>)>> { | ||||
|         let txn = self.env.read_txn()?; | ||||
|         let mut indexes = Vec::new(); | ||||
|  | ||||
|         for entry in self.name_to_uuid.iter(&txn)? { | ||||
|             let (name, uuid) = match entry { | ||||
|                 Ok(pair) => pair, | ||||
|                 Err(e) => { | ||||
|                     error!("error decoding entry while listing indexes: {}", e); | ||||
|                     continue; | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             // the index has to be opened to get its primary key | ||||
|             let (index, _) = self | ||||
|                 .get_index_txn(&txn, name)? | ||||
|                 .with_context(|| format!("could not load index {:?}", name))?; | ||||
|             let primary_key = index.primary_key(&index.read_txn()?)?.map(String::from); | ||||
|  | ||||
|             // the meta is read from the store | ||||
|             let meta = self | ||||
|                 .uuid_to_index_meta | ||||
|                 .get(&txn, uuid)? | ||||
|                 .with_context(|| format!("could not retieve meta for index {:?}", name))?; | ||||
|  | ||||
|             indexes.push((name.to_owned(), meta, primary_key)); | ||||
|         } | ||||
|  | ||||
|         Ok(indexes) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Blocks until the given arc is the only handle left on its value, then returns the value. | ||||
| // The arc is checked every 100ms; a uniquely owned arc is unwrapped without sleeping. | ||||
| fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T { | ||||
|     loop { | ||||
|         item = match Arc::try_unwrap(item) { | ||||
|             Ok(owned) => return owned, | ||||
|             // Still shared: take the arc back and try again later. | ||||
|             Err(shared) => shared, | ||||
|         }; | ||||
|         std::thread::sleep(Duration::from_millis(100)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Opens the database `name` of `env`, creating it when it does not exist yet. | ||||
| fn open_or_create_database<K: 'static, V: 'static>( | ||||
|     env: &Env, | ||||
|     name: Option<&str>, | ||||
| ) -> anyhow::Result<Database<K, V>> { | ||||
|     if let Some(existing) = env.open_database::<K, V>(name)? { | ||||
|         return Ok(existing); | ||||
|     } | ||||
|  | ||||
|     let created = env.create_database::<K, V>(name)?; | ||||
|     Ok(created) | ||||
| } | ||||
|  | ||||
| /// Builds the path of the update store directory of `uuid`: `<path>/update<uuid>`. | ||||
| fn make_update_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf { | ||||
|     let dir_name = format!("update{}", uuid); | ||||
|     path.as_ref().join(dir_name) | ||||
| } | ||||
|  | ||||
| /// Builds the path of the index directory of `uuid`: `<path>/index<uuid>`. | ||||
| fn make_index_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf { | ||||
|     let dir_name = format!("index{}", uuid); | ||||
|     path.as_ref().join(dir_name) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|     use std::path::PathBuf; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_make_update_db_path() { | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         assert_eq!( | ||||
|             make_update_db_path("/home", &uuid), | ||||
|             PathBuf::from(format!("/home/update{}", uuid)) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_make_index_db_path() { | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         assert_eq!( | ||||
|             make_index_db_path("/home", &uuid), | ||||
|             PathBuf::from(format!("/home/index{}", uuid)) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     mod index_store { | ||||
|         use super::*; | ||||
|  | ||||
|         #[test] | ||||
|         fn test_index_uuid() { | ||||
|             let temp = tempfile::tempdir().unwrap(); | ||||
|             let store = IndexStore::new(temp, IndexerOpts::default()).unwrap(); | ||||
|  | ||||
|             let name = "foobar"; | ||||
|             let txn = store.env.read_txn().unwrap(); | ||||
|             // name is not found if the uuid in not present in the db | ||||
|             assert!(store.index_uuid(&txn, &name).unwrap().is_none()); | ||||
|             drop(txn); | ||||
|  | ||||
|             // insert an uuid in the the name_to_uuid_db: | ||||
|             let uuid = Uuid::new_v4(); | ||||
|             let mut txn = store.env.write_txn().unwrap(); | ||||
|             store | ||||
|                 .name_to_uuid | ||||
|                 .put(&mut txn, &name, uuid.as_bytes()) | ||||
|                 .unwrap(); | ||||
|             txn.commit().unwrap(); | ||||
|  | ||||
|             // check that the uuid is there | ||||
|             let txn = store.env.read_txn().unwrap(); | ||||
|             assert_eq!(store.index_uuid(&txn, &name).unwrap(), Some(uuid)); | ||||
|         } | ||||
|  | ||||
|         /// `retrieve_index` yields nothing for an unknown uuid; once the metadata | ||||
|         /// has been written it returns the index and fills `uuid_to_index`. | ||||
|         #[test] | ||||
|         fn test_retrieve_index() { | ||||
|             let dir = tempfile::tempdir().unwrap(); | ||||
|             let store = IndexStore::new(dir, IndexerOpts::default()).unwrap(); | ||||
|             let uuid = Uuid::new_v4(); | ||||
|  | ||||
|             // no metadata has been stored under this uuid yet | ||||
|             let rtxn = store.env.read_txn().unwrap(); | ||||
|             assert!(store.retrieve_index(&rtxn, uuid).unwrap().is_none()); | ||||
|  | ||||
|             let now = Utc::now(); | ||||
|             let meta = IndexMeta { | ||||
|                 update_store_size: 4096 * 100, | ||||
|                 index_store_size: 4096 * 100, | ||||
|                 uuid, | ||||
|                 created_at: now, | ||||
|                 updated_at: now, | ||||
|             }; | ||||
|  | ||||
|             let mut wtxn = store.env.write_txn().unwrap(); | ||||
|             let meta_db = &store.uuid_to_index_meta; | ||||
|             meta_db.put(&mut wtxn, uuid.as_bytes(), &meta).unwrap(); | ||||
|             wtxn.commit().unwrap(); | ||||
|  | ||||
|             // the index cache should be empty | ||||
|             assert!(store.uuid_to_index.is_empty()); | ||||
|  | ||||
|             let rtxn = store.env.read_txn().unwrap(); | ||||
|             let retrieved = store.retrieve_index(&rtxn, uuid).unwrap(); | ||||
|             assert!(retrieved.is_some()); | ||||
|             assert_eq!(store.uuid_to_index.len(), 1); | ||||
|         } | ||||
|  | ||||
|         /// `index` returns nothing for an unknown name and resolves the index | ||||
|         /// once both the name mapping and the index metadata have been stored. | ||||
|         #[test] | ||||
|         fn test_index() { | ||||
|             let dir = tempfile::tempdir().unwrap(); | ||||
|             let store = IndexStore::new(dir, IndexerOpts::default()).unwrap(); | ||||
|             let name = "foobar"; | ||||
|  | ||||
|             // nothing is registered under that name yet | ||||
|             assert!(store.index(&name).unwrap().is_none()); | ||||
|  | ||||
|             let now = Utc::now(); | ||||
|             let uuid = Uuid::new_v4(); | ||||
|             let meta = IndexMeta { | ||||
|                 update_store_size: 4096 * 100, | ||||
|                 index_store_size: 4096 * 100, | ||||
|                 uuid, | ||||
|                 created_at: now, | ||||
|                 updated_at: now, | ||||
|             }; | ||||
|  | ||||
|             // write the name -> uuid mapping and the metadata in one transaction | ||||
|             let mut wtxn = store.env.write_txn().unwrap(); | ||||
|             let names_db = &store.name_to_uuid; | ||||
|             names_db.put(&mut wtxn, &name, uuid.as_bytes()).unwrap(); | ||||
|             let meta_db = &store.uuid_to_index_meta; | ||||
|             meta_db.put(&mut wtxn, uuid.as_bytes(), &meta).unwrap(); | ||||
|             wtxn.commit().unwrap(); | ||||
|  | ||||
|             let found = store.index(&name).unwrap(); | ||||
|             assert!(found.is_some()); | ||||
|         } | ||||
|  | ||||
|         /// `get_or_create_index` on an unknown name registers the name, caches | ||||
|         /// the index and stores metadata carrying the requested sizes. | ||||
|         #[test] | ||||
|         fn test_get_or_create_index() { | ||||
|             let dir = tempfile::tempdir().unwrap(); | ||||
|             let store = IndexStore::new(dir, IndexerOpts::default()).unwrap(); | ||||
|             let name = "foobar"; | ||||
|  | ||||
|             let update_store_size = 4096 * 100; | ||||
|             let index_store_size = 4096 * 100; | ||||
|             let created = store.get_or_create_index(&name, update_store_size, index_store_size); | ||||
|             created.unwrap(); | ||||
|  | ||||
|             let rtxn = store.env.read_txn().unwrap(); | ||||
|             let raw_uuid = store.name_to_uuid.get(&rtxn, &name).unwrap(); | ||||
|             assert_eq!(store.uuid_to_index.len(), 1); | ||||
|             assert!(raw_uuid.is_some()); | ||||
|  | ||||
|             let uuid = Uuid::from_slice(raw_uuid.unwrap()).unwrap(); | ||||
|             let stored_meta = store.uuid_to_index_meta.get(&rtxn, uuid.as_bytes()).unwrap(); | ||||
|             let meta = stored_meta.unwrap(); | ||||
|             assert_eq!(meta.update_store_size, update_store_size); | ||||
|             assert_eq!(meta.index_store_size, index_store_size); | ||||
|             assert_eq!(meta.uuid, uuid); | ||||
|         } | ||||
|  | ||||
|         /// `create_index_txn` registers the name, caches the index and stores | ||||
|         /// metadata carrying the requested sizes, all visible through the same | ||||
|         /// (uncommitted) write transaction. | ||||
|         #[test] | ||||
|         fn test_create_index() { | ||||
|             let dir = tempfile::tempdir().unwrap(); | ||||
|             let store = IndexStore::new(dir, IndexerOpts::default()).unwrap(); | ||||
|             let name = "foobar"; | ||||
|  | ||||
|             let update_store_size = 4096 * 100; | ||||
|             let index_store_size = 4096 * 100; | ||||
|             let new_uuid = Uuid::new_v4(); | ||||
|             let mut wtxn = store.env.write_txn().unwrap(); | ||||
|             let created = | ||||
|                 store.create_index_txn(&mut wtxn, new_uuid, name, update_store_size, index_store_size); | ||||
|             created.unwrap(); | ||||
|  | ||||
|             let raw_uuid = store.name_to_uuid.get(&wtxn, &name).unwrap(); | ||||
|             assert_eq!(store.uuid_to_index.len(), 1); | ||||
|             assert!(raw_uuid.is_some()); | ||||
|  | ||||
|             let uuid = Uuid::from_slice(raw_uuid.unwrap()).unwrap(); | ||||
|             let stored_meta = store.uuid_to_index_meta.get(&wtxn, uuid.as_bytes()).unwrap(); | ||||
|             let meta = stored_meta.unwrap(); | ||||
|             assert_eq!(meta.update_store_size, update_store_size); | ||||
|             assert_eq!(meta.index_store_size, index_store_size); | ||||
|             assert_eq!(meta.uuid, uuid); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,228 @@ | ||||
| mod update_store; | ||||
| mod index_store; | ||||
| mod update_handler; | ||||
|  | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use anyhow::{bail, Context}; | ||||
| use itertools::Itertools; | ||||
| use milli::Index; | ||||
|  | ||||
| use crate::option::IndexerOpts; | ||||
| use index_store::IndexStore; | ||||
| use super::IndexController; | ||||
| use super::updates::UpdateStatus; | ||||
| use super::{UpdateMeta, UpdateResult, IndexMetadata, IndexSettings}; | ||||
|  | ||||
| /// Index controller that serves every request from a local `IndexStore`. | ||||
| pub struct LocalIndexController { | ||||
|     /// Store used to create, look up and delete indexes and their update stores. | ||||
|     indexes: IndexStore, | ||||
|     /// Size passed to the index store for the update database of a new index | ||||
|     /// (presumably in bytes, TODO confirm). | ||||
|     update_db_size: u64, | ||||
|     /// Size passed to the index store for the index database of a new index | ||||
|     /// (presumably in bytes, TODO confirm). | ||||
|     index_db_size: u64, | ||||
| } | ||||
|  | ||||
| impl LocalIndexController { | ||||
|     /// Creates a controller whose `IndexStore` is built from `path` and `opt`. | ||||
|     /// | ||||
|     /// `index_db_size` and `update_db_size` are remembered and handed to the | ||||
|     /// store each time an index has to be created. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the error raised by `IndexStore::new`, if any. | ||||
|     pub fn new( | ||||
|         path: impl AsRef<Path>, | ||||
|         opt: IndexerOpts, | ||||
|         index_db_size: u64, | ||||
|         update_db_size: u64, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let store = IndexStore::new(path, opt)?; | ||||
|         let controller = Self { | ||||
|             indexes: store, | ||||
|             update_db_size, | ||||
|             index_db_size, | ||||
|         }; | ||||
|         Ok(controller) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl IndexController for LocalIndexController { | ||||
|     /// Registers a documents-addition update for `index`, creating the index | ||||
|     /// first when it does not exist yet, and returns the status of the | ||||
|     /// registered update. | ||||
|     /// | ||||
|     /// `data` is stored as the update payload as-is. | ||||
|     fn add_documents<S: AsRef<str>>( | ||||
|         &self, | ||||
|         index: S, | ||||
|         method: milli::update::IndexDocumentsMethod, | ||||
|         format: milli::update::UpdateFormat, | ||||
|         data: &[u8], | ||||
|         primary_key: Option<String>, | ||||
|     ) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> { | ||||
|         let update_meta = UpdateMeta::DocumentsAddition { | ||||
|             method, | ||||
|             format, | ||||
|             primary_key, | ||||
|         }; | ||||
|         let (_, update_store) = self | ||||
|             .indexes | ||||
|             .get_or_create_index(&index, self.update_db_size, self.index_db_size)?; | ||||
|         let registered = update_store.register_update(update_meta, data)?; | ||||
|         Ok(registered.into()) | ||||
|     } | ||||
|  | ||||
|     /// Registers a settings update for `index`, creating the index first when | ||||
|     /// it does not exist yet. The update carries no payload. | ||||
|     fn update_settings<S: AsRef<str>>( | ||||
|         &self, | ||||
|         index: S, | ||||
|         settings: super::Settings | ||||
|     ) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> { | ||||
|         let update_meta = UpdateMeta::Settings(settings); | ||||
|         let (_, update_store) = self | ||||
|             .indexes | ||||
|             .get_or_create_index(&index, self.update_db_size, self.index_db_size)?; | ||||
|         let registered = update_store.register_update(update_meta, &[])?; | ||||
|         Ok(registered.into()) | ||||
|     } | ||||
|  | ||||
|     /// Creates a new index from `index_settings` and returns its metadata. | ||||
|     /// | ||||
|     /// When a primary key is provided it is written to the freshly created | ||||
|     /// index. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails when `index_settings.name` is missing, when the index store | ||||
|     /// cannot create the index, or when the primary key cannot be set. | ||||
|     fn create_index(&self, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> { | ||||
|         let index_name = index_settings.name.context("Missing name for index")?; | ||||
|         let (index, _, meta) = self.indexes.create_index(&index_name, self.update_db_size, self.index_db_size)?; | ||||
|         if let Some(ref primary_key) = index_settings.primary_key { | ||||
|             // TODO: creating index could not be completed, delete everything. | ||||
|             update_primary_key(index, primary_key).context("error creating index")?; | ||||
|         } | ||||
|  | ||||
|         Ok(IndexMetadata { | ||||
|             uid: index_name, | ||||
|             uuid: meta.uuid, | ||||
|             created_at: meta.created_at, | ||||
|             // Report the stored update date rather than repeating the creation date. | ||||
|             updated_at: meta.updated_at, | ||||
|             primary_key: index_settings.primary_key, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the index identified by `index_uid` by delegating to the index store. | ||||
|     fn delete_index<S>(&self, index_uid: S) -> anyhow::Result<()> | ||||
|     where | ||||
|         S: AsRef<str>, | ||||
|     { | ||||
|         let store = &self.indexes; | ||||
|         store.delete(index_uid) | ||||
|     } | ||||
|  | ||||
|     /// Not implemented yet: calling this method panics. | ||||
|     fn swap_indices<S1, S2>(&self, _index1_uid: S1, _index2_uid: S2) -> anyhow::Result<()> | ||||
|     where | ||||
|         S1: AsRef<str>, | ||||
|         S2: AsRef<str>, | ||||
|     { | ||||
|         todo!() | ||||
|     } | ||||
|  | ||||
|     /// Looks `name` up in the index store and returns the index itself, | ||||
|     /// leaving out the update store that comes with it. `None` means the | ||||
|     /// store knows no index under that name. | ||||
|     fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>> { | ||||
|         let found = self.indexes.index(name)?; | ||||
|         Ok(found.map(|(index, _update_store)| index)) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of update `id` for `index`, as reported by the | ||||
|     /// update store's `meta` lookup. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails when the index does not exist or when the stores cannot be read. | ||||
|     fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus<UpdateMeta, UpdateResult, String>>> { | ||||
|         let update_store = match self.indexes.index(&index)? { | ||||
|             Some((_, update_store)) => update_store, | ||||
|             None => bail!("index {:?} doesn't exist", index.as_ref()), | ||||
|         }; | ||||
|         let status = update_store.meta(id)?; | ||||
|         Ok(status) | ||||
|     } | ||||
|  | ||||
|     /// Returns the status of every update known for `index`, sorted by update id. | ||||
|     /// | ||||
|     /// The update being processed, plus the pending, aborted, processed and | ||||
|     /// failed updates are merged together. Entries that the stores yield as | ||||
|     /// `Err` are silently skipped. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails when the index does not exist or when the stores cannot be read. | ||||
|     fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus<UpdateMeta, UpdateResult, String>>> { | ||||
|         let update_store = match self.indexes.index(&index)? { | ||||
|             Some((_, update_store)) => update_store, | ||||
|             None => bail!("index {} doesn't exist.", index.as_ref()), | ||||
|         }; | ||||
|  | ||||
|         let statuses = update_store.iter_metas(|processing, processed, pending, aborted, failed| { | ||||
|             let pending = pending | ||||
|                 .filter_map(|entry| entry.ok()) | ||||
|                 .map(|(_, update)| UpdateStatus::from(update)); | ||||
|             let aborted = aborted | ||||
|                 .filter_map(Result::ok) | ||||
|                 .map(|(_, update)| UpdateStatus::from(update)); | ||||
|             let processed = processed | ||||
|                 .filter_map(Result::ok) | ||||
|                 .map(|(_, update)| UpdateStatus::from(update)); | ||||
|             let failed = failed | ||||
|                 .filter_map(Result::ok) | ||||
|                 .map(|(_, update)| UpdateStatus::from(update)); | ||||
|  | ||||
|             let merged = processing | ||||
|                 .map(UpdateStatus::from) | ||||
|                 .into_iter() | ||||
|                 .chain(pending) | ||||
|                 .chain(aborted) | ||||
|                 .chain(processed) | ||||
|                 .chain(failed) | ||||
|                 .sorted_by(|left, right| left.id().cmp(&right.id())) | ||||
|                 .collect(); | ||||
|             Ok(merged) | ||||
|         })?; | ||||
|  | ||||
|         Ok(statuses) | ||||
|     } | ||||
|  | ||||
|     /// Lists the metadata of every index known to the index store. | ||||
|     /// | ||||
|     /// `updated_at` is the most recent `processed_at` among the processed | ||||
|     /// updates of the index, falling back to its creation date when no update | ||||
|     /// has been processed. | ||||
|     fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> { | ||||
|         self.indexes | ||||
|             .list_indexes()? | ||||
|             .into_iter() | ||||
|             .map(|(uid, meta, primary_key)| -> anyhow::Result<IndexMetadata> { | ||||
|                 let created_at = meta.created_at; | ||||
|                 let uuid = meta.uuid; | ||||
|                 let statuses = self.all_update_status(&uid)?; | ||||
|                 let last_processed_at = statuses | ||||
|                     .iter() | ||||
|                     .filter_map(|status| status.processed().map(|processed| processed.processed_at)) | ||||
|                     .max(); | ||||
|  | ||||
|                 Ok(IndexMetadata { | ||||
|                     uid, | ||||
|                     created_at, | ||||
|                     updated_at: last_processed_at.unwrap_or(created_at), | ||||
|                     uuid, | ||||
|                     primary_key, | ||||
|                 }) | ||||
|             }) | ||||
|             .collect() | ||||
|     } | ||||
|  | ||||
|     /// Updates the index `uid` with `index_settings` and returns its metadata. | ||||
|     /// | ||||
|     /// Only the primary key can be set, and only when the index has none yet. | ||||
|     /// Without a primary key in `index_settings` nothing is modified and the | ||||
|     /// current metadata is returned. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Fails when a new name is requested, when the primary key is already | ||||
|     /// set, or when the index does not exist. | ||||
|     fn update_index(&self, uid: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> { | ||||
|         // Renaming an index is not supported. | ||||
|         if index_settings.name.is_some() { | ||||
|             bail!("can't update an index name.") | ||||
|         } | ||||
|  | ||||
|         let (primary_key, meta) = match index_settings.primary_key { | ||||
|             Some(ref primary_key) => { | ||||
|                 self.indexes | ||||
|                     .update_index(&uid, |index| { | ||||
|                         let mut txn = index.write_txn()?; | ||||
|                         if index.primary_key(&txn)?.is_some() { | ||||
|                             bail!("primary key already exists.") | ||||
|                         } | ||||
|                         index.put_primary_key(&mut txn, primary_key)?; | ||||
|                         txn.commit()?; | ||||
|                         Ok(Some(primary_key.clone())) | ||||
|                     })? | ||||
|             }, | ||||
|             None => { | ||||
|                 // Nothing to change: report the primary key currently stored. | ||||
|                 let (index, meta) = self.indexes | ||||
|                     .index_with_meta(&uid)? | ||||
|                     .with_context(|| format!("index {:?} doesn't exist.", uid.as_ref()))?; | ||||
|                 let primary_key = index | ||||
|                     .primary_key(&index.read_txn()?)? | ||||
|                     .map(String::from); | ||||
|                 (primary_key, meta) | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         Ok(IndexMetadata { | ||||
|             uid: uid.as_ref().to_string(), | ||||
|             uuid: meta.uuid, | ||||
|             created_at: meta.created_at, | ||||
|             updated_at: meta.updated_at, | ||||
|             primary_key, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Registers an update that clears every document of `index`. | ||||
|     /// | ||||
|     /// Unlike document additions, the index is not created on the fly: an | ||||
|     /// unknown index is an error. | ||||
|     fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<super::UpdateStatus> { | ||||
|         let found = self.indexes.index(&index)?; | ||||
|         let (_, update_store) = | ||||
|             found.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?; | ||||
|         let registered = update_store.register_update(UpdateMeta::ClearDocuments, &[])?; | ||||
|         Ok(registered.into()) | ||||
|     } | ||||
|  | ||||
|     /// Registers an update that deletes the documents `document_ids` from `index`. | ||||
|     /// | ||||
|     /// The ids are serialized to JSON and stored as the update payload. An | ||||
|     /// unknown index is an error. | ||||
|     fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<super::UpdateStatus> { | ||||
|         let found = self.indexes.index(&index)?; | ||||
|         let (_, update_store) = | ||||
|             found.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?; | ||||
|         let payload = serde_json::to_vec(&document_ids)?; | ||||
|         let registered = update_store.register_update(UpdateMeta::DeleteDocuments, &payload)?; | ||||
|         Ok(registered.into()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Sets the primary key of `index` to `primary_key`. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Fails when the index already has a primary key, or when the write | ||||
| /// transaction cannot be opened, written or committed. | ||||
| fn update_primary_key(index: impl AsRef<Index>, primary_key: impl AsRef<str>) -> anyhow::Result<()> { | ||||
|     let index = index.as_ref(); | ||||
|     let mut wtxn = index.write_txn()?; | ||||
|  | ||||
|     let already_set = index.primary_key(&wtxn)?.is_some(); | ||||
|     if already_set { | ||||
|         bail!("primary key already set.") | ||||
|     } | ||||
|  | ||||
|     index.put_primary_key(&mut wtxn, primary_key.as_ref())?; | ||||
|     wtxn.commit()?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|     use tempfile::tempdir; | ||||
|     use crate::make_index_controller_tests; | ||||
|  | ||||
|     // Instantiates the tests generated by `make_index_controller_tests!` with a | ||||
|     // `LocalIndexController` living in a fresh temporary directory. | ||||
|     make_index_controller_tests!({ | ||||
|         let indexer_opts = IndexerOpts::default(); | ||||
|         let dir = tempdir().unwrap(); | ||||
|         let db_size = 4096 * 100; | ||||
|         LocalIndexController::new(dir, indexer_opts, db_size, db_size).unwrap() | ||||
|     }); | ||||
| } | ||||
| @@ -0,0 +1,255 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::io; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use anyhow::Result; | ||||
| use flate2::read::GzDecoder; | ||||
| use grenad::CompressionType; | ||||
| use log::info; | ||||
| use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; | ||||
| use milli::Index; | ||||
| use rayon::ThreadPool; | ||||
|  | ||||
| use super::update_store::HandleUpdate; | ||||
| use crate::index_controller::updates::{Failed, Processed, Processing}; | ||||
| use crate::index_controller::{Facets, Settings, UpdateMeta, UpdateResult}; | ||||
| use crate::option::IndexerOpts; | ||||
|  | ||||
| /// Applies registered updates to a single milli `Index`. | ||||
| /// | ||||
| /// Apart from `index`, every field is an indexing option copied from | ||||
| /// `IndexerOpts` and forwarded to the `UpdateBuilder` of each update. | ||||
| pub struct UpdateHandler { | ||||
|     /// Index the updates are applied to. | ||||
|     index: Arc<Index>, | ||||
|     /// Forwarded to `UpdateBuilder::max_nb_chunks` when set. | ||||
|     max_nb_chunks: Option<usize>, | ||||
|     /// Forwarded to `UpdateBuilder::chunk_compression_level` when set. | ||||
|     chunk_compression_level: Option<u32>, | ||||
|     /// Thread pool handed to every update builder. | ||||
|     thread_pool: Arc<ThreadPool>, | ||||
|     /// Forwarded to `UpdateBuilder::log_every_n`. | ||||
|     log_frequency: usize, | ||||
|     /// Forwarded to `UpdateBuilder::max_memory`, in bytes. | ||||
|     max_memory: usize, | ||||
|     /// Forwarded to `UpdateBuilder::linked_hash_map_size`. | ||||
|     linked_hash_map_size: usize, | ||||
|     /// Forwarded to `UpdateBuilder::chunk_compression_type`. | ||||
|     chunk_compression_type: CompressionType, | ||||
|     /// Forwarded to `UpdateBuilder::chunk_fusing_shrink_size`, in bytes. | ||||
|     chunk_fusing_shrink_size: u64, | ||||
| } | ||||
|  | ||||
| impl UpdateHandler { | ||||
|     /// Creates a handler applying updates to `index`, with the indexing | ||||
|     /// options copied from `opt` and the work running on `thread_pool`. | ||||
|     /// | ||||
|     /// This constructor currently never returns an error. | ||||
|     pub fn new( | ||||
|         opt: &IndexerOpts, | ||||
|         index: Arc<Index>, | ||||
|         thread_pool: Arc<ThreadPool>, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         // Byte sizes are resolved once, here, rather than at every update. | ||||
|         let max_memory = opt.max_memory.get_bytes() as usize; | ||||
|         let chunk_fusing_shrink_size = opt.chunk_fusing_shrink_size.get_bytes(); | ||||
|  | ||||
|         let handler = Self { | ||||
|             index, | ||||
|             max_nb_chunks: opt.max_nb_chunks, | ||||
|             chunk_compression_level: opt.chunk_compression_level, | ||||
|             thread_pool, | ||||
|             log_frequency: opt.log_every_n, | ||||
|             max_memory, | ||||
|             linked_hash_map_size: opt.linked_hash_map_size, | ||||
|             chunk_compression_type: opt.chunk_compression_type, | ||||
|             chunk_fusing_shrink_size, | ||||
|         }; | ||||
|         Ok(handler) | ||||
|     } | ||||
|  | ||||
|     /// Builds the `UpdateBuilder` for update `update_id`, configured with the | ||||
|     /// indexing options stored in this handler. | ||||
|     fn update_buidler(&self, update_id: u64) -> UpdateBuilder { | ||||
|         let mut builder = UpdateBuilder::new(update_id); | ||||
|  | ||||
|         // The two optional settings are only applied when configured. | ||||
|         if let Some(nb_chunks) = self.max_nb_chunks { | ||||
|             builder.max_nb_chunks(nb_chunks); | ||||
|         } | ||||
|         if let Some(level) = self.chunk_compression_level { | ||||
|             builder.chunk_compression_level(level); | ||||
|         } | ||||
|  | ||||
|         builder.thread_pool(&self.thread_pool); | ||||
|         builder.log_every_n(self.log_frequency); | ||||
|         builder.max_memory(self.max_memory); | ||||
|         builder.linked_hash_map_size(self.linked_hash_map_size); | ||||
|         builder.chunk_compression_type(self.chunk_compression_type); | ||||
|         builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size); | ||||
|  | ||||
|         builder | ||||
|     } | ||||
|  | ||||
|     /// Indexes the documents contained in `content` and commits the result. | ||||
|     /// | ||||
|     /// `primary_key` is only written when the index has no primary key yet; | ||||
|     /// otherwise it is ignored. A non-empty `content` is always read through | ||||
|     /// a gzip decoder. | ||||
|     fn update_documents( | ||||
|         &self, | ||||
|         format: UpdateFormat, | ||||
|         method: IndexDocumentsMethod, | ||||
|         content: &[u8], | ||||
|         update_builder: UpdateBuilder, | ||||
|         primary_key: Option<&str>, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         // We must use the write transaction of the update here. | ||||
|         let mut wtxn = self.index.write_txn()?; | ||||
|  | ||||
|         // Set the primary key if not set already, ignore if already set. | ||||
|         let has_primary_key = self.index.primary_key(&wtxn)?.is_some(); | ||||
|         if !has_primary_key { | ||||
|             if let Some(primary_key) = primary_key { | ||||
|                 self.index.put_primary_key(&mut wtxn, primary_key)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let mut builder = update_builder.index_documents(&mut wtxn, &self.index); | ||||
|         builder.update_format(format); | ||||
|         builder.index_documents_method(method); | ||||
|  | ||||
|         // Payloads are gzipped; an empty payload is passed through untouched. | ||||
|         let reader: Box<dyn io::Read> = if content.is_empty() { | ||||
|             Box::new(content) | ||||
|         } else { | ||||
|             Box::new(GzDecoder::new(content)) | ||||
|         }; | ||||
|  | ||||
|         let addition_result = builder.execute(reader, |indexing_step, update_id| { | ||||
|             info!("update {}: {:?}", update_id, indexing_step) | ||||
|         })?; | ||||
|  | ||||
|         // Nothing is persisted unless indexing succeeded. | ||||
|         wtxn.commit()?; | ||||
|         Ok(UpdateResult::DocumentsAddition(addition_result)) | ||||
|     } | ||||
|  | ||||
|     /// Removes every document from the index and commits the change. | ||||
|     fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> { | ||||
|         // We must use the write transaction of the update here. | ||||
|         let mut wtxn = self.index.write_txn()?; | ||||
|         let clear = update_builder.clear_documents(&mut wtxn, &self.index); | ||||
|  | ||||
|         // The value returned by the builder is not reported. | ||||
|         let _count = clear.execute()?; | ||||
|         wtxn.commit()?; | ||||
|         Ok(UpdateResult::Other) | ||||
|     } | ||||
|  | ||||
|     /// Applies `settings` to the index and commits the change. | ||||
|     /// | ||||
|     /// Each setting is a nested option: an absent outer value leaves the | ||||
|     /// setting untouched, `Some(None)` resets it and `Some(Some(_))` replaces | ||||
|     /// it. Faceted attributes have no reset: `Some(None)` sets an empty map. | ||||
|     fn update_settings( | ||||
|         &self, | ||||
|         settings: &Settings, | ||||
|         update_builder: UpdateBuilder, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         // We must use the write transaction of the update here. | ||||
|         let mut wtxn = self.index.write_txn()?; | ||||
|         let mut builder = update_builder.settings(&mut wtxn, &self.index); | ||||
|  | ||||
|         // We transpose the settings JSON struct into a real setting update. | ||||
|         match &settings.searchable_attributes { | ||||
|             Some(Some(names)) => builder.set_searchable_fields(names.clone()), | ||||
|             Some(None) => builder.reset_searchable_fields(), | ||||
|             None => (), | ||||
|         } | ||||
|  | ||||
|         match &settings.displayed_attributes { | ||||
|             Some(Some(names)) => builder.set_displayed_fields(names.clone()), | ||||
|             Some(None) => builder.reset_displayed_fields(), | ||||
|             None => (), | ||||
|         } | ||||
|  | ||||
|         if let Some(facet_types) = &settings.faceted_attributes { | ||||
|             builder.set_faceted_fields(facet_types.clone().unwrap_or_default()); | ||||
|         } | ||||
|  | ||||
|         match &settings.criteria { | ||||
|             Some(Some(criteria)) => builder.set_criteria(criteria.clone()), | ||||
|             Some(None) => builder.reset_criteria(), | ||||
|             None => (), | ||||
|         } | ||||
|  | ||||
|         builder.execute(|indexing_step, update_id| { | ||||
|             info!("update {}: {:?}", update_id, indexing_step) | ||||
|         })?; | ||||
|  | ||||
|         wtxn.commit()?; | ||||
|         Ok(UpdateResult::Other) | ||||
|     } | ||||
|  | ||||
|     /// Runs a facets update configured from `levels` and commits the change. | ||||
|     /// | ||||
|     /// Only the values present in `levels` are applied to the builder. | ||||
|     fn update_facets( | ||||
|         &self, | ||||
|         levels: &Facets, | ||||
|         update_builder: UpdateBuilder, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         // We must use the write transaction of the update here. | ||||
|         let mut wtxn = self.index.write_txn()?; | ||||
|         let mut facets = update_builder.facets(&mut wtxn, &self.index); | ||||
|  | ||||
|         if let Some(group_size) = levels.level_group_size { | ||||
|             facets.level_group_size(group_size); | ||||
|         } | ||||
|         if let Some(min_size) = levels.min_level_size { | ||||
|             facets.min_level_size(min_size); | ||||
|         } | ||||
|  | ||||
|         facets.execute()?; | ||||
|         wtxn.commit()?; | ||||
|         Ok(UpdateResult::Other) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the documents listed in `document_ids` and commits the change. | ||||
|     /// | ||||
|     /// `document_ids` is the JSON encoding of a list of external document | ||||
|     /// ids. The value returned by the deletion builder is reported as `deleted`. | ||||
|     fn delete_documents( | ||||
|         &self, | ||||
|         document_ids: &[u8], | ||||
|         update_builder: UpdateBuilder, | ||||
|     ) -> anyhow::Result<UpdateResult> { | ||||
|         let external_ids: Vec<String> = serde_json::from_slice(document_ids)?; | ||||
|         let mut wtxn = self.index.write_txn()?; | ||||
|         let mut deletion = update_builder.delete_documents(&mut wtxn, &self.index)?; | ||||
|  | ||||
|         // We ignore unexisting document ids | ||||
|         for external_id in &external_ids { | ||||
|             deletion.delete_external_id(external_id); | ||||
|         } | ||||
|  | ||||
|         let deleted = deletion.execute()?; | ||||
|         wtxn.commit()?; | ||||
|         Ok(UpdateResult::DocumentDeletion { deleted }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl HandleUpdate<UpdateMeta, UpdateResult, String> for UpdateHandler { | ||||
|     /// Applies the update described by `meta` to the index. | ||||
|     /// | ||||
|     /// `content` is the raw payload registered with the update; only the | ||||
|     /// documents addition and documents deletion updates read it. On success | ||||
|     /// the update is turned into a processed one carrying the result, on | ||||
|     /// failure into a failed one carrying the error rendered as a string. | ||||
|     fn handle_update( | ||||
|         &mut self, | ||||
|         meta: Processing<UpdateMeta>, | ||||
|         content: &[u8], | ||||
|     ) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> { | ||||
|         let builder = self.update_buidler(meta.id()); | ||||
|  | ||||
|         let outcome = match meta.meta() { | ||||
|             UpdateMeta::DocumentsAddition { | ||||
|                 method, | ||||
|                 format, | ||||
|                 primary_key, | ||||
|             } => { | ||||
|                 let primary_key = primary_key.as_deref(); | ||||
|                 self.update_documents(*format, *method, content, builder, primary_key) | ||||
|             } | ||||
|             UpdateMeta::ClearDocuments => self.clear_documents(builder), | ||||
|             UpdateMeta::DeleteDocuments => self.delete_documents(content, builder), | ||||
|             UpdateMeta::Settings(settings) => self.update_settings(settings, builder), | ||||
|             UpdateMeta::Facets(levels) => self.update_facets(levels, builder), | ||||
|         }; | ||||
|  | ||||
|         match outcome { | ||||
|             Ok(success) => Ok(meta.process(success)), | ||||
|             Err(error) => Err(meta.fail(error.to_string())), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,407 @@ | ||||
| use std::path::Path; | ||||
| use std::sync::{Arc, RwLock}; | ||||
|  | ||||
| use crossbeam_channel::Sender; | ||||
| use heed::types::{OwnedType, DecodeIgnore, SerdeJson, ByteSlice}; | ||||
| use heed::{EnvOpenOptions, Env, Database}; | ||||
| use serde::{Serialize, Deserialize}; | ||||
|  | ||||
| use crate::index_controller::updates::*; | ||||
|  | ||||
| type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>; | ||||
|  | ||||
| /// Persistent queue of updates, stored in a heed environment. | ||||
| /// | ||||
| /// `M` is the metadata describing an update, `N` the result attached to a | ||||
| /// processed update and `E` the error attached to a failed one. Every | ||||
| /// database is keyed by the big-endian update id. | ||||
| #[derive(Clone)] | ||||
| pub struct UpdateStore<M, N, E> { | ||||
|     /// Environment holding all the databases below. | ||||
|     env: Env, | ||||
|     /// Metadata of the updates waiting to be processed. | ||||
|     pending_meta: Database<OwnedType<BEU64>, SerdeJson<Pending<M>>>, | ||||
|     /// Raw content of the updates waiting to be processed. | ||||
|     pending: Database<OwnedType<BEU64>, ByteSlice>, | ||||
|     /// Metadata of the processed updates. | ||||
|     processed_meta: Database<OwnedType<BEU64>, SerdeJson<Processed<M, N>>>, | ||||
|     /// Metadata of the failed updates. | ||||
|     failed_meta: Database<OwnedType<BEU64>, SerdeJson<Failed<M, E>>>, | ||||
|     /// Metadata of the aborted updates. | ||||
|     aborted_meta: Database<OwnedType<BEU64>, SerdeJson<Aborted<M>>>, | ||||
|     /// Presumably the update currently being processed, if any (verify | ||||
|     /// against `process_pending_update`). | ||||
|     processing: Arc<RwLock<Option<Processing<M>>>>, | ||||
|     /// Wakes up the processing thread when an update is registered. | ||||
|     notification_sender: Sender<()>, | ||||
| } | ||||
|  | ||||
| /// Processes the updates taken out of an `UpdateStore`. | ||||
| pub trait HandleUpdate<M, N, E> { | ||||
|     /// Processes the update described by `meta`, whose raw payload is | ||||
|     /// `content`, and returns it as processed on success or failed on error. | ||||
|     fn handle_update(&mut self, meta: Processing<M>, content: &[u8]) -> Result<Processed<M, N>, Failed<M, E>>; | ||||
| } | ||||
|  | ||||
| impl<M, N, E> UpdateStore<M, N, E> | ||||
| where | ||||
|     M: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync + Clone, | ||||
|     N: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync, | ||||
|     E: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync, | ||||
| { | ||||
|     /// Opens the update store located at `path` and spawns the thread that | ||||
|     /// processes its pending updates with `update_handler`. | ||||
|     /// | ||||
|     /// `options` is adjusted to allow the five databases of the store. The | ||||
|     /// spawned thread only keeps a weak reference to the store and exits as | ||||
|     /// soon as the store has been dropped or the notification channel is | ||||
|     /// closed. | ||||
|     pub fn open<P, U>( | ||||
|         mut options: EnvOpenOptions, | ||||
|         path: P, | ||||
|         mut update_handler: U, | ||||
|     ) -> heed::Result<Arc<Self>> | ||||
|     where | ||||
|         P: AsRef<Path>, | ||||
|         U: HandleUpdate<M, N, E> + Send + 'static, | ||||
|     { | ||||
|         options.max_dbs(5); | ||||
|  | ||||
|         let env = options.open(path)?; | ||||
|         let pending_meta = env.create_database(Some("pending-meta"))?; | ||||
|         let pending = env.create_database(Some("pending"))?; | ||||
|         let processed_meta = env.create_database(Some("processed-meta"))?; | ||||
|         let aborted_meta = env.create_database(Some("aborted-meta"))?; | ||||
|         let failed_meta = env.create_database(Some("failed-meta"))?; | ||||
|  | ||||
|         let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1); | ||||
|         // Send a first notification to trigger the process. | ||||
|         let _ = notification_sender.send(()); | ||||
|  | ||||
|         let update_store = Arc::new(UpdateStore { | ||||
|             env, | ||||
|             pending_meta, | ||||
|             pending, | ||||
|             processed_meta, | ||||
|             failed_meta, | ||||
|             aborted_meta, | ||||
|             processing: Arc::new(RwLock::new(None)), | ||||
|             notification_sender, | ||||
|         }); | ||||
|  | ||||
|         // We need a weak reference so we can take ownership on the arc later when we | ||||
|         // want to close the index. | ||||
|         let weak_store = Arc::downgrade(&update_store); | ||||
|         std::thread::spawn(move || { | ||||
|             // Block and wait for something to process. | ||||
|             for _ in notification_receiver { | ||||
|                 // Drain the pending updates before waiting for the next notification. | ||||
|                 loop { | ||||
|                     let store = match weak_store.upgrade() { | ||||
|                         Some(store) => store, | ||||
|                         // the ownership on the arc has been taken, we need to exit. | ||||
|                         None => return, | ||||
|                     }; | ||||
|  | ||||
|                     match store.process_pending_update(&mut update_handler) { | ||||
|                         Ok(Some(_)) => continue, | ||||
|                         Ok(None) => break, | ||||
|                         Err(e) => eprintln!("error while processing update: {}", e), | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         }); | ||||
|  | ||||
|         Ok(update_store) | ||||
|     } | ||||
|  | ||||
|     /// Consumes the store and asks its environment to prepare for closing, | ||||
|     /// returning the closing event produced by heed. | ||||
|     pub fn prepare_for_closing(self) -> heed::EnvClosingEvent { | ||||
|         let Self { env, .. } = self; | ||||
|         env.prepare_for_closing() | ||||
|     } | ||||
|  | ||||
|     /// Returns the new biggest id to use to store the new update. | ||||
|     /// | ||||
|     /// The id is one more than the biggest id found in the pending, | ||||
|     /// processed, aborted and failed stores, or `0` when all of them are | ||||
|     /// empty. The failed store must be taken into account too: otherwise the | ||||
|     /// id of the latest update would be handed out again after it failed. | ||||
|     fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> { | ||||
|         let last_pending = self.pending_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_processed = self.processed_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_aborted = self.aborted_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_failed = self.failed_meta | ||||
|             .remap_data_type::<DecodeIgnore>() | ||||
|             .last(txn)? | ||||
|             .map(|(k, _)| k.get()); | ||||
|  | ||||
|         let last_update_id = [last_pending, last_processed, last_aborted, last_failed] | ||||
|             .iter() | ||||
|             .copied() | ||||
|             .flatten() | ||||
|             .max(); | ||||
|  | ||||
|         Ok(last_update_id.map_or(0, |last_id| last_id + 1)) | ||||
|     } | ||||
|  | ||||
|     /// Stores `content` in the pending store and `meta` in the pending-meta | ||||
|     /// store under a freshly allocated update id, then notifies the | ||||
|     /// processing thread. Returns the pending update that was registered. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics when the notification channel is disconnected. | ||||
|     pub fn register_update( | ||||
|         &self, | ||||
|         meta: M, | ||||
|         content: &[u8] | ||||
|     ) -> heed::Result<Pending<M>> { | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|  | ||||
|         // The id is allocated while the write txn is open: concurrent | ||||
|         // registrations have to wait for their own write txn, so two updates | ||||
|         // can never be given the same id. | ||||
|         let id = self.new_update_id(&wtxn)?; | ||||
|         let key = BEU64::new(id); | ||||
|  | ||||
|         let pending_update = Pending::new(meta, id); | ||||
|         self.pending_meta.put(&mut wtxn, &key, &pending_update)?; | ||||
|         self.pending.put(&mut wtxn, &key, content)?; | ||||
|  | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         // Any send error other than a disconnected channel is ignored. | ||||
|         match self.notification_sender.try_send(()) { | ||||
|             Err(e) if e.is_disconnected() => { | ||||
|                 panic!("update notification channel is disconnected") | ||||
|             } | ||||
|             _ => (), | ||||
|         } | ||||
|  | ||||
|         Ok(pending_update) | ||||
|     } | ||||
|     /// Runs `handler` on the pending update with the lowest id, if there is one. | ||||
|     /// | ||||
|     /// The update is read through a read-only txn; a write txn is opened only *after* | ||||
|     /// the handler has returned, to remove the update from the pending stores and to | ||||
|     /// write its new meta to either the processed-meta or the failed-meta store. | ||||
|     /// | ||||
|     /// Returns `Ok(Some(()))` when an update was handled and `Ok(None)` when the | ||||
|     /// pending-meta store is empty. | ||||
|     fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<()>> | ||||
|     where | ||||
|         U: HandleUpdate<M, N, E> + Send + 'static, | ||||
|     { | ||||
|         // Pending updates are fetched in key order through a read transaction. | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|  | ||||
|         let (update_key, pending) = match self.pending_meta.first(&rtxn)? { | ||||
|             Some(entry) => entry, | ||||
|             // Nothing is waiting to be processed. | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         let content = self.pending | ||||
|             .get(&rtxn, &update_key)? | ||||
|             .expect("associated update content"); | ||||
|  | ||||
|         // Publish the update as "processing" before handing it to the handler. This | ||||
|         // state is kept in `self.processing` only; no database write happens here. | ||||
|         let processing = pending.processing(); | ||||
|         { | ||||
|             let mut current = self.processing.write().unwrap(); | ||||
|             current.replace(processing.clone()); | ||||
|         } | ||||
|  | ||||
|         let outcome = handler.handle_update(processing, content); | ||||
|         // The handler is done with the content, the reader can be released. | ||||
|         drop(rtxn); | ||||
|  | ||||
|         // Clear the processing slot, drop the update from both pending stores and | ||||
|         // record the handler outcome, all before committing. | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|         self.processing.write().unwrap().take(); | ||||
|         self.pending_meta.delete(&mut wtxn, &update_key)?; | ||||
|         self.pending.delete(&mut wtxn, &update_key)?; | ||||
|         match outcome { | ||||
|             Ok(processed) => { | ||||
|                 self.processed_meta.put(&mut wtxn, &update_key, &processed)?; | ||||
|             } | ||||
|             Err(failed) => { | ||||
|                 self.failed_meta.put(&mut wtxn, &update_key, &failed)?; | ||||
|             } | ||||
|         } | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(Some(())) | ||||
|     } | ||||
|  | ||||
|     /// Calls `f` with a snapshot of the update currently being processed (if any), | ||||
|     /// followed by iterators over the *processed*, *aborted*, *pending* and *failed* | ||||
|     /// meta stores, in that order. All iterators share one read txn. | ||||
|     /// | ||||
|     /// Returns whatever `f` returns. | ||||
|     pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T> | ||||
|     where | ||||
|         F: for<'a> FnMut( | ||||
|             Option<Processing<M>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Processed<M, N>>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Aborted<M>>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Pending<M>>>, | ||||
|             heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Failed<M, E>>>, | ||||
|         ) -> heed::Result<T>, | ||||
|     { | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|  | ||||
|         let processed = self.processed_meta.iter(&rtxn)?; | ||||
|         let aborted = self.aborted_meta.iter(&rtxn)?; | ||||
|         let pending = self.pending_meta.iter(&rtxn)?; | ||||
|         // The processing slot is cloned so the lock is not held while `f` runs. | ||||
|         let processing = self.processing.read().unwrap().clone(); | ||||
|         let failed = self.failed_meta.iter(&rtxn)?; | ||||
|  | ||||
|         f(processing, processed, aborted, pending, failed) | ||||
|     } | ||||
|  | ||||
|     /// Looks up the status of the update identified by `update_id`. | ||||
|     /// | ||||
|     /// The in-memory processing slot is checked first, then the pending, processed, | ||||
|     /// aborted and failed meta stores in that order. Returns `Ok(None)` when the id is | ||||
|     /// found in none of them. | ||||
|     pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatus<M, N, E>>> { | ||||
|         let rtxn = self.env.read_txn()?; | ||||
|         let key = BEU64::new(update_id); | ||||
|  | ||||
|         { | ||||
|             let current = self.processing.read().unwrap(); | ||||
|             match &*current { | ||||
|                 Some(processing) if processing.id() == update_id => { | ||||
|                     return Ok(Some(UpdateStatus::Processing(processing.clone()))); | ||||
|                 } | ||||
|                 _ => (), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Each store is only queried when the previous ones did not hold the key. | ||||
|         let status = if let Some(pending) = self.pending_meta.get(&rtxn, &key)? { | ||||
|             Some(UpdateStatus::Pending(pending)) | ||||
|         } else if let Some(processed) = self.processed_meta.get(&rtxn, &key)? { | ||||
|             Some(UpdateStatus::Processed(processed)) | ||||
|         } else if let Some(aborted) = self.aborted_meta.get(&rtxn, &key)? { | ||||
|             Some(UpdateStatus::Aborted(aborted)) | ||||
|         } else if let Some(failed) = self.failed_meta.get(&rtxn, &key)? { | ||||
|             Some(UpdateStatus::Failed(failed)) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|  | ||||
|         Ok(status) | ||||
|     } | ||||
|  | ||||
|     /// Aborts a pending update: its content is deleted and its meta is moved from the | ||||
|     /// pending-meta store into the aborted-meta store. | ||||
|     /// | ||||
|     /// Returns `None` without changing anything when `update_id` is the first entry of | ||||
|     /// the pending-meta store (that entry is treated as the one being processed) or | ||||
|     /// when it is not in the pending-meta store at all, e.g. because it has already | ||||
|     /// been processed or never existed. | ||||
|     #[allow(dead_code)] | ||||
|     pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<Aborted<M>>> { | ||||
|         let mut txn = self.env.write_txn()?; | ||||
|         let key = BEU64::new(update_id); | ||||
|  | ||||
|         // The lowest pending id is considered in-flight and is never aborted. | ||||
|         let first_pending_id = self.pending_meta.first(&txn)?.map(|(k, _)| k.get()); | ||||
|         if first_pending_id == Some(update_id) { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         let aborted = match self.pending_meta.get(&txn, &key)? { | ||||
|             Some(pending) => pending.abort(), | ||||
|             None => return Ok(None), | ||||
|         }; | ||||
|  | ||||
|         self.aborted_meta.put(&mut txn, &key, &aborted)?; | ||||
|         self.pending_meta.delete(&mut txn, &key)?; | ||||
|         self.pending.delete(&mut txn, &key)?; | ||||
|         txn.commit()?; | ||||
|  | ||||
|         Ok(Some(aborted)) | ||||
|     } | ||||
|  | ||||
|     /// Aborts every pending update except the first one, which is treated as the one | ||||
|     /// currently being processed. | ||||
|     /// | ||||
|     /// Returns the `(id, aborted meta)` pairs that were moved to the aborted-meta store. | ||||
|     #[allow(dead_code)] | ||||
|     pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, Aborted<M>)>> { | ||||
|         let mut wtxn = self.env.write_txn()?; | ||||
|  | ||||
|         // Collect first, mutate afterwards: the iterator borrows the txn, so nothing | ||||
|         // can be written while it is alive. The first decoding error stops the scan. | ||||
|         let aborted_updates = self.pending_meta | ||||
|             .iter(&wtxn)? | ||||
|             .skip(1) | ||||
|             .map(|entry| entry.map(|(key, pending)| (key.get(), pending.abort()))) | ||||
|             .collect::<heed::Result<Vec<_>>>()?; | ||||
|  | ||||
|         for (id, aborted) in aborted_updates.iter() { | ||||
|             let key = BEU64::new(*id); | ||||
|             self.aborted_meta.put(&mut wtxn, &key, aborted)?; | ||||
|             self.pending_meta.delete(&mut wtxn, &key)?; | ||||
|             self.pending.delete(&mut wtxn, &key)?; | ||||
|         } | ||||
|  | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(aborted_updates) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|     use std::thread; | ||||
|     use std::time::{Duration, Instant}; | ||||
|  | ||||
|     // Lets the tests pass a plain closure wherever a `HandleUpdate` is expected. | ||||
|     impl<M, N, F, E> HandleUpdate<M, N, E> for F | ||||
|         where F: FnMut(Processing<M>, &[u8]) -> Result<Processed<M, N>, Failed<M, E>> + Send + 'static { | ||||
|             fn handle_update(&mut self, meta: Processing<M>, content: &[u8]) -> Result<Processed<M, N>, Failed<M, E>> { | ||||
|                 self(meta, content) | ||||
|             } | ||||
|         } | ||||
|  | ||||
|     /// A single registered update ends up in the `Processed` state with the | ||||
|     /// result produced by the handler. | ||||
|     #[test] | ||||
|     fn simple() { | ||||
|         let dir = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(4096 * 100); | ||||
|         let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> { | ||||
|             let new_meta = meta.meta().to_string() + " processed"; | ||||
|             let processed = meta.process(new_meta); | ||||
|             Ok(processed) | ||||
|         }).unwrap(); | ||||
|  | ||||
|         let meta = String::from("kiki"); | ||||
|         let update = update_store.register_update(meta, &[]).unwrap(); | ||||
|         // Give the background thread time to pick the update up. | ||||
|         thread::sleep(Duration::from_millis(100)); | ||||
|         let meta = update_store.meta(update.id()).unwrap().unwrap(); | ||||
|         if let UpdateStatus::Processed(Processed { success, .. }) = meta { | ||||
|             assert_eq!(success, "kiki processed"); | ||||
|         } else { | ||||
|             panic!() | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Registering must not block on a slow handler, and every update must | ||||
|     /// eventually be processed. | ||||
|     #[test] | ||||
|     #[ignore] | ||||
|     fn long_running_update() { | ||||
|         let dir = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(4096 * 100); | ||||
|         let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> { | ||||
|             thread::sleep(Duration::from_millis(400)); | ||||
|             // The leading space matters: the assertions below expect "<name> processed". | ||||
|             let new_meta = meta.meta().to_string() + " processed"; | ||||
|             let processed = meta.process(new_meta); | ||||
|             Ok(processed) | ||||
|         }).unwrap(); | ||||
|  | ||||
|         let before_register = Instant::now(); | ||||
|  | ||||
|         let meta = String::from("kiki"); | ||||
|         let update_kiki = update_store.register_update(meta, &[]).unwrap(); | ||||
|         assert!(before_register.elapsed() < Duration::from_millis(200)); | ||||
|  | ||||
|         let meta = String::from("coco"); | ||||
|         let update_coco = update_store.register_update(meta, &[]).unwrap(); | ||||
|         assert!(before_register.elapsed() < Duration::from_millis(200)); | ||||
|  | ||||
|         let meta = String::from("cucu"); | ||||
|         let update_cucu = update_store.register_update(meta, &[]).unwrap(); | ||||
|         assert!(before_register.elapsed() < Duration::from_millis(200)); | ||||
|  | ||||
|         // Three updates at 400ms each, plus some slack. | ||||
|         thread::sleep(Duration::from_millis(400 * 3 + 100)); | ||||
|  | ||||
|         let meta = update_store.meta(update_kiki.id()).unwrap().unwrap(); | ||||
|         if let UpdateStatus::Processed(Processed { success, .. }) = meta { | ||||
|             assert_eq!(success, "kiki processed"); | ||||
|         } else { | ||||
|             panic!() | ||||
|         } | ||||
|  | ||||
|         let meta = update_store.meta(update_coco.id()).unwrap().unwrap(); | ||||
|         if let UpdateStatus::Processed(Processed { success, .. }) = meta { | ||||
|             assert_eq!(success, "coco processed"); | ||||
|         } else { | ||||
|             panic!() | ||||
|         } | ||||
|  | ||||
|         let meta = update_store.meta(update_cucu.id()).unwrap().unwrap(); | ||||
|         if let UpdateStatus::Processed(Processed { success, .. }) = meta { | ||||
|             assert_eq!(success, "cucu processed"); | ||||
|         } else { | ||||
|             panic!() | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										281
									
								
								meilisearch-http/src/index_controller/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										281
									
								
								meilisearch-http/src/index_controller/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,281 @@ | ||||
| mod local_index_controller; | ||||
| mod updates; | ||||
|  | ||||
| pub use local_index_controller::LocalIndexController; | ||||
|  | ||||
| use std::collections::HashMap; | ||||
| use std::num::NonZeroUsize; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use anyhow::Result; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use milli::Index; | ||||
| use milli::update::{IndexDocumentsMethod, UpdateFormat, DocumentAdditionResult}; | ||||
| use serde::{Serialize, Deserialize, de::Deserializer}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| pub use updates::{Processed, Processing, Failed}; | ||||
|  | ||||
| pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>; | ||||
|  | ||||
| /// Metadata describing one index. Field names are (de)serialized in camelCase. | ||||
| /// Only `uid` is public; the other fields are private to this module. | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexMetadata { | ||||
|     pub uid: String, | ||||
|     uuid: Uuid, | ||||
|     created_at: DateTime<Utc>, | ||||
|     updated_at: DateTime<Utc>, | ||||
|     // `None` when no primary key is recorded for the index. | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// The kind of update that was requested, together with its parameters. | ||||
| /// Serialized as an internally tagged enum: the variant name goes in a `type` field. | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| #[serde(tag = "type")] | ||||
| pub enum UpdateMeta { | ||||
|     DocumentsAddition { | ||||
|         method: IndexDocumentsMethod, | ||||
|         format: UpdateFormat, | ||||
|         primary_key: Option<String>, | ||||
|     }, | ||||
|     ClearDocuments, | ||||
|     DeleteDocuments, | ||||
|     Settings(Settings), | ||||
|     Facets(Facets), | ||||
| } | ||||
|  | ||||
| /// Payload of the `UpdateMeta::Facets` variant. Keys are camelCase and unknown | ||||
| /// keys are rejected on deserialization. | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Facets { | ||||
|     // NOTE(review): both look like facet level tuning parameters; confirm against milli. | ||||
|     pub level_group_size: Option<NonZeroUsize>, | ||||
|     pub min_level_size: Option<NonZeroUsize>, | ||||
| } | ||||
|  | ||||
| /// Deserializes a `T` and wraps it in `Some`. | ||||
| /// | ||||
| /// Meant for `#[serde(default, deserialize_with = ...)]` on `Option<Option<_>>` fields: | ||||
| /// a key that is present (even with a `null` value) yields `Some(..)`, while a missing | ||||
| /// key falls back to the `None` default. | ||||
| fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error> | ||||
| where | ||||
|     T: Deserialize<'de>, | ||||
|     D: Deserializer<'de>, | ||||
| { | ||||
|     let value = T::deserialize(deserializer)?; | ||||
|     Ok(Some(value)) | ||||
| } | ||||
|  | ||||
| /// A settings update. Keys are camelCase and unknown keys are rejected. | ||||
| /// | ||||
| /// Every field is an `Option<Option<_>>` with three distinct states: | ||||
| /// - `None`: the key was absent, it is also skipped on serialization; | ||||
| /// - `Some(None)`: the key was present with a `null` value (see `Settings::cleared`); | ||||
| /// - `Some(Some(v))`: the key was present with the value `v`. | ||||
| /// | ||||
| /// All fields carry the same serde attributes so that these three states survive a | ||||
| /// serialization round trip. | ||||
| #[derive(Debug, Clone, Default, Serialize, Deserialize)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Settings { | ||||
|     #[serde( | ||||
|         default, | ||||
|         deserialize_with = "deserialize_some", | ||||
|         skip_serializing_if = "Option::is_none", | ||||
|     )] | ||||
|     pub displayed_attributes: Option<Option<Vec<String>>>, | ||||
|  | ||||
|     #[serde( | ||||
|         default, | ||||
|         deserialize_with = "deserialize_some", | ||||
|         skip_serializing_if = "Option::is_none", | ||||
|     )] | ||||
|     pub searchable_attributes: Option<Option<Vec<String>>>, | ||||
|  | ||||
|     // Without `deserialize_some` a `null` here would be read back as `None`, which | ||||
|     // would turn an explicit `Some(None)` into "key absent" after a round trip. | ||||
|     #[serde( | ||||
|         default, | ||||
|         deserialize_with = "deserialize_some", | ||||
|         skip_serializing_if = "Option::is_none", | ||||
|     )] | ||||
|     pub faceted_attributes: Option<Option<HashMap<String, String>>>, | ||||
|  | ||||
|     #[serde( | ||||
|         default, | ||||
|         deserialize_with = "deserialize_some", | ||||
|         skip_serializing_if = "Option::is_none", | ||||
|     )] | ||||
|     pub criteria: Option<Option<Vec<String>>>, | ||||
| } | ||||
|  | ||||
| impl Settings { | ||||
|     /// Returns settings in which every field is set to `Some(None)`. | ||||
|     pub fn cleared() -> Self { | ||||
|         Settings { | ||||
|             criteria: Some(None), | ||||
|             faceted_attributes: Some(None), | ||||
|             searchable_attributes: Some(None), | ||||
|             displayed_attributes: Some(None), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| /// The result attached to a successfully processed update. | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum UpdateResult { | ||||
|     DocumentsAddition(DocumentAdditionResult), | ||||
|     // NOTE(review): `deleted` is presumably the number of removed documents; confirm. | ||||
|     DocumentDeletion { deleted: usize }, | ||||
|     Other, | ||||
| } | ||||
|  | ||||
| /// Parameters passed to `IndexController::create_index` and | ||||
| /// `IndexController::update_index`. Both fields are optional. | ||||
| #[derive(Clone, Debug)] | ||||
| pub struct IndexSettings { | ||||
|     pub name: Option<String>, | ||||
|     pub primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// The `IndexController` is in charge of the access to the underlying indices. It splits the logic | ||||
| /// for read access, which is provided through a handle to the index, and write access, which must | ||||
| /// be provided by the implementer. This allows the implementer to define the behaviour of write | ||||
| /// accesses to the indices, and to abstract the scheduling of the updates. The implementer must be | ||||
| /// able to provide an instance of `IndexStore`. | ||||
| pub trait IndexController { | ||||
|  | ||||
|     /* | ||||
|      * Write operations | ||||
|      * | ||||
|      * Logic for the write operations needs to be provided by the implementer, since they can be | ||||
|      * made asynchronous thanks to an update_store for example. | ||||
|      * | ||||
|      * */ | ||||
|  | ||||
|     /// Perform document addition on the database. If the provided index does not exist, it will be | ||||
|     /// created when the addition is applied to the index. | ||||
|     fn add_documents<S: AsRef<str>>( | ||||
|         &self, | ||||
|         index: S, | ||||
|         method: IndexDocumentsMethod, | ||||
|         format: UpdateFormat, | ||||
|         data: &[u8], | ||||
|         primary_key: Option<String>, | ||||
|     ) -> anyhow::Result<UpdateStatus>; | ||||
|  | ||||
|     /// Clear all documents in the given index. | ||||
|     fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<UpdateStatus>; | ||||
|  | ||||
|     /// Delete all documents in `document_ids`. | ||||
|     fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<UpdateStatus>; | ||||
|  | ||||
|     /// Updates an index settings. If the index does not exist, it will be created when the update | ||||
|     /// is applied to the index. | ||||
|     fn update_settings<S: AsRef<str>>(&self, index_uid: S, settings: Settings) -> anyhow::Result<UpdateStatus>; | ||||
|  | ||||
|     /// Create an index from the given `index_settings` and return its metadata. | ||||
|     fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata>; | ||||
|  | ||||
|     /// Delete index with the given `index_uid`, attempting to close it beforehand. | ||||
|     fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> Result<()>; | ||||
|  | ||||
|     /// Swap two indexes, concretely, it simply swaps the index the names point to. | ||||
|     fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, index1_uid: S1, index2_uid: S2) -> Result<()>; | ||||
|  | ||||
|     /// Apply an update to the given index. This method can be called when an update is ready to be | ||||
|     /// processed. | ||||
|     /// | ||||
|     /// The provided default is a `todo!()` placeholder and panics when called. | ||||
|     fn handle_update<S: AsRef<str>>( | ||||
|         &self, | ||||
|         _index: S, | ||||
|         _update_id: u64, | ||||
|         _meta: Processing<UpdateMeta>, | ||||
|         _content: &[u8] | ||||
|     ) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> { | ||||
|         todo!() | ||||
|     } | ||||
|  | ||||
|     /// Returns, if it exists, the `Index` with the provided name. | ||||
|     fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>>; | ||||
|  | ||||
|     /// Returns the update status of the update `id` of the given index, if any. | ||||
|     fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus>>; | ||||
|  | ||||
|     /// Returns all the update statuses for an index. | ||||
|     fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>>; | ||||
|  | ||||
|     /// List all the indexes. | ||||
|     fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>>; | ||||
|  | ||||
|     /// Applies `index_settings` to the index called `name` and returns its metadata. | ||||
|     /// | ||||
|     /// NOTE(review): the shared tests in this module expect a rename to be rejected and the | ||||
|     /// primary key to be settable only once; confirm implementers follow that contract. | ||||
|     fn update_index(&self, name: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata>; | ||||
| } | ||||
|  | ||||
|  | ||||
| #[cfg(test)] | ||||
| #[macro_use] | ||||
| pub(crate) mod test { | ||||
|     use super::*; | ||||
|  | ||||
|     /// Expands to one `#[test]` per shared scenario defined in this module. The block | ||||
|     /// argument must evaluate to the `IndexController` implementation under test; it | ||||
|     /// is expanded once per generated test. | ||||
|     #[macro_export] | ||||
|     macro_rules! make_index_controller_tests { | ||||
|         ($make_controller:block) => { | ||||
|             #[test] | ||||
|             fn test_create_and_list_indexes() { | ||||
|                 crate::index_controller::test::create_and_list_indexes($make_controller); | ||||
|             } | ||||
|  | ||||
|             #[test] | ||||
|             fn test_create_index_with_no_name_is_error() { | ||||
|                 crate::index_controller::test::create_index_with_no_name_is_error($make_controller); | ||||
|             } | ||||
|  | ||||
|             #[test] | ||||
|             fn test_update_index() { | ||||
|                 crate::index_controller::test::update_index($make_controller); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     /// Two created indexes are listed, in creation order, with their primary key. | ||||
|     pub(crate) fn create_and_list_indexes(controller: impl IndexController) { | ||||
|         let without_primary_key = IndexSettings { | ||||
|             primary_key: None, | ||||
|             name: Some("test_index".to_string()), | ||||
|         }; | ||||
|         let with_primary_key = IndexSettings { | ||||
|             primary_key: Some("foo".to_string()), | ||||
|             name: Some("test_index2".to_string()), | ||||
|         }; | ||||
|  | ||||
|         controller.create_index(without_primary_key).unwrap(); | ||||
|         controller.create_index(with_primary_key).unwrap(); | ||||
|  | ||||
|         let listed = controller.list_indexes().unwrap(); | ||||
|         assert_eq!(listed.len(), 2); | ||||
|         assert_eq!(listed[0].uid, "test_index"); | ||||
|         assert_eq!(listed[1].uid, "test_index2"); | ||||
|         assert_eq!(listed[1].primary_key.clone().unwrap(), "foo"); | ||||
|     } | ||||
|  | ||||
|     /// Creating an index without a name must fail. | ||||
|     pub(crate) fn create_index_with_no_name_is_error(controller: impl IndexController) { | ||||
|         let nameless = IndexSettings { | ||||
|             primary_key: None, | ||||
|             name: None, | ||||
|         }; | ||||
|         let outcome = controller.create_index(nameless); | ||||
|         assert!(outcome.is_err()); | ||||
|     } | ||||
|  | ||||
|     /// Walks through the `update_index` contract: an empty update is a no-op, a rename | ||||
|     /// is rejected, and the primary key can be set exactly once. | ||||
|     pub(crate) fn update_index(controller: impl IndexController) { | ||||
|         let initial = IndexSettings { | ||||
|             primary_key: None, | ||||
|             name: Some("test".to_string()), | ||||
|         }; | ||||
|         assert!(controller.create_index(initial).is_ok()); | ||||
|  | ||||
|         // An empty update returns the index meta unchanged. | ||||
|         let nothing = IndexSettings { | ||||
|             primary_key: None, | ||||
|             name: None, | ||||
|         }; | ||||
|         let unchanged = controller.update_index("test", nothing).unwrap(); | ||||
|         assert_eq!(unchanged.uid, "test"); | ||||
|         assert_eq!(unchanged.created_at, unchanged.updated_at); | ||||
|         assert!(unchanged.primary_key.is_none()); | ||||
|  | ||||
|         // Changing the name triggers an error. | ||||
|         let rename = IndexSettings { | ||||
|             primary_key: None, | ||||
|             name: Some("bar".to_string()), | ||||
|         }; | ||||
|         assert!(controller.update_index("test", rename).is_err()); | ||||
|  | ||||
|         // Setting the primary key for the first time succeeds and bumps `updated_at`. | ||||
|         let set_primary_key = IndexSettings { | ||||
|             primary_key: Some("foo".to_string()), | ||||
|             name: None, | ||||
|         }; | ||||
|         let updated = controller.update_index("test", set_primary_key.clone()).unwrap(); | ||||
|         assert_eq!(updated.uid, "test"); | ||||
|         assert!(updated.created_at < updated.updated_at); | ||||
|         assert_eq!(updated.primary_key.unwrap(), "foo"); | ||||
|  | ||||
|         // Setting the primary key again is an error. | ||||
|         assert!(controller.update_index("test", set_primary_key).is_err()); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										179
									
								
								meilisearch-http/src/index_controller/updates.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										179
									
								
								meilisearch-http/src/index_controller/updates.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,179 @@ | ||||
| use chrono::{Utc, DateTime}; | ||||
| use serde::{Serialize, Deserialize}; | ||||
|  | ||||
| /// An update that has been registered together with its user-supplied meta `M`. | ||||
| /// Field names are (de)serialized in camelCase. | ||||
| #[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Pending<M> { | ||||
|     pub update_id: u64, | ||||
|     pub meta: M, | ||||
|     // Set to the current UTC time by `Pending::new`. | ||||
|     pub enqueued_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl<M> Pending<M> { | ||||
|     /// Builds a pending update for `meta` under `update_id`, stamped with the current | ||||
|     /// UTC time as its enqueue date. | ||||
|     pub fn new(meta: M, update_id: u64) -> Self { | ||||
|         let enqueued_at = Utc::now(); | ||||
|         Pending { update_id, meta, enqueued_at } | ||||
|     } | ||||
|  | ||||
|     /// Consumes the pending update and wraps it in a `Processing` stamped with the | ||||
|     /// current UTC time. | ||||
|     pub fn processing(self) -> Processing<M> { | ||||
|         let started_processing_at = Utc::now(); | ||||
|         Processing { started_processing_at, from: self } | ||||
|     } | ||||
|  | ||||
|     /// Consumes the pending update and wraps it in an `Aborted` stamped with the | ||||
|     /// current UTC time. | ||||
|     pub fn abort(self) -> Aborted<M> { | ||||
|         let aborted_at = Utc::now(); | ||||
|         Aborted { aborted_at, from: self } | ||||
|     } | ||||
|  | ||||
|     /// Borrows the user-supplied meta. | ||||
|     pub fn meta(&self) -> &M { | ||||
|         let Pending { meta, .. } = self; | ||||
|         meta | ||||
|     } | ||||
|  | ||||
|     /// Returns the id of this update. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         let Pending { update_id, .. } = self; | ||||
|         *update_id | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An update that was processed successfully, carrying the result `N`. | ||||
| /// The fields of the originating `Processing` are flattened into this struct | ||||
| /// on (de)serialization. | ||||
| #[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Processed<M, N> { | ||||
|     pub success: N, | ||||
|     // Set to the current UTC time by `Processing::process`. | ||||
|     pub processed_at: DateTime<Utc>, | ||||
|     #[serde(flatten)] | ||||
|     pub from: Processing<M>, | ||||
| } | ||||
|  | ||||
| impl<M, N> Processed<M, N> { | ||||
|     /// Returns the id of the update this result belongs to. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         let Processed { from: processing, .. } = self; | ||||
|         processing.id() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An update that has been handed over for processing. The fields of the | ||||
| /// originating `Pending` are flattened into this struct on (de)serialization. | ||||
| #[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Processing<M> { | ||||
|     #[serde(flatten)] | ||||
|     pub from: Pending<M>, | ||||
|     // Set to the current UTC time by `Pending::processing`. | ||||
|     pub started_processing_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl<M> Processing<M> { | ||||
|     /// Returns the id of the underlying pending update. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         self.from.update_id | ||||
|     } | ||||
|  | ||||
|     /// Borrows the user-supplied meta of the underlying pending update. | ||||
|     pub fn meta(&self) -> &M { | ||||
|         &self.from.meta | ||||
|     } | ||||
|  | ||||
|     /// Consumes the update and marks it as processed with the result `meta`, stamped | ||||
|     /// with the current UTC time. | ||||
|     pub fn process<N>(self, meta: N) -> Processed<M, N> { | ||||
|         let processed_at = Utc::now(); | ||||
|         Processed { from: self, processed_at, success: meta } | ||||
|     } | ||||
|  | ||||
|     /// Consumes the update and marks it as failed with `error`, stamped with the | ||||
|     /// current UTC time. | ||||
|     pub fn fail<E>(self, error: E) -> Failed<M, E> { | ||||
|         let failed_at = Utc::now(); | ||||
|         Failed { failed_at, error, from: self } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A pending update that was aborted. The fields of the originating `Pending` | ||||
| /// are flattened into this struct on (de)serialization. | ||||
| #[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Aborted<M> { | ||||
|     #[serde(flatten)] | ||||
|     from: Pending<M>, | ||||
|     // Set to the current UTC time by `Pending::abort`. | ||||
|     aborted_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl<M> Aborted<M> { | ||||
|     /// Returns the id of the update that was aborted. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         let Aborted { from: pending, .. } = self; | ||||
|         pending.id() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// An update whose processing failed with the error `E`. The fields of the | ||||
| /// originating `Processing` are flattened into this struct on (de)serialization. | ||||
| #[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Failed<M, E> { | ||||
|     #[serde(flatten)] | ||||
|     from: Processing<M>, | ||||
|     error: E, | ||||
|     // Set to the current UTC time by `Processing::fail`. | ||||
|     failed_at: DateTime<Utc>, | ||||
| } | ||||
|  | ||||
| impl<M, E> Failed<M, E> { | ||||
|     /// Returns the id of the update that failed. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         let Failed { from: processing, .. } = self; | ||||
|         processing.id() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// The state an update can be in. Serialized as an internally tagged enum: the | ||||
| /// camelCase variant name goes in a `status` field next to the variant's own fields. | ||||
| #[derive(Debug, PartialEq, Eq, Hash, Serialize)] | ||||
| #[serde(tag = "status", rename_all = "camelCase")] | ||||
| pub enum UpdateStatus<M, N, E> { | ||||
|     Processing(Processing<M>), | ||||
|     Pending(Pending<M>), | ||||
|     Processed(Processed<M, N>), | ||||
|     Aborted(Aborted<M>), | ||||
|     Failed(Failed<M, E>), | ||||
| } | ||||
|  | ||||
| impl<M, N, E> UpdateStatus<M, N, E> { | ||||
|     /// Returns the update id, whatever state the update is in. | ||||
|     pub fn id(&self) -> u64 { | ||||
|         match self { | ||||
|             Self::Pending(update) => update.id(), | ||||
|             Self::Processing(update) => update.id(), | ||||
|             Self::Processed(update) => update.id(), | ||||
|             Self::Failed(update) => update.id(), | ||||
|             Self::Aborted(update) => update.id(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Borrows the inner `Processed` when the update is in that state, `None` otherwise. | ||||
|     pub fn processed(&self) -> Option<&Processed<M, N>> { | ||||
|         if let Self::Processed(processed) = self { | ||||
|             Some(processed) | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a `Pending` update in `UpdateStatus::Pending`. | ||||
| impl<M, N, E> From<Pending<M>> for UpdateStatus<M, N, E> { | ||||
|     fn from(pending: Pending<M>) -> Self { | ||||
|         UpdateStatus::Pending(pending) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps an `Aborted` update in `UpdateStatus::Aborted`. | ||||
| impl<M, N, E> From<Aborted<M>> for UpdateStatus<M, N, E> { | ||||
|     fn from(aborted: Aborted<M>) -> Self { | ||||
|         UpdateStatus::Aborted(aborted) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a `Processed` update in `UpdateStatus::Processed`. | ||||
| impl<M, N, E> From<Processed<M, N>> for UpdateStatus<M, N, E> { | ||||
|     fn from(processed: Processed<M, N>) -> Self { | ||||
|         UpdateStatus::Processed(processed) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a `Processing` update in `UpdateStatus::Processing`. | ||||
| impl<M, N, E> From<Processing<M>> for UpdateStatus<M, N, E> { | ||||
|     fn from(processing: Processing<M>) -> Self { | ||||
|         UpdateStatus::Processing(processing) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wraps a `Failed` update in `UpdateStatus::Failed`. | ||||
| impl<M, N, E> From<Failed<M, E>> for UpdateStatus<M, N, E> { | ||||
|     fn from(failed: Failed<M, E>) -> Self { | ||||
|         UpdateStatus::Failed(failed) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										60
									
								
								meilisearch-http/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								meilisearch-http/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| #![allow(clippy::or_fun_call)] | ||||
|  | ||||
| pub mod data; | ||||
| pub mod error; | ||||
| pub mod helpers; | ||||
| pub mod option; | ||||
| pub mod routes; | ||||
| mod index_controller; | ||||
|  | ||||
| use actix_http::Error; | ||||
| use actix_service::ServiceFactory; | ||||
| use actix_web::{dev, web, App}; | ||||
|  | ||||
| pub use option::Opt; | ||||
| pub use self::data::Data; | ||||
| use self::error::payload_error_handler; | ||||
|  | ||||
| /// Builds the actix-web application: shared state, extractor configuration | ||||
| /// and every route group. | ||||
| /// | ||||
| /// `data` is cloned into the application state. When `enable_frontend` is | ||||
| /// `true`, the `load_html` and `load_css` services are registered as well. | ||||
| pub fn create_app( | ||||
|     data: &Data, | ||||
|     enable_frontend: bool, | ||||
| ) -> App< | ||||
|     impl ServiceFactory< | ||||
|         Config = (), | ||||
|         Request = dev::ServiceRequest, | ||||
|         Response = dev::ServiceResponse<actix_http::body::Body>, | ||||
|         Error = Error, | ||||
|         InitError = (), | ||||
|     >, | ||||
|     actix_http::body::Body, | ||||
| > { | ||||
|     // JSON bodies: size-limited, accepted whatever the mime type is, and | ||||
|     // extraction failures go through `payload_error_handler`. | ||||
|     let json_config = web::JsonConfig::default() | ||||
|         .limit(data.http_payload_size_limit()) | ||||
|         .content_type(|_mime| true) | ||||
|         .error_handler(|err, _req| payload_error_handler(err).into()); | ||||
|  | ||||
|     // Query strings: same error handling as JSON bodies. | ||||
|     let query_config = web::QueryConfig::default() | ||||
|         .error_handler(|err, _req| payload_error_handler(err).into()); | ||||
|  | ||||
|     // NOTE: `routes::dump::services` is not registered yet. | ||||
|     let app = App::new() | ||||
|         .data(data.clone()) | ||||
|         .app_data(json_config) | ||||
|         .app_data(query_config) | ||||
|         .configure(routes::document::services) | ||||
|         .configure(routes::index::services) | ||||
|         .configure(routes::search::services) | ||||
|         .configure(routes::settings::services) | ||||
|         .configure(routes::stop_words::services) | ||||
|         .configure(routes::synonym::services) | ||||
|         .configure(routes::health::services) | ||||
|         .configure(routes::stats::services) | ||||
|         .configure(routes::key::services); | ||||
|  | ||||
|     if !enable_frontend { | ||||
|         app | ||||
|     } else { | ||||
|         app.service(routes::load_html).service(routes::load_css) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										162
									
								
								meilisearch-http/src/main.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								meilisearch-http/src/main.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,162 @@ | ||||
| use std::env; | ||||
|  | ||||
| use actix_cors::Cors; | ||||
| use actix_web::{middleware, HttpServer}; | ||||
| use main_error::MainError; | ||||
| use meilisearch_http::helpers::NormalizePath; | ||||
| use meilisearch_http::{create_app, Data, Opt}; | ||||
| use structopt::StructOpt; | ||||
|  | ||||
| //mod analytics; | ||||
|  | ||||
| // Use jemalloc as the global allocator; this only applies to Linux builds. | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
|  | ||||
| /// Starts the MeiliSearch HTTP server. | ||||
| /// | ||||
| /// Parses the command-line options, configures error reporting and logging for | ||||
| /// the selected environment, builds the shared `Data`, prints the launch summary | ||||
| /// and runs the server, over TLS when `Opt::get_ssl_config` yields a configuration. | ||||
| /// | ||||
| /// In `production`, a missing master key aborts the start-up with an error. | ||||
| #[actix_web::main] | ||||
| async fn main() -> Result<(), MainError> { | ||||
|     let opt = Opt::from_args(); | ||||
|  | ||||
|     #[cfg(all(not(debug_assertions), feature = "sentry"))] | ||||
|     let _sentry = sentry::init(( | ||||
|         if opt.no_sentry { | ||||
|             None | ||||
|         } else { | ||||
|             Some(opt.sentry_dsn.clone()) | ||||
|         }, | ||||
|         sentry::ClientOptions { | ||||
|             release: sentry::release_name!(), | ||||
|             ..Default::default() | ||||
|         }, | ||||
|     )); | ||||
|  | ||||
|     match opt.env.as_str() { | ||||
|         "production" => { | ||||
|             // A master key is mandatory in production. | ||||
|             if opt.master_key.is_none() { | ||||
|                 return Err( | ||||
|                     "In production mode, the environment variable MEILI_MASTER_KEY is mandatory" | ||||
|                         .into(), | ||||
|                 ); | ||||
|             } | ||||
|  | ||||
|             #[cfg(all(not(debug_assertions), feature = "sentry"))] | ||||
|             if !opt.no_sentry && _sentry.is_enabled() { | ||||
|                 // TODO: This shouldn't be needed when upgrading to sentry 0.19.0. | ||||
|                 // These integrations are turned on by default when using `sentry::init`. | ||||
|                 sentry::integrations::panic::register_panic_handler(); | ||||
|                 sentry::integrations::env_logger::init(None, Default::default()); | ||||
|             } | ||||
|         } | ||||
|         "development" => { | ||||
|             // Log at `info` level unless the logger environment says otherwise. | ||||
|             let log_env = env_logger::Env::default().default_filter_or("info"); | ||||
|             env_logger::Builder::from_env(log_env).init(); | ||||
|         } | ||||
|         _ => unreachable!(), | ||||
|     } | ||||
|  | ||||
|     // Snapshot import is currently disabled: | ||||
|     //if let Some(path) = &opt.import_snapshot { | ||||
|         //snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?; | ||||
|     //} | ||||
|  | ||||
|     let data = Data::new(opt.clone())?; | ||||
|  | ||||
|     // Analytics are currently disabled: | ||||
|     //if !opt.no_analytics { | ||||
|         //let analytics_data = data.clone(); | ||||
|         //let analytics_opt = opt.clone(); | ||||
|         //thread::spawn(move || analytics::analytics_sender(analytics_data, analytics_opt)); | ||||
|     //} | ||||
|  | ||||
|     // Dump import is currently disabled: | ||||
|     //if let Some(path) = &opt.import_dump { | ||||
|         //dump::import_dump(&data, path, opt.dump_batch_size)?; | ||||
|     //} | ||||
|  | ||||
|     // Snapshot scheduling is currently disabled: | ||||
|     //if opt.schedule_snapshot { | ||||
|         //snapshot::schedule_snapshot(data.clone(), &opt.snapshot_dir, opt.snapshot_interval_sec.unwrap_or(86400))?; | ||||
|     //} | ||||
|  | ||||
|     print_launch_resume(&opt, &data); | ||||
|  | ||||
|     // The bundled frontend is served everywhere except in production. | ||||
|     let enable_frontend = opt.env != "production"; | ||||
|     let http_server = HttpServer::new(move || { | ||||
|         let cors = Cors::default() | ||||
|             .send_wildcard() | ||||
|             .allowed_headers(vec!["content-type", "x-meili-api-key"]) | ||||
|             .max_age(86_400); // 24h | ||||
|  | ||||
|         create_app(&data, enable_frontend) | ||||
|             .wrap(cors) | ||||
|             .wrap(middleware::Logger::default()) | ||||
|             .wrap(middleware::Compress::default()) | ||||
|             .wrap(NormalizePath) | ||||
|     }); | ||||
|  | ||||
|     // Bind with rustls when an SSL configuration is available, plain otherwise. | ||||
|     let bound_server = match opt.get_ssl_config()? { | ||||
|         Some(config) => http_server.bind_rustls(opt.http_addr, config)?, | ||||
|         None => http_server.bind(opt.http_addr)?, | ||||
|     }; | ||||
|     bound_server.run().await?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub fn print_launch_resume(opt: &Opt, data: &Data) { | ||||
|     let ascii_name = r#" | ||||
| 888b     d888          d8b 888 d8b  .d8888b.                                    888 | ||||
| 8888b   d8888          Y8P 888 Y8P d88P  Y88b                                   888 | ||||
| 88888b.d88888              888     Y88b.                                        888 | ||||
| 888Y88888P888  .d88b.  888 888 888  "Y888b.    .d88b.   8888b.  888d888 .d8888b 88888b. | ||||
| 888 Y888P 888 d8P  Y8b 888 888 888     "Y88b. d8P  Y8b     "88b 888P"  d88P"    888 "88b | ||||
| 888  Y8P  888 88888888 888 888 888       "888 88888888 .d888888 888    888      888  888 | ||||
| 888   "   888 Y8b.     888 888 888 Y88b  d88P Y8b.     888  888 888    Y88b.    888  888 | ||||
| 888       888  "Y8888  888 888 888  "Y8888P"   "Y8888  "Y888888 888     "Y8888P 888  888 | ||||
| "#; | ||||
|  | ||||
|     eprintln!("{}", ascii_name); | ||||
|  | ||||
|     eprintln!("Database path:\t\t{:?}", opt.db_path); | ||||
|     eprintln!("Server listening on:\t{:?}", opt.http_addr); | ||||
|     eprintln!("Environment:\t\t{:?}", opt.env); | ||||
|     eprintln!("Commit SHA:\t\t{:?}", env!("VERGEN_SHA").to_string()); | ||||
|     eprintln!( | ||||
|         "Build date:\t\t{:?}", | ||||
|         env!("VERGEN_BUILD_TIMESTAMP").to_string() | ||||
|     ); | ||||
|     eprintln!( | ||||
|         "Package version:\t{:?}", | ||||
|         env!("CARGO_PKG_VERSION").to_string() | ||||
|     ); | ||||
|  | ||||
|     #[cfg(all(not(debug_assertions), feature = "sentry"))] | ||||
|     eprintln!( | ||||
|         "Sentry DSN:\t\t{:?}", | ||||
|         if !opt.no_sentry { | ||||
|             &opt.sentry_dsn | ||||
|         } else { | ||||
|             "Disabled" | ||||
|         } | ||||
|     ); | ||||
|  | ||||
|     eprintln!( | ||||
|         "Amplitude Analytics:\t{:?}", | ||||
|         if !opt.no_analytics { | ||||
|             "Enabled" | ||||
|         } else { | ||||
|             "Disabled" | ||||
|         } | ||||
|     ); | ||||
|  | ||||
|     eprintln!(); | ||||
|  | ||||
|     if data.api_keys().master.is_some() { | ||||
|         eprintln!("A Master Key has been set. Requests to MeiliSearch won't be authorized unless you provide an authentication key."); | ||||
|     } else { | ||||
|         eprintln!("No master key found; The server will accept unidentified requests. \ | ||||
|             If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx"); | ||||
|     } | ||||
|  | ||||
|     eprintln!(); | ||||
|     eprintln!("Documentation:\t\thttps://docs.meilisearch.com"); | ||||
|     eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch"); | ||||
|     eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html or bonjour@meilisearch.com"); | ||||
|     eprintln!(); | ||||
| } | ||||
							
								
								
									
										294
									
								
								meilisearch-http/src/option.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										294
									
								
								meilisearch-http/src/option.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,294 @@ | ||||
| use std::{error, fs}; | ||||
| use std::io::{BufReader, Read}; | ||||
| use std::path::PathBuf; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use byte_unit::Byte; | ||||
| use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; | ||||
| use rustls::{ | ||||
|     AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth, | ||||
|     RootCertStore, | ||||
| }; | ||||
| use grenad::CompressionType; | ||||
| use structopt::StructOpt; | ||||
|  | ||||
| // Indexing options, flattened into `Opt`. | ||||
| // | ||||
| // NOTE: structopt turns the `///` comments on the fields into the command-line | ||||
| // help text, so they are user-facing strings and must read correctly. | ||||
| #[derive(Debug, Clone, StructOpt)] | ||||
| pub struct IndexerOpts { | ||||
|     /// The amount of documents to skip before printing | ||||
|     /// a log regarding the indexing advancement. | ||||
|     #[structopt(long, default_value = "100000")] // 100k | ||||
|     pub log_every_n: usize, | ||||
|  | ||||
|     /// Grenad max number of chunks in bytes. | ||||
|     #[structopt(long)] | ||||
|     pub max_nb_chunks: Option<usize>, | ||||
|  | ||||
|     /// The maximum amount of memory to use for the Grenad buffer. It is recommended | ||||
|     /// to use something like 80%-90% of the available memory. | ||||
|     /// | ||||
|     /// It is automatically split by the number of jobs e.g. if you use 7 jobs | ||||
|     /// and 7 GB of max memory, each thread will use a maximum of 1 GB. | ||||
|     #[structopt(long, default_value = "7 GiB")] | ||||
|     pub max_memory: Byte, | ||||
|  | ||||
|     /// Size of the linked hash map cache when indexing. | ||||
|     /// The bigger it is, the faster the indexing is but the more memory it takes. | ||||
|     #[structopt(long, default_value = "500")] | ||||
|     pub linked_hash_map_size: usize, | ||||
|  | ||||
|     /// The name of the compression algorithm to use when compressing intermediate | ||||
|     /// Grenad chunks while indexing documents. | ||||
|     /// | ||||
|     /// Choosing a fast algorithm will make the indexing faster but may consume more memory. | ||||
|     #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] | ||||
|     pub chunk_compression_type: CompressionType, | ||||
|  | ||||
|     /// The level of compression of the chosen algorithm. | ||||
|     #[structopt(long, requires = "chunk-compression-type")] | ||||
|     pub chunk_compression_level: Option<u32>, | ||||
|  | ||||
|     /// The number of bytes to remove from the beginning of the chunks while reading/sorting | ||||
|     /// or merging them. | ||||
|     /// | ||||
|     /// File fusing must only be enabled on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`, | ||||
|     /// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set. | ||||
|     #[structopt(long, default_value = "4 GiB")] | ||||
|     pub chunk_fusing_shrink_size: Byte, | ||||
|  | ||||
|     /// Enable the chunk fusing or not, this reduces the amount of disk space used. | ||||
|     #[structopt(long)] | ||||
|     pub enable_chunk_fusing: bool, | ||||
|  | ||||
|     /// Number of parallel jobs for indexing, defaults to # of CPUs. | ||||
|     #[structopt(long)] | ||||
|     pub indexing_jobs: Option<usize>, | ||||
| } | ||||
|  | ||||
| impl Default for IndexerOpts { | ||||
|     /// Programmatic defaults for the indexer options. | ||||
|     /// | ||||
|     /// NOTE(review): these values are independent of the `default_value`s | ||||
|     /// declared on the command-line options (e.g. 1 GiB of memory and no | ||||
|     /// chunk compression here); confirm the difference is intended. | ||||
|     fn default() -> Self { | ||||
|         let max_memory = Byte::from_str("1GiB").unwrap(); | ||||
|         let chunk_fusing_shrink_size = Byte::from_str("4GiB").unwrap(); | ||||
|  | ||||
|         IndexerOpts { | ||||
|             log_every_n: 100_000, | ||||
|             max_nb_chunks: None, | ||||
|             max_memory, | ||||
|             linked_hash_map_size: 500, | ||||
|             chunk_compression_type: CompressionType::None, | ||||
|             chunk_compression_level: None, | ||||
|             chunk_fusing_shrink_size, | ||||
|             enable_chunk_fusing: false, | ||||
|             indexing_jobs: None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Values accepted for the `env` option of `Opt` (passed as its `possible_values`). | ||||
| const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; | ||||
|  | ||||
| // Command-line / environment configuration of the HTTP server. | ||||
| // | ||||
| // Most options can also be provided through the `MEILI_*` environment variable | ||||
| // named in their `env = "..."` attribute. The two Sentry options only exist in | ||||
| // release builds compiled with the `sentry` feature. | ||||
| // | ||||
| // NOTE: structopt turns the `///` comments on the fields into the command-line | ||||
| // help text, so editing them changes what users see. | ||||
| #[derive(Debug, Clone, StructOpt)] | ||||
| pub struct Opt { | ||||
|     /// The destination where the database must be created. | ||||
|     #[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] | ||||
|     pub db_path: PathBuf, | ||||
|  | ||||
|     /// The address on which the http server will listen. | ||||
|     #[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")] | ||||
|     pub http_addr: String, | ||||
|  | ||||
|     /// The master key allowing you to do everything on the server. | ||||
|     #[structopt(long, env = "MEILI_MASTER_KEY")] | ||||
|     pub master_key: Option<String>, | ||||
|  | ||||
|     /// The Sentry DSN to use for error reporting. This defaults to the MeiliSearch Sentry project. | ||||
|     /// You can disable sentry all together using the `--no-sentry` flag or `MEILI_NO_SENTRY` environment variable. | ||||
|     #[cfg(all(not(debug_assertions), feature = "sentry"))] | ||||
|     #[structopt(long, env = "SENTRY_DSN", default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337")] | ||||
|     pub sentry_dsn: String, | ||||
|  | ||||
|     /// Disable Sentry error reporting. | ||||
|     #[structopt(long, env = "MEILI_NO_SENTRY")] | ||||
|     #[cfg(all(not(debug_assertions), feature = "sentry"))] | ||||
|     pub no_sentry: bool, | ||||
|  | ||||
|     /// This environment variable must be set to `production` if you are running in production. | ||||
|     /// If the server is running in development mode more logs will be displayed, | ||||
|     /// and the master key can be avoided which implies that there is no security on the updates routes. | ||||
|     /// This is useful to debug when integrating the engine with another service. | ||||
|     #[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)] | ||||
|     pub env: String, | ||||
|  | ||||
|     /// Do not send analytics to Meili. | ||||
|     #[structopt(long, env = "MEILI_NO_ANALYTICS")] | ||||
|     pub no_analytics: bool, | ||||
|  | ||||
|     /// The maximum size, in bytes, of the main lmdb database directory | ||||
|     #[structopt(long, env = "MEILI_MAX_MDB_SIZE", default_value = "100 GiB")] | ||||
|     pub max_mdb_size: Byte, | ||||
|  | ||||
|     /// The maximum size, in bytes, of the update lmdb database directory | ||||
|     #[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "10 GiB")] | ||||
|     pub max_udb_size: Byte, | ||||
|  | ||||
|     /// The maximum size, in bytes, of accepted JSON payloads | ||||
|     #[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "10 MiB")] | ||||
|     pub http_payload_size_limit: Byte, | ||||
|  | ||||
|     /// Read server certificates from CERTFILE. | ||||
|     /// This should contain PEM-format certificates | ||||
|     /// in the right order (the first certificate should | ||||
|     /// certify KEYFILE, the last should be a root CA). | ||||
|     #[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))] | ||||
|     pub ssl_cert_path: Option<PathBuf>, | ||||
|  | ||||
|     /// Read private key from KEYFILE.  This should be a RSA | ||||
|     /// private key or PKCS8-encoded private key, in PEM format. | ||||
|     #[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))] | ||||
|     pub ssl_key_path: Option<PathBuf>, | ||||
|  | ||||
|     /// Enable client authentication, and accept certificates | ||||
|     /// signed by those roots provided in CERTFILE. | ||||
|     #[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))] | ||||
|     pub ssl_auth_path: Option<PathBuf>, | ||||
|  | ||||
|     /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate. | ||||
|     /// Optional | ||||
|     #[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))] | ||||
|     pub ssl_ocsp_path: Option<PathBuf>, | ||||
|  | ||||
|     /// Send a fatal alert if the client does not complete client authentication. | ||||
|     #[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")] | ||||
|     pub ssl_require_auth: bool, | ||||
|  | ||||
|     /// SSL support session resumption | ||||
|     #[structopt(long, env = "MEILI_SSL_RESUMPTION")] | ||||
|     pub ssl_resumption: bool, | ||||
|  | ||||
|     /// SSL support tickets. | ||||
|     #[structopt(long, env = "MEILI_SSL_TICKETS")] | ||||
|     pub ssl_tickets: bool, | ||||
|  | ||||
|     /// Defines the path of the snapshot file to import. | ||||
|     /// This option will, by default, stop the process if a database already exist or if no snapshot exists at | ||||
|     /// the given path. If this option is not specified no snapshot is imported. | ||||
|     #[structopt(long)] | ||||
|     pub import_snapshot: Option<PathBuf>, | ||||
|  | ||||
|     /// The engine will ignore a missing snapshot and not return an error in such case. | ||||
|     #[structopt(long, requires = "import-snapshot")] | ||||
|     pub ignore_missing_snapshot: bool, | ||||
|  | ||||
|     /// The engine will skip snapshot importation and not return an error in such case. | ||||
|     #[structopt(long, requires = "import-snapshot")] | ||||
|     pub ignore_snapshot_if_db_exists: bool, | ||||
|  | ||||
|     /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. | ||||
|     #[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")] | ||||
|     pub snapshot_dir: PathBuf, | ||||
|  | ||||
|     /// Activate snapshot scheduling. | ||||
|     #[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")] | ||||
|     pub schedule_snapshot: bool, | ||||
|  | ||||
|     /// Defines time interval, in seconds, between each snapshot creation. | ||||
|     #[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC")] | ||||
|     pub snapshot_interval_sec: Option<u64>, | ||||
|  | ||||
|     /// Folder where dumps are created when the dump route is called. | ||||
|     #[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] | ||||
|     pub dumps_dir: PathBuf, | ||||
|  | ||||
|     /// Import a dump from the specified path, must be a `.tar.gz` file. | ||||
|     #[structopt(long, conflicts_with = "import-snapshot")] | ||||
|     pub import_dump: Option<PathBuf>, | ||||
|  | ||||
|     /// The batch size used in the importation process, the bigger it is the faster the dump is created. | ||||
|     #[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")] | ||||
|     pub dump_batch_size: usize, | ||||
|  | ||||
|     #[structopt(flatten)] | ||||
|     pub indexer_options: IndexerOpts, | ||||
| } | ||||
|  | ||||
| impl Opt { | ||||
|     /// Builds the rustls server configuration described by the SSL options. | ||||
|     /// | ||||
|     /// Returns `Ok(None)` unless both `ssl_cert_path` and `ssl_key_path` are | ||||
|     /// set. Otherwise the certificate chain, private key and optional OCSP | ||||
|     /// response are loaded, client authentication is set up from | ||||
|     /// `ssl_auth_path` (mandatory for clients when `ssl_require_auth` is set), | ||||
|     /// and session resumption / tickets are enabled when requested. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns an error when one of the files cannot be opened or parsed, when | ||||
|     /// a client authentication root certificate is rejected, or when the | ||||
|     /// certificate chain and private key are not accepted by rustls. | ||||
|     pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> { | ||||
|         if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { | ||||
|             let client_auth = match &self.ssl_auth_path { | ||||
|                 Some(auth_path) => { | ||||
|                     let roots = load_certs(auth_path.to_path_buf())?; | ||||
|                     let mut client_auth_roots = RootCertStore::empty(); | ||||
|                     for root in roots { | ||||
|                         // The roots come from a user-supplied file: report an | ||||
|                         // invalid certificate as an error instead of panicking. | ||||
|                         client_auth_roots | ||||
|                             .add(&root) | ||||
|                             .map_err(|_| "cannot add client authentication root certificate")?; | ||||
|                     } | ||||
|                     if self.ssl_require_auth { | ||||
|                         AllowAnyAuthenticatedClient::new(client_auth_roots) | ||||
|                     } else { | ||||
|                         AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots) | ||||
|                     } | ||||
|                 } | ||||
|                 None => NoClientAuth::new(), | ||||
|             }; | ||||
|  | ||||
|             let mut config = rustls::ServerConfig::new(client_auth); | ||||
|             config.key_log = Arc::new(rustls::KeyLogFile::new()); | ||||
|  | ||||
|             let certs = load_certs(cert_path.to_path_buf())?; | ||||
|             let privkey = load_private_key(key_path.to_path_buf())?; | ||||
|             let ocsp = load_ocsp(&self.ssl_ocsp_path)?; | ||||
|             config | ||||
|                 .set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![]) | ||||
|                 .map_err(|_| "bad certificates/private key")?; | ||||
|  | ||||
|             if self.ssl_resumption { | ||||
|                 config.set_persistence(rustls::ServerSessionMemoryCache::new(256)); | ||||
|             } | ||||
|  | ||||
|             if self.ssl_tickets { | ||||
|                 config.ticketer = rustls::Ticketer::new(); | ||||
|             } | ||||
|  | ||||
|             Ok(Some(config)) | ||||
|         } else { | ||||
|             Ok(None) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Opens `filename` and parses the PEM certificates it contains. | ||||
| /// | ||||
| /// Failures are reported as plain messages: one when the file cannot be | ||||
| /// opened, another when its content cannot be parsed. | ||||
| fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> { | ||||
|     let file = fs::File::open(filename).map_err(|_| "cannot open certificate file")?; | ||||
|     let mut pem = BufReader::new(file); | ||||
|     let parsed = certs(&mut pem).map_err(|_| "cannot read certificate file")?; | ||||
|     Ok(parsed) | ||||
| } | ||||
|  | ||||
| /// Loads the first private key found in the PEM file `filename`. | ||||
| /// | ||||
| /// The file is parsed twice, once for RSA keys and once for PKCS8 keys; a | ||||
| /// PKCS8 key is preferred when both kinds are present. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns an error when the file cannot be opened, when it cannot be parsed, | ||||
| /// or when it contains no private key at all. | ||||
| fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> { | ||||
|     let rsa_keys = { | ||||
|         let keyfile = fs::File::open(&filename).map_err(|_| "cannot open private key file")?; | ||||
|         let mut reader = BufReader::new(keyfile); | ||||
|         rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")? | ||||
|     }; | ||||
|  | ||||
|     let pkcs8_keys = { | ||||
|         let keyfile = fs::File::open(&filename).map_err(|_| "cannot open private key file")?; | ||||
|         let mut reader = BufReader::new(keyfile); | ||||
|         pkcs8_private_keys(&mut reader) | ||||
|             .map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")? | ||||
|     }; | ||||
|  | ||||
|     // Prefer a pkcs8 key and fall back to an rsa one. The file is provided by | ||||
|     // the user, so an empty result is reported as an error rather than a panic. | ||||
|     let key = pkcs8_keys | ||||
|         .into_iter() | ||||
|         .next() | ||||
|         .or_else(|| rsa_keys.into_iter().next()) | ||||
|         .ok_or("private key file contains no private key")?; | ||||
|  | ||||
|     Ok(key) | ||||
| } | ||||
|  | ||||
| /// Reads the whole OCSP response file, when one is configured. | ||||
| /// | ||||
| /// Returns an empty buffer when `filename` is `None`. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns "cannot open ocsp file" when the file cannot be opened and | ||||
| /// "cannot read ocsp file" when reading its content fails. | ||||
| fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> { | ||||
|     let mut ret = Vec::new(); | ||||
|  | ||||
|     if let Some(name) = filename { | ||||
|         fs::File::open(name) | ||||
|             .map_err(|_| "cannot open ocsp file")? | ||||
|             .read_to_end(&mut ret) | ||||
|             .map_err(|_| "cannot read ocsp file")?; | ||||
|     } | ||||
|  | ||||
|     Ok(ret) | ||||
| } | ||||
							
								
								
									
										262
									
								
								meilisearch-http/src/routes/document.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								meilisearch-http/src/routes/document.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,262 @@ | ||||
| use actix_web::web::Payload; | ||||
| use actix_web::{delete, get, post, put}; | ||||
| use actix_web::{web, HttpResponse}; | ||||
| use indexmap::IndexMap; | ||||
| use log::error; | ||||
| use milli::update::{IndexDocumentsMethod, UpdateFormat}; | ||||
| use serde::Deserialize; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use crate::Data; | ||||
| use crate::error::ResponseError; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::routes::IndexParam; | ||||
|  | ||||
| // Fallbacks used by `get_all_documents` when the `offset` / `limit` query | ||||
| // parameters are not provided. | ||||
| const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0; | ||||
| const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20; | ||||
|  | ||||
| // Generates a route guard named `$fn_name` that accepts a request only when | ||||
| // its `Content-Type` header is readable as a string and contains `$guard_value`. | ||||
| // A missing or unreadable header makes the guard return `false`. | ||||
| macro_rules! guard_content_type { | ||||
|     ($fn_name:ident, $guard_value:literal) => { | ||||
|         fn $fn_name(head: &actix_web::dev::RequestHead) -> bool { | ||||
|             head.headers | ||||
|                 .get("Content-Type") | ||||
|                 .and_then(|content_type| content_type.to_str().ok()) | ||||
|                 .map_or(false, |value| value.contains($guard_value)) | ||||
|         } | ||||
|     }; | ||||
| } | ||||
|  | ||||
| // `guard_json`: matches requests whose content type contains "application/json". | ||||
| guard_content_type!(guard_json, "application/json"); | ||||
|  | ||||
| /// A document: a map from string keys to JSON values. | ||||
| type Document = IndexMap<String, Value>; | ||||
|  | ||||
| /// Path parameters of the `/indexes/{index_uid}/documents/{document_id}` routes. | ||||
| #[derive(Deserialize)] | ||||
| struct DocumentParam { | ||||
|     index_uid: String, | ||||
|     document_id: String, | ||||
| } | ||||
|  | ||||
| /// Registers the document routes of this module on the service configuration. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     // Single-document routes. | ||||
|     cfg.service(get_document); | ||||
|     cfg.service(delete_document); | ||||
|     // Collection routes. | ||||
|     cfg.service(get_all_documents); | ||||
|     cfg.service(add_documents_json); | ||||
|     cfg.service(update_documents); | ||||
|     cfg.service(delete_documents); | ||||
|     cfg.service(clear_all_documents); | ||||
| } | ||||
|  | ||||
| /// Returns one document of an index, serialized as JSON. | ||||
| /// | ||||
| /// Any retrieval error is answered with a `400 Bad Request` whose body is | ||||
| /// `{ "error": <message> }`. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/documents/{document_id}", | ||||
|     wrap = "Authentication::Public" | ||||
| )] | ||||
| async fn get_document( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<DocumentParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let DocumentParam { index_uid, document_id } = path.into_inner(); | ||||
|     let retrieved = data | ||||
|         .retrieve_document(index_uid, document_id, None::<Vec<String>>) | ||||
|         .await; | ||||
|  | ||||
|     let response = match retrieved { | ||||
|         Ok(document) => HttpResponse::Ok().body(serde_json::to_string(&document).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// Deletes one document of an index and returns the serialized result. | ||||
| /// | ||||
| /// Any error is answered with a `400 Bad Request` whose body is | ||||
| /// `{ "error": <message> }`. | ||||
| #[delete( | ||||
|     "/indexes/{index_uid}/documents/{document_id}", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn delete_document( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<DocumentParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let DocumentParam { index_uid, document_id } = path.into_inner(); | ||||
|     let deletion = data.delete_documents(index_uid, vec![document_id]).await; | ||||
|  | ||||
|     let response = match deletion { | ||||
|         Ok(result) => HttpResponse::Ok().body(serde_json::to_string(&result).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// Query-string parameters of `get_all_documents`. | ||||
| /// | ||||
| /// Field names are read in camelCase and unknown fields are rejected. | ||||
| /// `attributes_to_retrieve` is a comma-separated list. | ||||
| #[derive(Deserialize)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| struct BrowseQuery { | ||||
|     offset: Option<usize>, | ||||
|     limit: Option<usize>, | ||||
|     attributes_to_retrieve: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Returns a page of the documents of an index, serialized as JSON. | ||||
| /// | ||||
| /// `offset` and `limit` fall back to `DEFAULT_RETRIEVE_DOCUMENTS_OFFSET` and | ||||
| /// `DEFAULT_RETRIEVE_DOCUMENTS_LIMIT`; `attributesToRetrieve` is split on | ||||
| /// commas. Any error is answered with a `400 Bad Request` whose body is | ||||
| /// `{ "error": <message> }`. | ||||
| #[get("/indexes/{index_uid}/documents", wrap = "Authentication::Public")] | ||||
| async fn get_all_documents( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     params: web::Query<BrowseQuery>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let BrowseQuery { offset, limit, attributes_to_retrieve } = params.into_inner(); | ||||
|     let attributes_to_retrieve = attributes_to_retrieve | ||||
|         .map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>()); | ||||
|  | ||||
|     let retrieved = data | ||||
|         .retrieve_documents( | ||||
|             path.into_inner().index_uid, | ||||
|             offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET), | ||||
|             limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT), | ||||
|             attributes_to_retrieve, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     let response = match retrieved { | ||||
|         Ok(docs) => HttpResponse::Ok().body(serde_json::to_string(&docs).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// Query-string parameters of the document addition / update routes. | ||||
| /// | ||||
| /// Field names are read in camelCase and unknown fields are rejected. | ||||
| #[derive(Deserialize)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| struct UpdateDocumentsQuery { | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Route used when the payload type is "application/json". | ||||
| /// | ||||
| /// Forwards the request body to `Data::add_documents` with the | ||||
| /// `ReplaceDocuments` method and answers with the serialized update. Any error | ||||
| /// is answered with a `400 Bad Request` whose body is `{ "error": <message> }`. | ||||
| #[post( | ||||
|     "/indexes/{index_uid}/documents", | ||||
|     wrap = "Authentication::Private", | ||||
|     guard = "guard_json" | ||||
| )] | ||||
| async fn add_documents_json( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     params: web::Query<UpdateDocumentsQuery>, | ||||
|     body: Payload, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let index_uid = path.into_inner().index_uid; | ||||
|     let primary_key = params.into_inner().primary_key; | ||||
|  | ||||
|     let outcome = data | ||||
|         .add_documents( | ||||
|             index_uid, | ||||
|             IndexDocumentsMethod::ReplaceDocuments, | ||||
|             UpdateFormat::Json, | ||||
|             body, | ||||
|             primary_key, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     let response = match outcome { | ||||
|         Ok(update) => HttpResponse::Ok().body(serde_json::to_string(&update).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
|  | ||||
| /// Default route for adding documents, this should return an error and redirect to the documentation. | ||||
| /// | ||||
| /// Currently a stub: it logs "Unknown document type" and then reaches `todo!()`, | ||||
| /// which panics. | ||||
| /// NOTE(review): this handler is not registered in `services` — confirm before wiring it in. | ||||
| #[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")] | ||||
| async fn add_documents_default( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
|     _params: web::Query<UpdateDocumentsQuery>, | ||||
|     _body: web::Json<Vec<Document>>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     error!("Unknown document type"); | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// Fallback `PUT /indexes/{index_uid}/documents` route, reached when no format-specific | ||||
| /// update route (such as the `guard_json` one) accepted the request. | ||||
| /// | ||||
| /// Logs the problem and answers with `415 Unsupported Media Type` and a | ||||
| /// `{ "error": <message> }` body instead of panicking inside the request handler. | ||||
| // TODO: point the client at the documentation listing the supported payload types. | ||||
| #[put("/indexes/{index_uid}/documents", wrap = "Authentication::Private")] | ||||
| async fn update_documents_default( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
|     _params: web::Query<UpdateDocumentsQuery>, | ||||
|     _body: web::Json<Vec<Document>>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     error!("Unknown document type"); | ||||
|     Ok(HttpResponse::UnsupportedMediaType() | ||||
|         .body(serde_json::json!({ "error": "Unknown document type" }))) | ||||
| } | ||||
|  | ||||
| /// `PUT /indexes/{index_uid}/documents` for requests accepted by `guard_json`. | ||||
| /// | ||||
| /// Same flow as the JSON addition route, but the payload is applied with the | ||||
| /// `UpdateDocuments` method. Success yields a 200 response with the serialized update; | ||||
| /// failure yields a 400 response carrying `{ "error": <message> }`. | ||||
| #[put( | ||||
|     "/indexes/{index_uid}/documents", | ||||
|     wrap = "Authentication::Private", | ||||
|     guard = "guard_json", | ||||
| )] | ||||
| async fn update_documents( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     params: web::Query<UpdateDocumentsQuery>, | ||||
|     body: web::Payload, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let index_uid = path.into_inner().index_uid; | ||||
|     let primary_key = params.into_inner().primary_key; | ||||
|  | ||||
|     let outcome = data | ||||
|         .add_documents( | ||||
|             index_uid, | ||||
|             IndexDocumentsMethod::UpdateDocuments, | ||||
|             UpdateFormat::Json, | ||||
|             body, | ||||
|             primary_key, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     let response = match outcome { | ||||
|         Ok(update) => HttpResponse::Ok().body(serde_json::to_string(&update).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// `POST /indexes/{index_uid}/documents/delete-batch`: deletes the documents whose ids | ||||
| /// are listed in the JSON array body. | ||||
| /// | ||||
| /// String ids are used verbatim; any other JSON value is converted with `to_string()`. | ||||
| #[post( | ||||
|     "/indexes/{index_uid}/documents/delete-batch", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn delete_documents( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     body: web::Json<Vec<Value>>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let ids = body | ||||
|         .iter() | ||||
|         .map(|value| match value.as_str() { | ||||
|             Some(text) => String::from(text), | ||||
|             None => value.to_string(), | ||||
|         }) | ||||
|         .collect(); | ||||
|  | ||||
|     let outcome = data.delete_documents(path.index_uid.clone(), ids).await; | ||||
|     let response = match outcome { | ||||
|         Ok(result) => HttpResponse::Ok().body(serde_json::to_string(&result).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// `DELETE /indexes/{index_uid}/documents`: clears every document of the index. | ||||
| /// | ||||
| /// Responds 200 with the serialized update on success, or 400 with | ||||
| /// `{ "error": <message> }` on failure. | ||||
| #[delete("/indexes/{index_uid}/documents", wrap = "Authentication::Private")] | ||||
| async fn clear_all_documents( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let outcome = data.clear_documents(path.index_uid.clone()).await; | ||||
|     let response = match outcome { | ||||
|         Ok(update) => HttpResponse::Ok().body(serde_json::to_string(&update).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
							
								
								
									
										42
									
								
								meilisearch-http/src/routes/dump.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								meilisearch-http/src/routes/dump.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| use std::fs::File; | ||||
| use std::path::Path; | ||||
|  | ||||
| use actix_web::{get, post}; | ||||
| use actix_web::{HttpResponse, web}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::dump::{DumpInfo, DumpStatus, compressed_dumps_dir, init_dump_process}; | ||||
| use crate::Data; | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
|  | ||||
| /// Registers the dump routes (`trigger_dump`, `get_dump_status`) on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(trigger_dump); | ||||
|     cfg.service(get_dump_status); | ||||
| } | ||||
|  | ||||
| /// `POST /dumps` handler. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so calling this handler panics. | ||||
| #[post("/dumps", wrap = "Authentication::Private")] | ||||
| async fn trigger_dump( | ||||
|     data: web::Data<Data>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// Serializable payload holding a dump status as a string (camelCase keys). | ||||
| // NOTE(review): not referenced by the handlers visible in this file, which are still `todo!()`. | ||||
| #[derive(Debug, Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| struct DumpStatusResponse { | ||||
|     status: String, | ||||
| } | ||||
|  | ||||
| /// Path parameters of `/dumps/{dump_uid}/status`. | ||||
| #[derive(Deserialize)] | ||||
| struct DumpParam { | ||||
|     // Filled from the `{dump_uid}` path segment. | ||||
|     dump_uid: String, | ||||
| } | ||||
|  | ||||
| /// `GET /dumps/{dump_uid}/status` handler. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so calling this handler panics. | ||||
| #[get("/dumps/{dump_uid}/status", wrap = "Authentication::Private")] | ||||
| async fn get_dump_status( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<DumpParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
							
								
								
									
										13
									
								
								meilisearch-http/src/routes/health.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								meilisearch-http/src/routes/health.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | ||||
| use actix_web::get; | ||||
| use actix_web::{web, HttpResponse}; | ||||
|  | ||||
| use crate::error::ResponseError; | ||||
|  | ||||
| /// Registers the health-check route (`get_health`) on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(get_health); | ||||
| } | ||||
|  | ||||
| /// `GET /health`: always answers `204 No Content` with an empty body. | ||||
| #[get("/health")] | ||||
| async fn get_health() -> Result<HttpResponse, ResponseError> { | ||||
|     let response = HttpResponse::NoContent().finish(); | ||||
|     Ok(response) | ||||
| } | ||||
							
								
								
									
										167
									
								
								meilisearch-http/src/routes/index.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										167
									
								
								meilisearch-http/src/routes/index.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,167 @@ | ||||
| use actix_web::{delete, get, post, put}; | ||||
| use actix_web::{web, HttpResponse}; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::Data; | ||||
| use crate::error::ResponseError; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::routes::IndexParam; | ||||
|  | ||||
| /// Registers every index route of this module on `cfg`, in declaration order. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(list_indexes); | ||||
|     cfg.service(get_index); | ||||
|     cfg.service(create_index); | ||||
|     cfg.service(update_index); | ||||
|     cfg.service(delete_index); | ||||
|     cfg.service(get_update_status); | ||||
|     cfg.service(get_all_updates_status); | ||||
| } | ||||
|  | ||||
|  | ||||
| /// `GET /indexes`: returns the serialized list of indexes, or a 400 response carrying | ||||
| /// `{ "error": <message> }` when listing fails. | ||||
| #[get("/indexes", wrap = "Authentication::Private")] | ||||
| async fn list_indexes(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> { | ||||
|     let response = match data.list_indexes() { | ||||
|         Ok(indexes) => HttpResponse::Ok().body(serde_json::to_string(&indexes).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}`: returns the serialized metadata of one index. | ||||
| /// | ||||
| /// A lookup error is propagated with `?`; an unknown index produces a 400 response | ||||
| /// carrying `{ "error": <message> }`. | ||||
| #[get("/indexes/{index_uid}", wrap = "Authentication::Private")] | ||||
| async fn get_index( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     if let Some(meta) = data.index(&path.index_uid)? { | ||||
|         let json = serde_json::to_string(&meta).unwrap(); | ||||
|         return Ok(HttpResponse::Ok().body(json)); | ||||
|     } | ||||
|  | ||||
|     let message = format!("Index {:?} doesn't exist.", path.index_uid); | ||||
|     Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": message }))) | ||||
| } | ||||
|  | ||||
| /// JSON body of `POST /indexes`. | ||||
| /// | ||||
| /// Keys are camelCase (`primaryKey`); unknown keys are rejected by `deny_unknown_fields`. | ||||
| #[derive(Debug, Deserialize)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| struct IndexCreateRequest { | ||||
|     // Passed to `create_index` as the index uid. | ||||
|     uid: String, | ||||
|     // Optional primary key passed to `create_index`. | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// `POST /indexes`: creates an index from the request body. | ||||
| /// | ||||
| /// Responds 200 with the serialized index metadata, or 400 with | ||||
| /// `{ "error": <message> }` when creation fails. | ||||
| #[post("/indexes", wrap = "Authentication::Private")] | ||||
| async fn create_index( | ||||
|     data: web::Data<Data>, | ||||
|     body: web::Json<IndexCreateRequest>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let outcome = data.create_index(&body.uid, body.primary_key.clone()); | ||||
|     let response = match outcome { | ||||
|         Ok(meta) => HttpResponse::Ok().body(serde_json::to_string(&meta).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// JSON body of `PUT /indexes/{index_uid}`. | ||||
| /// | ||||
| /// Both fields are optional; keys are camelCase and unknown keys are rejected. | ||||
| #[derive(Debug, Deserialize)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| struct UpdateIndexRequest { | ||||
|     name: Option<String>, | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Serializable description of an index (camelCase keys: `createdAt`, `updatedAt`, `primaryKey`). | ||||
| // NOTE(review): no handler visible in this file constructs this type — confirm it is still needed. | ||||
| #[derive(Debug, Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| struct UpdateIndexResponse { | ||||
|     name: String, | ||||
|     uid: String, | ||||
|     created_at: DateTime<Utc>, | ||||
|     updated_at: DateTime<Utc>, | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| /// `PUT /indexes/{index_uid}`: updates the primary key and/or name of an index. | ||||
| /// | ||||
| /// Responds 200 with the serialized index metadata, or 400 with | ||||
| /// `{ "error": <message> }` when the update fails. | ||||
| #[put("/indexes/{index_uid}", wrap = "Authentication::Private")] | ||||
| async fn update_index( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     body: web::Json<UpdateIndexRequest>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let primary_key = body.primary_key.as_ref(); | ||||
|     let name = body.name.as_ref(); | ||||
|  | ||||
|     let response = match data.update_index(&path.index_uid, primary_key, name) { | ||||
|         Ok(meta) => HttpResponse::Ok().body(serde_json::to_string(&meta).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// `DELETE /indexes/{index_uid}`: deletes an index. | ||||
| /// | ||||
| /// Responds 200 with an empty body on success, or 400 with `{ "error": <message> }`. | ||||
| #[delete("/indexes/{index_uid}", wrap = "Authentication::Private")] | ||||
| async fn delete_index( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let outcome = data.delete_index(path.index_uid.clone()).await; | ||||
|     let response = match outcome { | ||||
|         Ok(_) => HttpResponse::Ok().finish(), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// Path parameters of `/indexes/{index_uid}/updates/{update_id}`. | ||||
| #[derive(Deserialize)] | ||||
| struct UpdateParam { | ||||
|     // Filled from the `{index_uid}` path segment. | ||||
|     index_uid: String, | ||||
|     // Filled from the `{update_id}` path segment; must parse as an unsigned integer. | ||||
|     update_id: u64, | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/updates/{update_id}`: returns the status of one update. | ||||
| /// | ||||
| /// Responds 200 with the serialized status when the update is known. Both an unknown | ||||
| /// update and a lookup failure yield a 400 response carrying `{ "error": <message> }`. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/updates/{update_id}", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get_update_status( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<UpdateParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let result = data.get_update_status(&path.index_uid, path.update_id); | ||||
|     match result { | ||||
|         Ok(Some(meta)) => { | ||||
|             let json = serde_json::to_string(&meta).unwrap(); | ||||
|             Ok(HttpResponse::Ok().body(json)) | ||||
|         } | ||||
|         Ok(None) => { | ||||
|             // Message is user-facing: spelling fixed ("udpate" / "doesn't exists"). | ||||
|             let e = format!("update {} for index {:?} doesn't exist.", path.update_id, path.index_uid); | ||||
|             Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))) | ||||
|         } | ||||
|         Err(e) => { | ||||
|             Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/updates`: returns the status of every update of an index. | ||||
| /// | ||||
| /// Responds 200 with the serialized statuses, or 400 with `{ "error": <message> }`. | ||||
| #[get("/indexes/{index_uid}/updates", wrap = "Authentication::Private")] | ||||
| async fn get_all_updates_status( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let response = match data.get_updates_status(&path.index_uid) { | ||||
|         Ok(metas) => HttpResponse::Ok().body(serde_json::to_string(&metas).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
							
								
								
									
										26
									
								
								meilisearch-http/src/routes/key.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								meilisearch-http/src/routes/key.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| use actix_web::web; | ||||
| use actix_web::HttpResponse; | ||||
| use actix_web::get; | ||||
| use serde::Serialize; | ||||
|  | ||||
| use crate::helpers::Authentication; | ||||
| use crate::Data; | ||||
|  | ||||
| /// Registers the API-key listing route (`list`) on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(list); | ||||
| } | ||||
|  | ||||
| /// JSON body returned by `GET /keys`: the private and public API keys, each optional. | ||||
| #[derive(Serialize)] | ||||
| struct KeysResponse { | ||||
|     private: Option<String>, | ||||
|     public: Option<String>, | ||||
| } | ||||
|  | ||||
| /// `GET /keys` (admin authentication): returns the private and public API keys as JSON. | ||||
| #[get("/keys", wrap = "Authentication::Admin")] | ||||
| async fn list(data: web::Data<Data>) -> HttpResponse { | ||||
|     let keys = data.api_keys.clone(); | ||||
|     let payload = KeysResponse { | ||||
|         private: keys.private, | ||||
|         public: keys.public, | ||||
|     }; | ||||
|     HttpResponse::Ok().json(payload) | ||||
| } | ||||
							
								
								
									
										44
									
								
								meilisearch-http/src/routes/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								meilisearch-http/src/routes/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
| use actix_web::{get, HttpResponse}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| pub mod document; | ||||
| pub mod health; | ||||
| pub mod index; | ||||
| pub mod key; | ||||
| pub mod search; | ||||
| pub mod settings; | ||||
| pub mod stats; | ||||
| pub mod stop_words; | ||||
| pub mod synonym; | ||||
| //pub mod dump; | ||||
|  | ||||
| /// Path parameters shared by the `/indexes/{index_uid}/...` routes. | ||||
| #[derive(Deserialize)] | ||||
| pub struct IndexParam { | ||||
|     // Filled from the `{index_uid}` path segment; private, so only this module and its children can read it. | ||||
|     index_uid: String, | ||||
| } | ||||
|  | ||||
| /// Serializable payload carrying an update identifier; serialized as `{ "updateId": <id> }`. | ||||
| #[derive(Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexUpdateResponse { | ||||
|     pub update_id: u64, | ||||
| } | ||||
|  | ||||
| impl IndexUpdateResponse { | ||||
|     /// Builds a response wrapping the given `update_id`. | ||||
|     pub fn with_id(update_id: u64) -> Self { | ||||
|         IndexUpdateResponse { update_id } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// `GET /`: serves the HTML interface embedded at compile time from `public/interface.html`. | ||||
| #[get("/")] | ||||
| pub async fn load_html() -> HttpResponse { | ||||
|     let page = include_str!("../../public/interface.html"); | ||||
|     HttpResponse::Ok() | ||||
|         .content_type("text/html; charset=utf-8") | ||||
|         .body(String::from(page)) | ||||
| } | ||||
|  | ||||
| /// `GET /bulma.min.css`: serves the stylesheet embedded at compile time from `public/bulma.min.css`. | ||||
| #[get("/bulma.min.css")] | ||||
| pub async fn load_css() -> HttpResponse { | ||||
|     let stylesheet = include_str!("../../public/bulma.min.css"); | ||||
|     HttpResponse::Ok() | ||||
|         .content_type("text/css; charset=utf-8") | ||||
|         .body(String::from(stylesheet)) | ||||
| } | ||||
							
								
								
									
										114
									
								
								meilisearch-http/src/routes/search.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								meilisearch-http/src/routes/search.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
|  | ||||
| use actix_web::{get, post, web, HttpResponse}; | ||||
| use serde::Deserialize; | ||||
|  | ||||
| use crate::data::{SearchQuery, DEFAULT_SEARCH_LIMIT}; | ||||
| use crate::error::ResponseError; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::routes::IndexParam; | ||||
| use crate::Data; | ||||
|  | ||||
| /// Registers the POST and GET search routes on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(search_with_post); | ||||
|     cfg.service(search_with_url_query); | ||||
| } | ||||
|  | ||||
| /// Search parameters as they arrive in the URL query string of the GET search route. | ||||
| /// | ||||
| /// Keys are camelCase and unknown keys are rejected. List-like parameters are plain | ||||
| /// strings here; the `TryFrom<SearchQueryGet> for SearchQuery` conversion splits them on | ||||
| /// commas and parses `facet_filters` as JSON. | ||||
| #[derive(Deserialize, Debug)] | ||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||
| pub struct SearchQueryGet { | ||||
|     q: Option<String>, | ||||
|     offset: Option<usize>, | ||||
|     limit: Option<usize>, | ||||
|     // Comma-separated attribute names. | ||||
|     attributes_to_retrieve: Option<String>, | ||||
|     // Comma-separated attribute names. | ||||
|     attributes_to_crop: Option<String>, | ||||
|     crop_length: Option<usize>, | ||||
|     // Comma-separated attribute names. | ||||
|     attributes_to_highlight: Option<String>, | ||||
|     filters: Option<String>, | ||||
|     matches: Option<bool>, | ||||
|     // JSON-encoded value, parsed with `serde_json::from_str` during conversion. | ||||
|     facet_filters: Option<String>, | ||||
|     // Comma-separated attribute names. | ||||
|     facet_distributions: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Converts URL query-string search parameters into a `SearchQuery`. | ||||
| /// | ||||
| /// Comma-separated parameters are split into collections, `facet_filters` is parsed as | ||||
| /// JSON (the only fallible step), and a missing `limit` falls back to | ||||
| /// `DEFAULT_SEARCH_LIMIT`. | ||||
| impl TryFrom<SearchQueryGet> for SearchQuery { | ||||
|     type Error = anyhow::Error; | ||||
|  | ||||
|     fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> { | ||||
|         // Splits an optional comma-separated string into the requested collection type. | ||||
|         fn split_list<C>(raw: Option<String>) -> Option<C> | ||||
|         where | ||||
|             C: std::iter::FromIterator<String>, | ||||
|         { | ||||
|             raw.map(|attrs| attrs.split(",").map(String::from).collect()) | ||||
|         } | ||||
|  | ||||
|         let SearchQueryGet { | ||||
|             q, | ||||
|             offset, | ||||
|             limit, | ||||
|             attributes_to_retrieve, | ||||
|             attributes_to_crop, | ||||
|             crop_length, | ||||
|             attributes_to_highlight, | ||||
|             filters, | ||||
|             matches, | ||||
|             facet_filters, | ||||
|             facet_distributions, | ||||
|         } = other; | ||||
|  | ||||
|         let facet_filters = facet_filters | ||||
|             .as_deref() | ||||
|             .map(|raw| serde_json::from_str(raw)) | ||||
|             .transpose()?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             q, | ||||
|             offset, | ||||
|             limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT), | ||||
|             attributes_to_retrieve: split_list::<Vec<String>>(attributes_to_retrieve), | ||||
|             attributes_to_crop: split_list::<Vec<String>>(attributes_to_crop), | ||||
|             crop_length, | ||||
|             attributes_to_highlight: split_list::<HashSet<String>>(attributes_to_highlight), | ||||
|             filters, | ||||
|             matches, | ||||
|             facet_filters, | ||||
|             facet_distributions: split_list::<Vec<String>>(facet_distributions), | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/search`: runs a search described by URL query parameters. | ||||
| /// | ||||
| /// The parameters are first converted into a `SearchQuery`; a conversion failure or a | ||||
| /// search failure both yield a 400 response carrying `{ "error": <message> }`. | ||||
| #[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")] | ||||
| async fn search_with_url_query( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     params: web::Query<SearchQueryGet>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let converted: Result<SearchQuery, _> = params.into_inner().try_into(); | ||||
|     let query = match converted { | ||||
|         Ok(query) => query, | ||||
|         Err(e) => { | ||||
|             return Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))) | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     let response = match data.search(&path.index_uid, query) { | ||||
|         Ok(docs) => HttpResponse::Ok().body(serde_json::to_string(&docs).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// `POST /indexes/{index_uid}/search`: runs a search described by a JSON `SearchQuery` body. | ||||
| /// | ||||
| /// Responds 200 with the serialized results, or 400 with `{ "error": <message> }`. | ||||
| #[post("/indexes/{index_uid}/search", wrap = "Authentication::Public")] | ||||
| async fn search_with_post( | ||||
|     data: web::Data<Data>, | ||||
|     path: web::Path<IndexParam>, | ||||
|     params: web::Json<SearchQuery>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let query = params.into_inner(); | ||||
|     let response = match data.search(&path.index_uid, query) { | ||||
|         Ok(docs) => HttpResponse::Ok().body(serde_json::to_string(&docs).unwrap()), | ||||
|         Err(e) => HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })), | ||||
|     }; | ||||
|     Ok(response) | ||||
| } | ||||
| @@ -0,0 +1,43 @@ | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::make_update_delete_routes; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/attributes-for-faceting`: returns the names of the | ||||
| /// attributes configured for faceting as a JSON array. | ||||
| /// | ||||
| /// Fails with an index-not-found error when the index cannot be opened. The list is | ||||
| /// empty when the index has no schema or no faceting attributes. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/attributes-for-faceting", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     // Build the error lazily so nothing is constructed when the index exists. | ||||
|     let index = data | ||||
|         .db | ||||
|         .load() | ||||
|         .open_index(&index_uid.as_ref()) | ||||
|         .ok_or_else(|| Error::index_not_found(&index_uid.as_ref()))?; | ||||
|  | ||||
|     let attributes_for_faceting = data.db.load().main_read::<_, _, ResponseError>(|reader| { | ||||
|         let schema = index.main.schema(reader)?; | ||||
|         let attrs = index.main.attributes_for_faceting(reader)?; | ||||
|         // Ids that the schema cannot resolve to a name are skipped. | ||||
|         let attr_names = match (&schema, &attrs) { | ||||
|             (Some(schema), Some(attrs)) => attrs | ||||
|                 .iter() | ||||
|                 .filter_map(|&id| schema.name(id)) | ||||
|                 .map(str::to_string) | ||||
|                 .collect(), | ||||
|             _ => vec![], | ||||
|         }; | ||||
|         Ok(attr_names) | ||||
|     })?; | ||||
|  | ||||
|     Ok(HttpResponse::Ok().json(attributes_for_faceting)) | ||||
| } | ||||
|  | ||||
| // Invokes `make_update_delete_routes!` for the attributes-for-faceting route with a `Vec<String>` payload. | ||||
| // NOTE(review): no macro with this name is defined in the visible part of this change — confirm this file is still compiled. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/attributes-for-faceting", | ||||
|     Vec<String>, | ||||
|     attributes_for_faceting | ||||
| ); | ||||
							
								
								
									
										25
									
								
								meilisearch-http/src/routes/settings/displayed_attributes.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								meilisearch-http/src/routes/settings/displayed_attributes.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| use std::collections::HashSet; | ||||
|  | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::make_update_delete_routes; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/displayed-attributes` handler. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so calling this handler panics. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/displayed-attributes", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| // Invokes `make_update_delete_routes!` for the displayed-attributes route with a `HashSet<String>` payload. | ||||
| // NOTE(review): no macro with this name is defined in the visible part of this change — confirm this file is still compiled. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/displayed-attributes", | ||||
|     HashSet<String>, | ||||
|     displayed_attributes | ||||
| ); | ||||
							
								
								
									
										36
									
								
								meilisearch-http/src/routes/settings/distinct_attributes.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								meilisearch-http/src/routes/settings/distinct_attributes.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| use crate::make_update_delete_routes; | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/distinct-attribute`: returns the name of the | ||||
| /// distinct attribute as JSON, or `null` when none can be resolved. | ||||
| /// | ||||
| /// Fails with an index-not-found error when the index cannot be opened. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/distinct-attribute", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     // Build the error lazily so nothing is constructed when the index exists. | ||||
|     let index = data | ||||
|         .db | ||||
|         .load() | ||||
|         .open_index(&index_uid.as_ref()) | ||||
|         .ok_or_else(|| Error::index_not_found(&index_uid.as_ref()))?; | ||||
|     let reader = data.db.load().main_read_txn()?; | ||||
|     let distinct_attribute_id = index.main.distinct_attribute(&reader)?; | ||||
|     let schema = index.main.schema(&reader)?; | ||||
|     // A name is only available when both the schema and the attribute id are present. | ||||
|     let distinct_attribute = match (schema, distinct_attribute_id) { | ||||
|         (Some(schema), Some(id)) => schema.name(id).map(str::to_string), | ||||
|         _ => None, | ||||
|     }; | ||||
|  | ||||
|     Ok(HttpResponse::Ok().json(distinct_attribute)) | ||||
| } | ||||
|  | ||||
| // Invokes `make_update_delete_routes!` for the distinct-attribute route with a `String` payload. | ||||
| // NOTE(review): no macro with this name is defined in the visible part of this change — confirm this file is still compiled. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/distinct-attribute", | ||||
|     String, | ||||
|     distinct_attribute | ||||
| ); | ||||
							
								
								
									
										183
									
								
								meilisearch-http/src/routes/settings/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										183
									
								
								meilisearch-http/src/routes/settings/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,183 @@ | ||||
| use actix_web::{web, HttpResponse, delete, get, post}; | ||||
|  | ||||
| use crate::Data; | ||||
| use crate::error::ResponseError; | ||||
| use crate::index_controller::Settings; | ||||
| use crate::helpers::Authentication; | ||||
|  | ||||
| /// Generates a module named `$attr` with `get`, `update` and `delete` handlers for one | ||||
| /// field of `Settings`, all mounted on `$route` behind private authentication. | ||||
| /// | ||||
| /// * `$route` - route literal shared by the three handlers. | ||||
| /// * `$type`  - type of the setting value; `update` accepts a JSON `Option<$type>` body. | ||||
| /// * `$attr`  - name of the `Settings` field, also used as the generated module name. | ||||
| /// | ||||
| /// Each handler answers 200 with a serialized body on success and 400 with | ||||
| /// `{ "error": <message> }` on failure. | ||||
| #[macro_export] | ||||
| macro_rules! make_setting_route { | ||||
|     ($route:literal, $type:ty, $attr:ident) => { | ||||
|         mod $attr { | ||||
|             use actix_web::{web, HttpResponse}; | ||||
|  | ||||
|             use crate::data; | ||||
|             use crate::error::ResponseError; | ||||
|             use crate::helpers::Authentication; | ||||
|             use crate::index_controller::Settings; | ||||
|  | ||||
|             /// Resets the setting by sending `Some(None)` for this field only. | ||||
|             #[actix_web::delete($route, wrap = "Authentication::Private")] | ||||
|             pub async fn delete( | ||||
|                 data: web::Data<data::Data>, | ||||
|                 index_uid: web::Path<String>, | ||||
|             ) -> Result<HttpResponse, ResponseError> { | ||||
|                 let settings = Settings { | ||||
|                     $attr: Some(None), | ||||
|                     ..Default::default() | ||||
|                 }; | ||||
|                 match data.update_settings(index_uid.into_inner(), settings).await { | ||||
|                     Ok(update_status) => { | ||||
|                         let json = serde_json::to_string(&update_status).unwrap(); | ||||
|                         Ok(HttpResponse::Ok().body(json)) | ||||
|                     } | ||||
|                     Err(e) => { | ||||
|                         Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))) | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             /// Updates the setting with the JSON body, leaving every other field at its default. | ||||
|             #[actix_web::post($route, wrap = "Authentication::Private")] | ||||
|             pub async fn update( | ||||
|                 data: web::Data<data::Data>, | ||||
|                 index_uid: web::Path<String>, | ||||
|                 body: web::Json<Option<$type>>, | ||||
|             ) -> Result<HttpResponse, ResponseError> { | ||||
|                 let settings = Settings { | ||||
|                     $attr: Some(body.into_inner()), | ||||
|                     ..Default::default() | ||||
|                 }; | ||||
|  | ||||
|                 match data.update_settings(index_uid.into_inner(), settings).await { | ||||
|                     Ok(update_status) => { | ||||
|                         let json = serde_json::to_string(&update_status).unwrap(); | ||||
|                         Ok(HttpResponse::Ok().body(json)) | ||||
|                     } | ||||
|                     Err(e) => { | ||||
|                         Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))) | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             /// Returns the current value of this field, read from the index settings. | ||||
|             #[actix_web::get($route, wrap = "Authentication::Private")] | ||||
|             pub async fn get( | ||||
|                 data: web::Data<data::Data>, | ||||
|                 index_uid: web::Path<String>, | ||||
|             ) -> Result<HttpResponse, ResponseError> { | ||||
|                 match data.settings(index_uid.as_ref()) { | ||||
|                     Ok(settings) => { | ||||
|                         let setting = settings.$attr; | ||||
|                         let json = serde_json::to_string(&setting).unwrap(); | ||||
|                         Ok(HttpResponse::Ok().body(json)) | ||||
|                     } | ||||
|                     Err(e) => { | ||||
|                         Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))) | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| } | ||||
|  | ||||
| // Generates `faceted_attributes::{get, update, delete}`; the payload is a string-to-string map. | ||||
| make_setting_route!( | ||||
|     "/indexes/{index_uid}/settings/attributes-for-faceting", | ||||
|     std::collections::HashMap<String, String>, | ||||
|     faceted_attributes | ||||
| ); | ||||
|  | ||||
| // Generates `displayed_attributes::{get, update, delete}`; the payload is a list of strings. | ||||
| make_setting_route!( | ||||
|     "/indexes/{index_uid}/settings/displayed-attributes", | ||||
|     Vec<String>, | ||||
|     displayed_attributes | ||||
| ); | ||||
|  | ||||
| // Generates `searchable_attributes::{get, update, delete}`; the payload is a list of strings. | ||||
| make_setting_route!( | ||||
|     "/indexes/{index_uid}/settings/searchable-attributes", | ||||
|     Vec<String>, | ||||
|     searchable_attributes | ||||
| ); | ||||
|  | ||||
| //make_setting_route!( | ||||
|     //"/indexes/{index_uid}/settings/distinct-attribute", | ||||
|     //String, | ||||
|     //distinct_attribute | ||||
| //); | ||||
|  | ||||
| //make_setting_route!( | ||||
|     //"/indexes/{index_uid}/settings/ranking-rules", | ||||
|     //Vec<String>, | ||||
|     //ranking_rules | ||||
| //); | ||||
|  | ||||
| /// Generates the public `services` function that registers the settings handlers | ||||
| /// on a `web::ServiceConfig`. | ||||
| /// | ||||
| /// The whole-settings handlers (`update_all`, `get_all`, `delete_all`) are always | ||||
| /// registered; for every identifier passed to the macro, `<ident>::get`, | ||||
| /// `<ident>::update` and `<ident>::delete` are registered as well. | ||||
| macro_rules! create_services { | ||||
|     ($($mod:ident),*) => { | ||||
|         pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|             cfg | ||||
|                 .service(update_all) | ||||
|                 .service(get_all) | ||||
|                 .service(delete_all) | ||||
|                 // Repeated once per identifier given to the macro. | ||||
|                 $( | ||||
|                     .service($mod::get) | ||||
|                     .service($mod::update) | ||||
|                     .service($mod::delete) | ||||
|                 )*; | ||||
|         } | ||||
|     }; | ||||
| } | ||||
|  | ||||
| // Expands to `services`, registering the handlers of the three settings that | ||||
| // currently have an active `make_setting_route!` invocation. | ||||
| create_services!( | ||||
|     faceted_attributes, | ||||
|     displayed_attributes, | ||||
|     searchable_attributes | ||||
| ); | ||||
|  | ||||
| /// `POST /indexes/{index_uid}/settings`: applies the settings sent in the JSON | ||||
| /// request body to the index `index_uid`. | ||||
| /// | ||||
| /// Responds with `200 OK` and the update result as JSON when the update is | ||||
| /// accepted, or with `400 Bad Request` and a `{ "error": <message> }` JSON object | ||||
| /// when `update_settings` fails. | ||||
| #[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")] | ||||
| async fn update_all( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
|     body: web::Json<Settings>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     match data.update_settings(index_uid.into_inner(), body.into_inner()).await { | ||||
|         // `.json` serializes the payload and marks the response as JSON, like the | ||||
|         // other settings handlers, instead of unwrapping a manual serialization. | ||||
|         Ok(update_result) => Ok(HttpResponse::Ok().json(update_result)), | ||||
|         Err(e) => { | ||||
|             Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() }))) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings`: returns all settings of the index | ||||
| /// `index_uid`. | ||||
| /// | ||||
| /// Responds with `200 OK` and the settings as JSON, or with `400 Bad Request` and | ||||
| /// a `{ "error": <message> }` JSON object when the settings cannot be read. | ||||
| #[get("/indexes/{index_uid}/settings", wrap = "Authentication::Private")] | ||||
| async fn get_all( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     match data.settings(index_uid.as_ref()) { | ||||
|         // `.json` serializes the payload and marks the response as JSON, like the | ||||
|         // other settings handlers, instead of unwrapping a manual serialization. | ||||
|         Ok(settings) => Ok(HttpResponse::Ok().json(settings)), | ||||
|         Err(e) => { | ||||
|             Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() }))) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// `DELETE /indexes/{index_uid}/settings`: resets the settings of the index | ||||
| /// `index_uid` by submitting `Settings::cleared()` as a settings update. | ||||
| /// | ||||
| /// Responds with `200 OK` and the update result as JSON, or with | ||||
| /// `400 Bad Request` and a `{ "error": <message> }` JSON object when | ||||
| /// `update_settings` fails. | ||||
| #[delete("/indexes/{index_uid}/settings", wrap = "Authentication::Private")] | ||||
| async fn delete_all( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let settings = Settings::cleared(); | ||||
|     match data.update_settings(index_uid.into_inner(), settings).await { | ||||
|         // `.json` serializes the payload and marks the response as JSON, like the | ||||
|         // other settings handlers, instead of unwrapping a manual serialization. | ||||
|         Ok(update_result) => Ok(HttpResponse::Ok().json(update_result)), | ||||
|         Err(e) => { | ||||
|             Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() }))) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
							
								
								
									
										23
									
								
								meilisearch-http/src/routes/settings/ranking_rules.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								meilisearch-http/src/routes/settings/ranking_rules.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| use crate::make_update_delete_routes; | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/ranking-rules`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| /// The parameters carry a leading underscore, like the other unimplemented | ||||
| /// handlers, so the stub does not trigger unused-variable warnings. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/ranking-rules", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     _data: web::Data<Data>, | ||||
|     _index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| // NOTE(review): `make_update_delete_routes!` is defined outside this view; going | ||||
| // by its name and arguments (route, payload type, setting name) it presumably | ||||
| // generates the update and delete handlers for this route. Confirm. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/ranking-rules", | ||||
|     Vec<String>, | ||||
|     ranking_rules | ||||
| ); | ||||
| @@ -0,0 +1,34 @@ | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
|  | ||||
| use crate::data::get_indexed_attributes; | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::make_update_delete_routes; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/searchable-attributes`. | ||||
| /// | ||||
| /// Opens the index `index_uid`, reads its schema and responds with `200 OK` and | ||||
| /// the result of `get_indexed_attributes` applied to the schema as JSON. When the | ||||
| /// index has no schema the serialized value is `null`. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns `Error::index_not_found` when the index cannot be opened, and | ||||
| /// propagates failures from opening the read transaction or reading the schema. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/searchable-attributes", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let index = data | ||||
|         .db | ||||
|         .load() | ||||
|         .open_index(&index_uid.as_ref()) | ||||
|         // Build the "index not found" error lazily, only when the lookup fails. | ||||
|         .ok_or_else(|| Error::index_not_found(&index_uid.as_ref()))?; | ||||
|     let reader = data.db.load().main_read_txn()?; | ||||
|     let schema = index.main.schema(&reader)?; | ||||
|     let searchable_attributes: Option<Vec<String>> = schema.as_ref().map(get_indexed_attributes); | ||||
|  | ||||
|     Ok(HttpResponse::Ok().json(searchable_attributes)) | ||||
| } | ||||
|  | ||||
| // NOTE(review): `make_update_delete_routes!` is defined outside this view; going | ||||
| // by its name and arguments (route, payload type, setting name) it presumably | ||||
| // generates the update and delete handlers for this route. Confirm. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/searchable-attributes", | ||||
|     Vec<String>, | ||||
|     searchable_attributes | ||||
| ); | ||||
							
								
								
									
										33
									
								
								meilisearch-http/src/routes/settings/stop_words.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								meilisearch-http/src/routes/settings/stop_words.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| use std::collections::BTreeSet; | ||||
|  | ||||
| use crate::make_update_delete_routes; | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/stop-words`. | ||||
| /// | ||||
| /// Opens the index `index_uid` and responds with `200 OK` and the value returned | ||||
| /// by `index.main.stop_words` as JSON. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns `Error::index_not_found` when the index cannot be opened, and | ||||
| /// propagates failures from opening the read transaction or reading the stop words. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/stop-words", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let index = data | ||||
|         .db | ||||
|         .load() | ||||
|         .open_index(&index_uid.as_ref()) | ||||
|         // Build the "index not found" error lazily, only when the lookup fails. | ||||
|         .ok_or_else(|| Error::index_not_found(&index_uid.as_ref()))?; | ||||
|     let reader = data.db.load().main_read_txn()?; | ||||
|     let stop_words = index.main.stop_words(&reader)?; | ||||
|  | ||||
|     Ok(HttpResponse::Ok().json(stop_words)) | ||||
| } | ||||
|  | ||||
| // NOTE(review): `make_update_delete_routes!` is defined outside this view; going | ||||
| // by its name and arguments (route, payload type, setting name) it presumably | ||||
| // generates the update and delete handlers for this route. Confirm. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/stop-words", | ||||
|     BTreeSet<String>, | ||||
|     stop_words | ||||
| ); | ||||
							
								
								
									
										43
									
								
								meilisearch-http/src/routes/settings/synonyms.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								meilisearch-http/src/routes/settings/synonyms.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,43 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use actix_web::{web, HttpResponse, get}; | ||||
| use indexmap::IndexMap; | ||||
|  | ||||
| use crate::error::{Error, ResponseError}; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::make_update_delete_routes; | ||||
| use crate::Data; | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/synonyms`. | ||||
| /// | ||||
| /// Opens the index `index_uid`, iterates over the entries returned by | ||||
| /// `index.main.synonyms` and looks each one up in the index's synonyms store. | ||||
| /// Responds with `200 OK` and a JSON object mapping every entry to its lookup | ||||
| /// result, in iteration order (`IndexMap` keeps insertion order). | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns `Error::index_not_found` when the index cannot be opened, and | ||||
| /// propagates failures from the read transaction or from any synonyms lookup. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/synonyms", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     data: web::Data<Data>, | ||||
|     index_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let index = data | ||||
|         .db | ||||
|         .load() | ||||
|         .open_index(&index_uid.as_ref()) | ||||
|         // Build the "index not found" error lazily, only when the lookup fails. | ||||
|         .ok_or_else(|| Error::index_not_found(&index_uid.as_ref()))?; | ||||
|  | ||||
|     let reader = data.db.load().main_read_txn()?; | ||||
|  | ||||
|     let synonyms_list = index.main.synonyms(&reader)?; | ||||
|  | ||||
|     let mut synonyms = IndexMap::new(); | ||||
|     let index_synonyms = &index.synonyms; | ||||
|     for synonym in synonyms_list { | ||||
|         let list = index_synonyms.synonyms(&reader, synonym.as_bytes())?; | ||||
|         synonyms.insert(synonym, list); | ||||
|     } | ||||
|  | ||||
|     Ok(HttpResponse::Ok().json(synonyms)) | ||||
| } | ||||
|  | ||||
| // NOTE(review): `make_update_delete_routes!` is defined outside this view; going | ||||
| // by its name and arguments (route, payload type, setting name) it presumably | ||||
| // generates the update and delete handlers for this route. Confirm. | ||||
| make_update_delete_routes!( | ||||
|     "/indexes/{index_uid}/settings/synonyms", | ||||
|     BTreeMap<String, Vec<String>>, | ||||
|     synonyms | ||||
| ); | ||||
							
								
								
									
										60
									
								
								meilisearch-http/src/routes/stats.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								meilisearch-http/src/routes/stats.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| use std::collections::{HashMap, BTreeMap}; | ||||
|  | ||||
| use actix_web::web; | ||||
| use actix_web::HttpResponse; | ||||
| use actix_web::get; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use serde::Serialize; | ||||
|  | ||||
| use crate::error::ResponseError; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::routes::IndexParam; | ||||
| use crate::Data; | ||||
|  | ||||
| /// Registers the statistics and version handlers on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(index_stats); | ||||
|     cfg.service(get_stats); | ||||
|     cfg.service(get_version); | ||||
| } | ||||
|  | ||||
| /// Statistics payload for a single index; fields are serialized in camelCase. | ||||
| /// | ||||
| /// Not constructed by any handler in this file yet (they are still `todo!()`). | ||||
| #[derive(Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| struct IndexStatsResponse { | ||||
|     // Serialized as `numberOfDocuments`. | ||||
|     number_of_documents: u64, | ||||
|     // Serialized as `isIndexing`. | ||||
|     is_indexing: bool, | ||||
|     // Serialized as `fieldsDistribution`; presumably field name -> occurrence | ||||
|     // count (TODO confirm once the handler is implemented). | ||||
|     fields_distribution: BTreeMap<String, usize>, | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/stats`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| /// NOTE(review): presumably meant to respond with an `IndexStatsResponse`. Confirm. | ||||
| #[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")] | ||||
| async fn index_stats( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// Statistics payload covering several indexes; fields are serialized in camelCase. | ||||
| /// | ||||
| /// Not constructed by any handler in this file yet (they are still `todo!()`). | ||||
| #[derive(Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| struct StatsResult { | ||||
|     // Serialized as `databaseSize`; unit is not established here, presumably | ||||
|     // bytes (TODO confirm). | ||||
|     database_size: u64, | ||||
|     // Serialized as `lastUpdate`; `None` serializes as `null`. | ||||
|     last_update: Option<DateTime<Utc>>, | ||||
|     // Per-index statistics; presumably keyed by index uid (TODO confirm). | ||||
|     indexes: HashMap<String, IndexStatsResponse>, | ||||
| } | ||||
|  | ||||
| /// `GET /stats`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| /// NOTE(review): presumably meant to respond with a `StatsResult`. Confirm. | ||||
| #[get("/stats", wrap = "Authentication::Private")] | ||||
| async fn get_stats(_data: web::Data<Data>) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// Version payload; fields are serialized in camelCase (`commitSha`, `buildDate`, | ||||
| /// `pkgVersion`). | ||||
| /// | ||||
| /// Not constructed by any handler in this file yet (`get_version` is `todo!()`). | ||||
| #[derive(Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| struct VersionResponse { | ||||
|     commit_sha: String, | ||||
|     build_date: String, | ||||
|     pkg_version: String, | ||||
| } | ||||
|  | ||||
| /// `GET /version`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| /// NOTE(review): presumably meant to respond with a `VersionResponse`. Confirm. | ||||
| #[get("/version", wrap = "Authentication::Private")] | ||||
| async fn get_version() -> HttpResponse { | ||||
|     todo!() | ||||
| } | ||||
							
								
								
									
										46
									
								
								meilisearch-http/src/routes/stop_words.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								meilisearch-http/src/routes/stop_words.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| use actix_web::{web, HttpResponse}; | ||||
| use actix_web::{delete, get, post}; | ||||
| use std::collections::BTreeSet; | ||||
|  | ||||
| use crate::error::ResponseError; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::routes::IndexParam; | ||||
| use crate::Data; | ||||
|  | ||||
| /// Registers the stop-words `get`, `update` and `delete` handlers on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(get); | ||||
|     cfg.service(update); | ||||
|     cfg.service(delete); | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/stop-words`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| /// NOTE(review): routes/settings/stop_words.rs declares a `get` handler on the | ||||
| /// same path. Confirm which of the two is meant to be registered. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/stop-words", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// `POST /indexes/{index_uid}/settings/stop-words`; the request body is | ||||
| /// deserialized from JSON into a `BTreeSet<String>`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| #[post( | ||||
|     "/indexes/{index_uid}/settings/stop-words", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn update( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
|     _body: web::Json<BTreeSet<String>>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// `DELETE /indexes/{index_uid}/settings/stop-words`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| #[delete( | ||||
|     "/indexes/{index_uid}/settings/stop-words", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn delete( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
							
								
								
									
										47
									
								
								meilisearch-http/src/routes/synonym.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								meilisearch-http/src/routes/synonym.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use actix_web::{web, HttpResponse}; | ||||
| use actix_web::{delete, get, post}; | ||||
|  | ||||
| use crate::error::ResponseError; | ||||
| use crate::helpers::Authentication; | ||||
| use crate::routes::IndexParam; | ||||
| use crate::Data; | ||||
|  | ||||
| /// Registers the synonyms `get`, `update` and `delete` handlers on `cfg`. | ||||
| pub fn services(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(get); | ||||
|     cfg.service(update); | ||||
|     cfg.service(delete); | ||||
| } | ||||
|  | ||||
| /// `GET /indexes/{index_uid}/settings/synonyms`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| /// NOTE(review): routes/settings/synonyms.rs declares a `get` handler on the | ||||
| /// same path. Confirm which of the two is meant to be registered. | ||||
| #[get( | ||||
|     "/indexes/{index_uid}/settings/synonyms", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn get( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// `POST /indexes/{index_uid}/settings/synonyms`; the request body is | ||||
| /// deserialized from JSON into a `BTreeMap<String, Vec<String>>`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| #[post( | ||||
|     "/indexes/{index_uid}/settings/synonyms", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn update( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
|     _body: web::Json<BTreeMap<String, Vec<String>>>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
|  | ||||
| /// `DELETE /indexes/{index_uid}/settings/synonyms`. | ||||
| /// | ||||
| /// Not implemented yet: the body is `todo!()`, so invoking this handler panics. | ||||
| #[delete( | ||||
|     "/indexes/{index_uid}/settings/synonyms", | ||||
|     wrap = "Authentication::Private" | ||||
| )] | ||||
| async fn delete( | ||||
|     _data: web::Data<Data>, | ||||
|     _path: web::Path<IndexParam>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     todo!() | ||||
| } | ||||
							
								
								
									
										96
									
								
								meilisearch-http/src/snapshot.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								meilisearch-http/src/snapshot.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,96 @@ | ||||
| use crate::Data; | ||||
| use crate::error::Error; | ||||
| use crate::helpers::compression; | ||||
|  | ||||
| use log::error; | ||||
| use std::fs::create_dir_all; | ||||
| use std::path::Path; | ||||
| use std::thread; | ||||
| use std::time::{Duration}; | ||||
| use tempfile::TempDir; | ||||
|  | ||||
| /// Restores the database from a snapshot archive when no database exists yet. | ||||
| /// | ||||
| /// * `db_path` - location of the database directory. | ||||
| /// * `snapshot_path` - location of the snapshot archive, unpacked with | ||||
| ///   `compression::from_tar_gz`. | ||||
| /// * `ignore_snapshot_if_db_exists` - when `true`, an already existing database | ||||
| ///   is not reported as an error. | ||||
| /// * `ignore_missing_snapshot` - when `true`, a missing snapshot is not reported | ||||
| ///   as an error. | ||||
| /// | ||||
| /// Returns `Ok(())` without touching anything when neither the extraction nor an | ||||
| /// error case applies. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns `Error::Internal` when the database already exists or the snapshot is | ||||
| /// missing and the matching `ignore_*` flag is `false`; forwards any error | ||||
| /// returned by the extraction. | ||||
| pub fn load_snapshot( | ||||
|     db_path: &str, | ||||
|     snapshot_path: &Path, | ||||
|     ignore_snapshot_if_db_exists: bool, | ||||
|     ignore_missing_snapshot: bool | ||||
| ) -> Result<(), Error> { | ||||
|     let db_path = Path::new(db_path); | ||||
|  | ||||
|     // Probe the file system once so every branch decides on the same observation. | ||||
|     let db_exists = db_path.exists(); | ||||
|     let snapshot_exists = snapshot_path.exists(); | ||||
|  | ||||
|     if !db_exists && snapshot_exists { | ||||
|         compression::from_tar_gz(snapshot_path, db_path) | ||||
|     } else if db_exists && !ignore_snapshot_if_db_exists { | ||||
|         // Prefer the canonical path in the message; the fallback copy of the | ||||
|         // given path is only built when canonicalization fails. | ||||
|         let shown = db_path.canonicalize().unwrap_or_else(|_| db_path.to_path_buf()); | ||||
|         Err(Error::Internal(format!("database already exists at {:?}, try to delete it or rename it", shown))) | ||||
|     } else if !snapshot_exists && !ignore_missing_snapshot { | ||||
|         let shown = snapshot_path.canonicalize().unwrap_or_else(|_| snapshot_path.to_path_buf()); | ||||
|         Err(Error::Internal(format!("snapshot doesn't exist at {:?}", shown))) | ||||
|     } else { | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Creates a snapshot of the database at `snapshot_path`. | ||||
| /// | ||||
| /// The database is first copied (and compacted) into a fresh temporary | ||||
| /// directory, which is then packed into the archive with `compression::to_tar_gz`. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Propagates failures from creating the temporary directory or copying the | ||||
| /// database; a compression failure is reported as `Error::Internal`. | ||||
| pub fn create_snapshot(data: &Data, snapshot_path: &Path) -> Result<(), Error> { | ||||
|     // `staging` must stay alive until the archive is written: dropping it | ||||
|     // removes the temporary directory. | ||||
|     let staging = TempDir::new()?; | ||||
|     let staging_path = staging.path(); | ||||
|  | ||||
|     data.db.copy_and_compact_to_path(staging_path)?; | ||||
|  | ||||
|     match compression::to_tar_gz(staging_path, snapshot_path) { | ||||
|         Ok(()) => Ok(()), | ||||
|         Err(e) => Err(Error::Internal(format!("something went wrong during snapshot compression: {}", e))), | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Spawns a background thread that writes a snapshot of the database into | ||||
| /// `snapshot_dir` in an endless loop, sleeping `time_gap_s` seconds after each | ||||
| /// attempt. The first attempt happens as soon as the thread starts. | ||||
| /// | ||||
| /// The archive is named `<database directory name>.snapshot`, falling back to | ||||
| /// `data.ms.snapshot` when the directory name is not valid UTF-8. A failed | ||||
| /// attempt is logged and does not stop the loop. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns `Error::Internal` when `snapshot_dir` or the database path has no | ||||
| /// final component, and propagates a failure to create `snapshot_dir`. | ||||
| pub fn schedule_snapshot(data: Data, snapshot_dir: &Path, time_gap_s: u64) -> Result<(), Error> { | ||||
|     if snapshot_dir.file_name().is_none() { | ||||
|         return Err(Error::Internal("invalid snapshot file path".to_string())); | ||||
|     } | ||||
|     let db_name = match Path::new(&data.db_path).file_name() { | ||||
|         Some(name) => name, | ||||
|         None => return Err(Error::Internal("invalid database name".to_string())), | ||||
|     }; | ||||
|     create_dir_all(snapshot_dir)?; | ||||
|     let archive_name = format!("{}.snapshot", db_name.to_str().unwrap_or("data.ms")); | ||||
|     let snapshot_path = snapshot_dir.join(archive_name); | ||||
|  | ||||
|     let pause = Duration::from_secs(time_gap_s); | ||||
|     thread::spawn(move || loop { | ||||
|         match create_snapshot(&data, &snapshot_path) { | ||||
|             Ok(()) => {} | ||||
|             Err(e) => error!("Unsuccessful snapshot creation: {}", e), | ||||
|         } | ||||
|         thread::sleep(pause); | ||||
|     }); | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|     use std::io::prelude::*; | ||||
|     use std::fs; | ||||
|  | ||||
|     /// Packs a small directory tree into an archive, restores it through | ||||
|     /// `load_snapshot` into a not-yet-existing nested destination and checks | ||||
|     /// that every file comes back with its original content. | ||||
|     #[test] | ||||
|     fn test_pack_unpack() { | ||||
|         let sandbox = TempDir::new().unwrap(); | ||||
|         let root = sandbox.path(); | ||||
|  | ||||
|         let source = root.join("src"); | ||||
|         let target = root.join("complex/destination/path/"); | ||||
|         let archive = root.join("archive.snapshot"); | ||||
|  | ||||
|         let nested_dir = Path::new("subdir/"); | ||||
|         // (path relative to the tree root, file content) | ||||
|         let fixtures = [ | ||||
|             ("file1.txt", "Hello_file_1"), | ||||
|             ("subdir/file2.txt", "Hello_file_2"), | ||||
|         ]; | ||||
|  | ||||
|         create_dir_all(source.join(nested_dir)).unwrap(); | ||||
|         for &(relative, body) in fixtures.iter() { | ||||
|             let mut file = fs::File::create(source.join(relative)).unwrap(); | ||||
|             file.write_all(body.as_bytes()).unwrap(); | ||||
|         } | ||||
|  | ||||
|         assert!(compression::to_tar_gz(&source, &archive).is_ok()); | ||||
|         assert!(archive.exists()); | ||||
|         assert!(load_snapshot(target.to_str().unwrap(), &archive, false, false).is_ok()); | ||||
|  | ||||
|         assert!(target.exists()); | ||||
|         assert!(target.join(nested_dir).exists()); | ||||
|         for &(relative, body) in fixtures.iter() { | ||||
|             let restored = target.join(relative); | ||||
|             assert!(restored.exists()); | ||||
|             assert_eq!(fs::read_to_string(restored).unwrap(), body); | ||||
|         } | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user