mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	introduce the index metadata
This commit is contained in:
		| @@ -21,6 +21,17 @@ struct Metadata { | ||||
|     pub dump_date: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexMetadata { | ||||
|     pub uid: String, | ||||
|     pub primary_key: Option<String>, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub created_at: OffsetDateTime, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub updated_at: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] | ||||
| pub enum Version { | ||||
|     V1, | ||||
| @@ -49,12 +60,21 @@ pub(crate) mod test { | ||||
|     use time::{macros::datetime, Duration}; | ||||
|     use uuid::Uuid; | ||||
|  | ||||
|     use crate::{reader, DumpWriter, Version}; | ||||
|     use crate::{reader, DumpWriter, IndexMetadata, Version}; | ||||
|  | ||||
|     pub fn create_test_instance_uid() -> Uuid { | ||||
|         Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap() | ||||
|     } | ||||
|  | ||||
|     pub fn create_test_index_metadata() -> IndexMetadata { | ||||
|         IndexMetadata { | ||||
|             uid: S("doggo"), | ||||
|             primary_key: None, | ||||
|             created_at: datetime!(2022-11-20 12:00 UTC), | ||||
|             updated_at: datetime!(2022-11-21 00:00 UTC), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn create_test_documents() -> Vec<Map<String, Value>> { | ||||
|         vec![ | ||||
|             json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 }) | ||||
| @@ -186,7 +206,9 @@ pub(crate) mod test { | ||||
|         let documents = create_test_documents(); | ||||
|         let settings = create_test_settings(); | ||||
|  | ||||
|         let mut index = dump.create_index("doggos").unwrap(); | ||||
|         let mut index = dump | ||||
|             .create_index("doggos", &create_test_index_metadata()) | ||||
|             .unwrap(); | ||||
|         for document in &documents { | ||||
|             index.push_document(document).unwrap(); | ||||
|         } | ||||
| @@ -217,7 +239,7 @@ pub(crate) mod test { | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_creating_dump() { | ||||
|     fn test_creating_and_read_dump() { | ||||
|         let mut file = create_test_dump(); | ||||
|         let mut dump = reader::open(&mut file).unwrap(); | ||||
|  | ||||
| @@ -234,12 +256,14 @@ pub(crate) mod test { | ||||
|         let mut index = indexes.next().unwrap().unwrap(); | ||||
|         assert!(indexes.next().is_none()); // there was only one index in the dump | ||||
|  | ||||
|         assert_eq!(index.name(), "doggos"); | ||||
|  | ||||
|         for (document, expected) in index.documents().unwrap().zip(create_test_documents()) { | ||||
|             assert_eq!(document.unwrap(), expected); | ||||
|         } | ||||
|         assert_eq!(index.settings().unwrap(), create_test_settings()); | ||||
|         assert_eq!(index.metadata(), &create_test_index_metadata()); | ||||
|  | ||||
|         drop(index); | ||||
|         drop(indexes); | ||||
|  | ||||
|         // ==== checking the task queue | ||||
|         for (task, expected) in dump.tasks().zip(create_test_tasks()) { | ||||
|   | ||||
| @@ -12,7 +12,7 @@ use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::{Result, Version}; | ||||
| use crate::{IndexMetadata, Result, Version}; | ||||
|  | ||||
| // use self::loaders::{v2, v3, v4, v5}; | ||||
|  | ||||
| @@ -20,6 +20,7 @@ use crate::{Result, Version}; | ||||
| // mod compat; | ||||
| // mod loaders; | ||||
| // mod v1; | ||||
| mod v5; | ||||
| mod v6; | ||||
|  | ||||
| pub fn open( | ||||
| @@ -98,9 +99,12 @@ pub trait DumpReader { | ||||
|         Box< | ||||
|             dyn Iterator< | ||||
|                     Item = Result< | ||||
|                     Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>, | ||||
|                         Box< | ||||
|                             dyn IndexReader<Document = Self::Document, Settings = Self::Settings> | ||||
|                                 + '_, | ||||
|                         >, | ||||
|                     >, | ||||
|                 > + '_, | ||||
|         >, | ||||
|     >; | ||||
|  | ||||
| @@ -117,7 +121,7 @@ pub trait IndexReader { | ||||
|     type Document; | ||||
|     type Settings; | ||||
|  | ||||
|     fn name(&self) -> &str; | ||||
|     fn metadata(&self) -> &IndexMetadata; | ||||
|     fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>>; | ||||
|     fn settings(&mut self) -> Result<Self::Settings>; | ||||
| } | ||||
|   | ||||
							
								
								
									
										14
									
								
								dump/src/reader/v5/meta.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								dump/src/reader/v5/meta.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug)] | ||||
| pub struct IndexUuid { | ||||
|     pub uid: String, | ||||
|     pub index_meta: IndexMeta, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug)] | ||||
| pub struct IndexMeta { | ||||
|     pub uuid: Uuid, | ||||
|     pub creation_task_id: usize, | ||||
| } | ||||
							
								
								
									
										221
									
								
								dump/src/reader/v5/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								dump/src/reader/v5/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,221 @@ | ||||
| //! Here is what a dump v5 look like. | ||||
| //! | ||||
| //! ```text | ||||
| //! . | ||||
| //! ├── indexes | ||||
| //! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325 | ||||
| //! │   │   ├── documents.jsonl | ||||
| //! │   │   └── meta.json | ||||
| //! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8 | ||||
| //! │   │   ├── documents.jsonl | ||||
| //! │   │   └── meta.json | ||||
| //! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa | ||||
| //! │       ├── documents.jsonl | ||||
| //! │       └── meta.json | ||||
| //! ├── index_uuids | ||||
| //! │   └── data.jsonl | ||||
| //! ├── instance-uid | ||||
| //! ├── keys | ||||
| //! ├── metadata.json | ||||
| //! └── updates | ||||
| //!     ├── data.jsonl | ||||
| //!     └── updates_files | ||||
| //!         └── c83a004a-da98-4b94-b245-3256266c7281 | ||||
| //! ``` | ||||
| //! | ||||
| //! Here is what `index_uuids/data.jsonl` looks like; | ||||
| //! | ||||
| //! ```json | ||||
| //! {"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}} | ||||
| //! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}} | ||||
| //! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}} | ||||
| //! ``` | ||||
| //! | ||||
|  | ||||
| use std::{ | ||||
|     fs::{self, File}, | ||||
|     io::{BufRead, BufReader}, | ||||
|     path::Path, | ||||
| }; | ||||
|  | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::{IndexMetadata, Result, Version}; | ||||
|  | ||||
| use self::{ | ||||
|     meta::IndexUuid, | ||||
|     settings::{Checked, Settings, Unchecked}, | ||||
|     tasks::Task, | ||||
| }; | ||||
|  | ||||
| use super::{DumpReader, IndexReader}; | ||||
|  | ||||
| mod meta; | ||||
| mod settings; | ||||
| mod tasks; | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Metadata { | ||||
|     db_version: String, | ||||
|     index_db_size: usize, | ||||
|     update_db_size: usize, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     dump_date: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| pub struct V5Reader { | ||||
|     dump: TempDir, | ||||
|     metadata: Metadata, | ||||
|     tasks: BufReader<File>, | ||||
|     keys: BufReader<File>, | ||||
|     index_uuid: Vec<IndexUuid>, | ||||
| } | ||||
|  | ||||
| struct V5IndexReader { | ||||
|     metadata: IndexMetadata, | ||||
|  | ||||
|     documents: BufReader<File>, | ||||
|     settings: BufReader<File>, | ||||
| } | ||||
|  | ||||
| impl V5IndexReader { | ||||
|     pub fn new(name: String, path: &Path) -> Result<Self> { | ||||
|         let metadata = File::open(path.join("metadata.json"))?; | ||||
|  | ||||
|         let ret = V5IndexReader { | ||||
|             metadata: serde_json::from_reader(metadata)?, | ||||
|             documents: BufReader::new(File::open(path.join("documents.jsonl"))?), | ||||
|             settings: BufReader::new(File::open(path.join("settings.json"))?), | ||||
|         }; | ||||
|  | ||||
|         Ok(ret) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl V5Reader { | ||||
|     pub fn open(dump: TempDir) -> Result<Self> { | ||||
|         let meta_file = fs::read(dump.path().join("metadata.json"))?; | ||||
|         let metadata = serde_json::from_reader(&*meta_file)?; | ||||
|         let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; | ||||
|         let index_uuid = BufReader::new(index_uuid); | ||||
|         let index_uuid = index_uuid | ||||
|             .lines() | ||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }) | ||||
|             .collect::<Result<Vec<_>>>()?; | ||||
|  | ||||
|         Ok(V5Reader { | ||||
|             metadata, | ||||
|             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), | ||||
|             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), | ||||
|             index_uuid, | ||||
|             dump, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DumpReader for V5Reader { | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = Settings<Checked>; | ||||
|  | ||||
|     type Task = Task; | ||||
|     type UpdateFile = File; | ||||
|  | ||||
|     // TODO: remove this | ||||
|     type Key = meilisearch_auth::Key; | ||||
|  | ||||
|     fn version(&self) -> Version { | ||||
|         Version::V5 | ||||
|     } | ||||
|  | ||||
|     fn date(&self) -> Option<OffsetDateTime> { | ||||
|         Some(self.metadata.dump_date) | ||||
|     } | ||||
|  | ||||
|     fn instance_uid(&self) -> Result<Option<Uuid>> { | ||||
|         let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?; | ||||
|         Ok(Some(Uuid::parse_str(&uuid)?)) | ||||
|     } | ||||
|  | ||||
|     fn indexes( | ||||
|         &self, | ||||
|     ) -> Result< | ||||
|         Box< | ||||
|             dyn Iterator< | ||||
|                     Item = Result< | ||||
|                         Box< | ||||
|                             dyn super::IndexReader< | ||||
|                                     Document = Self::Document, | ||||
|                                     Settings = Self::Settings, | ||||
|                                 > + '_, | ||||
|                         >, | ||||
|                     >, | ||||
|                 > + '_, | ||||
|         >, | ||||
|     > { | ||||
|         Ok(Box::new(self.index_uuid.iter().map(|index| -> Result<_> { | ||||
|             Ok(Box::new(V5IndexReader::new( | ||||
|                 index.uid.clone(), | ||||
|                 &self | ||||
|                     .dump | ||||
|                     .path() | ||||
|                     .join("indexes") | ||||
|                     .join(index.index_meta.uuid.to_string()), | ||||
|             )?) | ||||
|                 as Box< | ||||
|                     dyn IndexReader<Document = Self::Document, Settings = Self::Settings>, | ||||
|                 >) | ||||
|         }))) | ||||
|     } | ||||
|  | ||||
|     fn tasks( | ||||
|         &mut self, | ||||
|     ) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>> + '_> { | ||||
|         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { | ||||
|             let task: Self::Task = serde_json::from_str(&line?)?; | ||||
|             if let Some(uuid) = task.get_content_uuid() { | ||||
|                 let update_file_path = self | ||||
|                     .dump | ||||
|                     .path() | ||||
|                     .join("updates") | ||||
|                     .join("update_files") | ||||
|                     .join(uuid.to_string()); | ||||
|                 Ok((task, Some(File::open(update_file_path)?))) | ||||
|             } else { | ||||
|                 Ok((task, None)) | ||||
|             } | ||||
|         })) | ||||
|     } | ||||
|  | ||||
|     // TODO: do it | ||||
|     fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_> { | ||||
|         Box::new( | ||||
|             (&mut self.keys) | ||||
|                 .lines() | ||||
|                 .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl IndexReader for V5IndexReader { | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = Settings<Checked>; | ||||
|  | ||||
|     fn metadata(&self) -> &IndexMetadata { | ||||
|         &self.metadata | ||||
|     } | ||||
|  | ||||
|     fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> { | ||||
|         Ok(Box::new((&mut self.documents).lines().map( | ||||
|             |line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }, | ||||
|         ))) | ||||
|     } | ||||
|  | ||||
|     fn settings(&mut self) -> Result<Self::Settings> { | ||||
|         let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||
|         Ok(settings.check()) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										251
									
								
								dump/src/reader/v5/settings.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										251
									
								
								dump/src/reader/v5/settings.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,251 @@ | ||||
| use std::{ | ||||
|     collections::{BTreeMap, BTreeSet}, | ||||
|     marker::PhantomData, | ||||
| }; | ||||
|  | ||||
| use serde::{Deserialize, Deserializer, Serialize, Serializer}; | ||||
|  | ||||
| #[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)] | ||||
| pub struct Checked; | ||||
|  | ||||
| #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] | ||||
| pub struct Unchecked; | ||||
|  | ||||
| /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings | ||||
| /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a | ||||
| /// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`. | ||||
| #[derive(Debug, Clone, Default, Deserialize, PartialEq)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] | ||||
| pub struct Settings<T> { | ||||
|     #[serde(default)] | ||||
|     pub displayed_attributes: Setting<Vec<String>>, | ||||
|  | ||||
|     #[serde(default)] | ||||
|     pub searchable_attributes: Setting<Vec<String>>, | ||||
|  | ||||
|     #[serde(default)] | ||||
|     pub filterable_attributes: Setting<BTreeSet<String>>, | ||||
|     #[serde(default)] | ||||
|     pub sortable_attributes: Setting<BTreeSet<String>>, | ||||
|     #[serde(default)] | ||||
|     pub ranking_rules: Setting<Vec<String>>, | ||||
|     #[serde(default)] | ||||
|     pub stop_words: Setting<BTreeSet<String>>, | ||||
|     #[serde(default)] | ||||
|     pub synonyms: Setting<BTreeMap<String, Vec<String>>>, | ||||
|     #[serde(default)] | ||||
|     pub distinct_attribute: Setting<String>, | ||||
|     #[serde(default)] | ||||
|     pub typo_tolerance: Setting<TypoSettings>, | ||||
|     #[serde(default)] | ||||
|     pub faceting: Setting<FacetingSettings>, | ||||
|     #[serde(default)] | ||||
|     pub pagination: Setting<PaginationSettings>, | ||||
|  | ||||
|     #[serde(skip)] | ||||
|     pub _kind: PhantomData<T>, | ||||
| } | ||||
|  | ||||
| fn serialize_with_wildcard<S>( | ||||
|     field: &Setting<Vec<String>>, | ||||
|     s: S, | ||||
| ) -> std::result::Result<S::Ok, S::Error> | ||||
| where | ||||
|     S: Serializer, | ||||
| { | ||||
|     let wildcard = vec!["*".to_string()]; | ||||
|     match field { | ||||
|         Setting::Set(value) => Some(value), | ||||
|         Setting::Reset => Some(&wildcard), | ||||
|         Setting::NotSet => None, | ||||
|     } | ||||
|     .serialize(s) | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Copy)] | ||||
| pub enum Setting<T> { | ||||
|     Set(T), | ||||
|     Reset, | ||||
|     NotSet, | ||||
| } | ||||
|  | ||||
| impl<T> Default for Setting<T> { | ||||
|     fn default() -> Self { | ||||
|         Self::NotSet | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<T> Setting<T> { | ||||
|     pub fn set(self) -> Option<T> { | ||||
|         match self { | ||||
|             Self::Set(value) => Some(value), | ||||
|             _ => None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub const fn as_ref(&self) -> Setting<&T> { | ||||
|         match *self { | ||||
|             Self::Set(ref value) => Setting::Set(value), | ||||
|             Self::Reset => Setting::Reset, | ||||
|             Self::NotSet => Setting::NotSet, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub const fn is_not_set(&self) -> bool { | ||||
|         matches!(self, Self::NotSet) | ||||
|     } | ||||
|  | ||||
|     /// If `Self` is `Reset`, then map self to `Set` with the provided `val`. | ||||
|     pub fn or_reset(self, val: T) -> Self { | ||||
|         match self { | ||||
|             Self::Reset => Self::Set(val), | ||||
|             otherwise => otherwise, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> { | ||||
|     fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error> | ||||
|     where | ||||
|         D: Deserializer<'de>, | ||||
|     { | ||||
|         Deserialize::deserialize(deserializer).map(|x| match x { | ||||
|             Some(x) => Self::Set(x), | ||||
|             None => Self::Reset, // Reset is forced by sending null value | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Deserialize, PartialEq)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct MinWordSizeTyposSetting { | ||||
|     #[serde(default)] | ||||
|     pub one_typo: Setting<u8>, | ||||
|     #[serde(default)] | ||||
|     pub two_typos: Setting<u8>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Deserialize, PartialEq)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct TypoSettings { | ||||
|     #[serde(default)] | ||||
|     pub enabled: Setting<bool>, | ||||
|     #[serde(default)] | ||||
|     pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>, | ||||
|     #[serde(default)] | ||||
|     pub disable_on_words: Setting<BTreeSet<String>>, | ||||
|     #[serde(default)] | ||||
|     pub disable_on_attributes: Setting<BTreeSet<String>>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Deserialize, PartialEq)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct FacetingSettings { | ||||
|     #[serde(default)] | ||||
|     pub max_values_per_facet: Setting<usize>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Deserialize, PartialEq)] | ||||
| #[serde(deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct PaginationSettings { | ||||
|     #[serde(default)] | ||||
|     pub max_total_hits: Setting<usize>, | ||||
| } | ||||
|  | ||||
| impl Settings<Checked> { | ||||
|     pub fn cleared() -> Settings<Checked> { | ||||
|         Settings { | ||||
|             displayed_attributes: Setting::Reset, | ||||
|             searchable_attributes: Setting::Reset, | ||||
|             filterable_attributes: Setting::Reset, | ||||
|             sortable_attributes: Setting::Reset, | ||||
|             ranking_rules: Setting::Reset, | ||||
|             stop_words: Setting::Reset, | ||||
|             synonyms: Setting::Reset, | ||||
|             distinct_attribute: Setting::Reset, | ||||
|             typo_tolerance: Setting::Reset, | ||||
|             faceting: Setting::Reset, | ||||
|             pagination: Setting::Reset, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn into_unchecked(self) -> Settings<Unchecked> { | ||||
|         let Self { | ||||
|             displayed_attributes, | ||||
|             searchable_attributes, | ||||
|             filterable_attributes, | ||||
|             sortable_attributes, | ||||
|             ranking_rules, | ||||
|             stop_words, | ||||
|             synonyms, | ||||
|             distinct_attribute, | ||||
|             typo_tolerance, | ||||
|             faceting, | ||||
|             pagination, | ||||
|             .. | ||||
|         } = self; | ||||
|  | ||||
|         Settings { | ||||
|             displayed_attributes, | ||||
|             searchable_attributes, | ||||
|             filterable_attributes, | ||||
|             sortable_attributes, | ||||
|             ranking_rules, | ||||
|             stop_words, | ||||
|             synonyms, | ||||
|             distinct_attribute, | ||||
|             typo_tolerance, | ||||
|             faceting, | ||||
|             pagination, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Settings<Unchecked> { | ||||
|     pub fn check(self) -> Settings<Checked> { | ||||
|         let displayed_attributes = match self.displayed_attributes { | ||||
|             Setting::Set(fields) => { | ||||
|                 if fields.iter().any(|f| f == "*") { | ||||
|                     Setting::Reset | ||||
|                 } else { | ||||
|                     Setting::Set(fields) | ||||
|                 } | ||||
|             } | ||||
|             otherwise => otherwise, | ||||
|         }; | ||||
|  | ||||
|         let searchable_attributes = match self.searchable_attributes { | ||||
|             Setting::Set(fields) => { | ||||
|                 if fields.iter().any(|f| f == "*") { | ||||
|                     Setting::Reset | ||||
|                 } else { | ||||
|                     Setting::Set(fields) | ||||
|                 } | ||||
|             } | ||||
|             otherwise => otherwise, | ||||
|         }; | ||||
|  | ||||
|         Settings { | ||||
|             displayed_attributes, | ||||
|             searchable_attributes, | ||||
|             filterable_attributes: self.filterable_attributes, | ||||
|             sortable_attributes: self.sortable_attributes, | ||||
|             ranking_rules: self.ranking_rules, | ||||
|             stop_words: self.stop_words, | ||||
|             synonyms: self.synonyms, | ||||
|             distinct_attribute: self.distinct_attribute, | ||||
|             typo_tolerance: self.typo_tolerance, | ||||
|             faceting: self.faceting, | ||||
|             pagination: self.pagination, | ||||
|             _kind: PhantomData, | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										173
									
								
								dump/src/reader/v5/tasks.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								dump/src/reader/v5/tasks.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,173 @@ | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::settings::{Settings, Unchecked}; | ||||
|  | ||||
| pub type TaskId = u32; | ||||
| pub type BatchId = u32; | ||||
|  | ||||
| #[derive(Clone, Debug, Deserialize, PartialEq)] | ||||
| pub struct Task { | ||||
|     pub id: TaskId, | ||||
|     /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task) | ||||
|     /// then this is None | ||||
|     // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of | ||||
|     // the TaskContent. | ||||
|     pub content: TaskContent, | ||||
|     pub events: Vec<TaskEvent>, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, Deserialize, PartialEq)] | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| pub enum TaskContent { | ||||
|     DocumentAddition { | ||||
|         index_uid: IndexUid, | ||||
|         content_uuid: Uuid, | ||||
|         merge_strategy: IndexDocumentsMethod, | ||||
|         primary_key: Option<String>, | ||||
|         documents_count: usize, | ||||
|         allow_index_creation: bool, | ||||
|     }, | ||||
|     DocumentDeletion { | ||||
|         index_uid: IndexUid, | ||||
|         deletion: DocumentDeletion, | ||||
|     }, | ||||
|     SettingsUpdate { | ||||
|         index_uid: IndexUid, | ||||
|         settings: Settings<Unchecked>, | ||||
|         /// Indicates whether the task was a deletion | ||||
|         is_deletion: bool, | ||||
|         allow_index_creation: bool, | ||||
|     }, | ||||
|     IndexDeletion { | ||||
|         index_uid: IndexUid, | ||||
|     }, | ||||
|     IndexCreation { | ||||
|         index_uid: IndexUid, | ||||
|         primary_key: Option<String>, | ||||
|     }, | ||||
|     IndexUpdate { | ||||
|         index_uid: IndexUid, | ||||
|         primary_key: Option<String>, | ||||
|     }, | ||||
|     Dump { | ||||
|         uid: String, | ||||
|     }, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] | ||||
| pub struct IndexUid(String); | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] | ||||
| pub enum IndexDocumentsMethod { | ||||
|     /// Replace the previous document with the new one, | ||||
|     /// removing all the already known attributes. | ||||
|     ReplaceDocuments, | ||||
|  | ||||
|     /// Merge the previous version of the document with the new version, | ||||
|     /// replacing old attributes values with the new ones and add the new attributes. | ||||
|     UpdateDocuments, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] | ||||
| pub enum DocumentDeletion { | ||||
|     Clear, | ||||
|     Ids(Vec<String>), | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] | ||||
| pub enum TaskEvent { | ||||
|     Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), | ||||
|     Batched { | ||||
|         #[serde(with = "time::serde::rfc3339")] | ||||
|         timestamp: OffsetDateTime, | ||||
|         batch_id: BatchId, | ||||
|     }, | ||||
|     Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), | ||||
|     Succeeded { | ||||
|         result: TaskResult, | ||||
|         #[serde(with = "time::serde::rfc3339")] | ||||
|         timestamp: OffsetDateTime, | ||||
|     }, | ||||
|     Failed { | ||||
|         error: ResponseError, | ||||
|         #[serde(with = "time::serde::rfc3339")] | ||||
|         timestamp: OffsetDateTime, | ||||
|     }, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] | ||||
| pub enum TaskResult { | ||||
|     DocumentAddition { indexed_documents: u64 }, | ||||
|     DocumentDeletion { deleted_documents: u64 }, | ||||
|     ClearAll { deleted_documents: u64 }, | ||||
|     Other, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ResponseError { | ||||
|     message: String, | ||||
|     #[serde(rename = "code")] | ||||
|     error_code: String, | ||||
|     #[serde(rename = "type")] | ||||
|     error_type: String, | ||||
|     #[serde(rename = "link")] | ||||
|     error_link: String, | ||||
| } | ||||
|  | ||||
| impl Task { | ||||
|     /// Return true when a task is finished. | ||||
|     /// A task is finished when its last state is either `Succeeded` or `Failed`. | ||||
|     pub fn is_finished(&self) -> bool { | ||||
|         self.events.last().map_or(false, |event| { | ||||
|             matches!( | ||||
|                 event, | ||||
|                 TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. } | ||||
|             ) | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Return the content_uuid of the `Task` if there is one. | ||||
|     pub fn get_content_uuid(&self) -> Option<Uuid> { | ||||
|         match self { | ||||
|             Task { | ||||
|                 content: TaskContent::DocumentAddition { content_uuid, .. }, | ||||
|                 .. | ||||
|             } => Some(*content_uuid), | ||||
|             _ => None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn index_uid(&self) -> Option<&str> { | ||||
|         match &self.content { | ||||
|             TaskContent::DocumentAddition { index_uid, .. } | ||||
|             | TaskContent::DocumentDeletion { index_uid, .. } | ||||
|             | TaskContent::SettingsUpdate { index_uid, .. } | ||||
|             | TaskContent::IndexDeletion { index_uid } | ||||
|             | TaskContent::IndexCreation { index_uid, .. } | ||||
|             | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()), | ||||
|             TaskContent::Dump { .. } => None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl IndexUid { | ||||
|     pub fn into_inner(self) -> String { | ||||
|         self.0 | ||||
|     } | ||||
|  | ||||
|     /// Return a reference over the inner str. | ||||
|     pub fn as_str(&self) -> &str { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::ops::Deref for IndexUid { | ||||
|     type Target = str; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
| @@ -2,6 +2,7 @@ use std::{ | ||||
|     fs::{self, File}, | ||||
|     io::{BufRead, BufReader}, | ||||
|     path::Path, | ||||
|     str::FromStr, | ||||
| }; | ||||
|  | ||||
| use index::{Checked, Unchecked}; | ||||
| @@ -9,50 +10,35 @@ use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::{Error, Result, Version}; | ||||
| use crate::{Error, IndexMetadata, Result, Version}; | ||||
|  | ||||
| use super::{DumpReader, IndexReader}; | ||||
|  | ||||
| type Metadata = crate::Metadata; | ||||
|  | ||||
| pub fn date(dump: &Path) -> Result<OffsetDateTime> { | ||||
|     let metadata = fs::read(dump.join("metadata.json"))?; | ||||
|     let metadata: Metadata = serde_json::from_reader(&*metadata)?; | ||||
|     Ok(metadata.dump_date) | ||||
| } | ||||
|  | ||||
| pub struct V6Reader { | ||||
|     dump: TempDir, | ||||
|     instance_uid: Uuid, | ||||
|     metadata: Metadata, | ||||
|     tasks: BufReader<File>, | ||||
|     keys: BufReader<File>, | ||||
| } | ||||
|  | ||||
| struct V6IndexReader { | ||||
|     name: String, | ||||
|     metadata: IndexMetadata, | ||||
|     documents: BufReader<File>, | ||||
|     settings: BufReader<File>, | ||||
| } | ||||
|  | ||||
| impl V6IndexReader { | ||||
|     pub fn new(name: String, path: &Path) -> Result<Self> { | ||||
|         let ret = V6IndexReader { | ||||
|             name, | ||||
|             documents: BufReader::new(File::open(path.join("documents.jsonl"))?), | ||||
|             settings: BufReader::new(File::open(path.join("settings.json"))?), | ||||
|         }; | ||||
|  | ||||
|         Ok(ret) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl V6Reader { | ||||
|     pub fn open(dump: TempDir) -> Result<Self> { | ||||
|         let meta_file = fs::read(dump.path().join("metadata.json"))?; | ||||
|         let metadata = serde_json::from_reader(&*meta_file)?; | ||||
|         let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?; | ||||
|         let instance_uid = Uuid::from_str(&instance_uid)?; | ||||
|  | ||||
|         Ok(V6Reader { | ||||
|             metadata, | ||||
|             metadata: serde_json::from_reader(&*meta_file)?, | ||||
|             instance_uid, | ||||
|             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), | ||||
|             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), | ||||
|             dump, | ||||
| @@ -60,6 +46,20 @@ impl V6Reader { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl V6IndexReader { | ||||
|     pub fn new(name: String, path: &Path) -> Result<Self> { | ||||
|         let metadata = File::open(path.join("metadata.json"))?; | ||||
|  | ||||
|         let ret = V6IndexReader { | ||||
|             metadata: serde_json::from_reader(metadata)?, | ||||
|             documents: BufReader::new(File::open(path.join("documents.jsonl"))?), | ||||
|             settings: BufReader::new(File::open(path.join("settings.json"))?), | ||||
|         }; | ||||
|  | ||||
|         Ok(ret) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DumpReader for V6Reader { | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = index::Settings<Checked>; | ||||
| @@ -78,8 +78,7 @@ impl DumpReader for V6Reader { | ||||
|     } | ||||
|  | ||||
|     fn instance_uid(&self) -> Result<Option<Uuid>> { | ||||
|         let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?; | ||||
|         Ok(Some(Uuid::parse_str(&uuid)?)) | ||||
|         Ok(Some(self.instance_uid)) | ||||
|     } | ||||
|  | ||||
|     fn indexes( | ||||
| @@ -92,10 +91,10 @@ impl DumpReader for V6Reader { | ||||
|                             dyn super::IndexReader< | ||||
|                                     Document = Self::Document, | ||||
|                                     Settings = Self::Settings, | ||||
|                                 > + '_, | ||||
|                         >, | ||||
|                     >, | ||||
|                 >, | ||||
|             >, | ||||
|                 > + '_, | ||||
|         >, | ||||
|     > { | ||||
|         let entries = fs::read_dir(self.dump.path().join("indexes"))?; | ||||
| @@ -164,8 +163,8 @@ impl IndexReader for V6IndexReader { | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = index::Settings<Checked>; | ||||
|  | ||||
|     fn name(&self) -> &str { | ||||
|         &self.name | ||||
|     fn metadata(&self) -> &IndexMetadata { | ||||
|         &self.metadata | ||||
|     } | ||||
|  | ||||
|     fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> { | ||||
|   | ||||
| @@ -8,13 +8,12 @@ use flate2::{write::GzEncoder, Compression}; | ||||
| use index::{Checked, Settings}; | ||||
| use index_scheduler::TaskView; | ||||
| use meilisearch_auth::Key; | ||||
| use serde::Serialize; | ||||
| use serde_json::{Map, Value}; | ||||
| use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::{Metadata, Result, CURRENT_DUMP_VERSION}; | ||||
| use crate::{IndexMetadata, Metadata, Result, CURRENT_DUMP_VERSION}; | ||||
|  | ||||
| pub struct DumpWriter { | ||||
|     dir: TempDir, | ||||
| @@ -23,8 +22,9 @@ pub struct DumpWriter { | ||||
| impl DumpWriter { | ||||
|     pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> { | ||||
|         let dir = TempDir::new()?; | ||||
|  | ||||
|         fs::write( | ||||
|             dir.path().join("instance-uid"), | ||||
|             dir.path().join("instance_uid.uuid"), | ||||
|             &instance_uuid.as_hyphenated().to_string(), | ||||
|         )?; | ||||
|  | ||||
| @@ -43,8 +43,8 @@ impl DumpWriter { | ||||
|         Ok(DumpWriter { dir }) | ||||
|     } | ||||
|  | ||||
|     pub fn create_index(&self, index_name: &str) -> Result<IndexWriter> { | ||||
|         IndexWriter::new(self.dir.path().join("indexes").join(index_name)) | ||||
|     pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result<IndexWriter> { | ||||
|         IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata) | ||||
|     } | ||||
|  | ||||
|     pub fn create_keys(&self) -> Result<KeyWriter> { | ||||
| @@ -126,9 +126,12 @@ pub struct IndexWriter { | ||||
| } | ||||
|  | ||||
| impl IndexWriter { | ||||
|     pub(crate) fn new(path: PathBuf) -> Result<Self> { | ||||
|     pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result<Self> { | ||||
|         std::fs::create_dir(&path)?; | ||||
|  | ||||
|         let metadata_file = File::create(path.join("metadata.json"))?; | ||||
|         serde_json::to_writer(metadata_file, metadata)?; | ||||
|  | ||||
|         let documents = File::create(path.join("documents.jsonl"))?; | ||||
|         let settings = File::create(path.join("settings.json"))?; | ||||
|  | ||||
| @@ -243,14 +246,15 @@ pub(crate) mod test { | ||||
|         ├---- indexes/ | ||||
|         │    └---- doggos/ | ||||
|         │    │    ├---- settings.json | ||||
|         │    │    └---- documents.jsonl | ||||
|         │    │    ├---- documents.jsonl | ||||
|         │    │    └---- metadata.json | ||||
|         ├---- tasks/ | ||||
|         │    ├---- update_files/ | ||||
|         │    │    └---- 1 | ||||
|         │    └---- queue.jsonl | ||||
|         ├---- keys.jsonl | ||||
|         ├---- metadata.json | ||||
|         └---- instance-uid | ||||
|         └---- instance_uid.uuid | ||||
|         "###); | ||||
|  | ||||
|         // ==== checking the top level infos | ||||
| @@ -264,7 +268,7 @@ pub(crate) mod test { | ||||
|         } | ||||
|         "###); | ||||
|  | ||||
|         let instance_uid = fs::read_to_string(dump_path.join("instance-uid")).unwrap(); | ||||
|         let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap(); | ||||
|         assert_eq!( | ||||
|             Uuid::from_str(&instance_uid).unwrap(), | ||||
|             create_test_instance_uid() | ||||
| @@ -284,6 +288,16 @@ pub(crate) mod test { | ||||
|             serde_json::from_str::<Settings<Unchecked>>(&test_settings).unwrap(), | ||||
|             create_test_settings().into_unchecked() | ||||
|         ); | ||||
|         let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap(); | ||||
|         let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap(); | ||||
|         insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###" | ||||
|         { | ||||
|           "uid": "doggo", | ||||
|           "primaryKey": null, | ||||
|           "createdAt": "[date]", | ||||
|           "updatedAt": "[date]" | ||||
|         } | ||||
|         "###); | ||||
|  | ||||
|         // ==== checking the task queue | ||||
|         let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user