mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	implement the dump reader v6
This commit is contained in:
		| @@ -15,6 +15,10 @@ flate2 = "1.0.22" | ||||
| thiserror = "1.0.30" | ||||
| time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } | ||||
| tar = "0.4.38" | ||||
| anyhow = "1.0.65" | ||||
| log = "0.4.17" | ||||
| index-scheduler = { path = "../index-scheduler" } | ||||
| meilisearch-auth = { path = "../meilisearch-auth" } | ||||
|  | ||||
| [dev-dependencies] | ||||
| insta = { version = "1.19.1", features = ["json", "redactions"] } | ||||
|   | ||||
| @@ -2,6 +2,11 @@ use thiserror::Error; | ||||
|  | ||||
| #[derive(Debug, Error)] | ||||
| pub enum Error { | ||||
|     #[error("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")] | ||||
|     DumpV1Unsupported, | ||||
|     #[error("Bad index name")] | ||||
|     BadIndexName, | ||||
|  | ||||
|     #[error(transparent)] | ||||
|     Io(#[from] std::io::Error), | ||||
|     #[error(transparent)] | ||||
|   | ||||
| @@ -1,23 +1,32 @@ | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| // mod dump; | ||||
| mod error; | ||||
| mod reader; | ||||
| mod writer; | ||||
|  | ||||
| pub use error::Error; | ||||
| pub use writer::DumpWriter; | ||||
|  | ||||
| const CURRENT_DUMP_VERSION: &str = "V6"; | ||||
|  | ||||
| pub struct DumpReader; | ||||
| const CURRENT_DUMP_VERSION: Version = Version::V6; | ||||
|  | ||||
| type Result<T> = std::result::Result<T, Error>; | ||||
|  | ||||
| #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| struct Metadata { | ||||
|     pub dump_version: String, | ||||
|     pub dump_version: Version, | ||||
|     pub db_version: String, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub dump_date: OffsetDateTime, | ||||
| } | ||||
|  | ||||
/// Version of the dump format.
///
/// Used as the type of `Metadata::dump_version` and of `CURRENT_DUMP_VERSION`
/// (currently `Version::V6`). No serde renaming attribute is applied to the enum.
| #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] | ||||
| pub enum Version { | ||||
|     V1, | ||||
|     V2, | ||||
|     V3, | ||||
|     V4, | ||||
|     V5, | ||||
|     V6, | ||||
| } | ||||
|   | ||||
| @@ -17,18 +17,20 @@ use crate::{Result, Version}; | ||||
| // pub mod error; | ||||
| // mod compat; | ||||
| // mod loaders; | ||||
| mod v1; | ||||
| // mod v6; | ||||
| // mod v1; | ||||
| mod v6; | ||||
|  | ||||
| pub fn open( | ||||
|     dump_path: &Path, | ||||
| ) -> Result< | ||||
|     impl DumpReader< | ||||
|         Document = serde_json::Value, | ||||
|         Settings = Settings<Unchecked>, | ||||
|         Task = TaskView, | ||||
|         UpdateFile = (), | ||||
|         Key = Key, | ||||
|     Box< | ||||
|         dyn DumpReader< | ||||
|             Document = serde_json::Map<String, serde_json::Value>, | ||||
|             Settings = Settings<Unchecked>, | ||||
|             Task = TaskView, | ||||
|             UpdateFile = File, | ||||
|             Key = Key, | ||||
|         >, | ||||
|     >, | ||||
| > { | ||||
|     let path = TempDir::new()?; | ||||
| @@ -54,10 +56,21 @@ pub fn open( | ||||
|         Version::V3 => todo!(), | ||||
|         Version::V4 => todo!(), | ||||
|         Version::V5 => todo!(), | ||||
|         Version::V6 => todo!(), | ||||
|     }; | ||||
|         Version::V6 => { | ||||
|             let dump_reader = Box::new(v6::V6Reader::open(path)?) | ||||
|                 as Box< | ||||
|                     dyn DumpReader< | ||||
|                         Document = serde_json::Map<String, serde_json::Value>, | ||||
|                         Settings = Settings<Unchecked>, | ||||
|                         Task = TaskView, | ||||
|                         UpdateFile = File, | ||||
|                         Key = Key, | ||||
|                     >, | ||||
|                 >; | ||||
|  | ||||
|     todo!() | ||||
|             Ok(dump_reader) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub trait DumpReader { | ||||
| @@ -73,7 +86,7 @@ pub trait DumpReader { | ||||
|     fn version(&self) -> Version; | ||||
|  | ||||
|     /// Return the date at which the dump was created, if it is known. | ||||
|     fn date(&self) -> Result<Option<OffsetDateTime>>; | ||||
|     fn date(&self) -> Option<OffsetDateTime>; | ||||
|  | ||||
|     /// Return an iterator over each index. | ||||
|     fn indexes( | ||||
| @@ -81,18 +94,20 @@ pub trait DumpReader { | ||||
|     ) -> Result< | ||||
|         Box< | ||||
|             dyn Iterator< | ||||
|                 Item = Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>, | ||||
|                 Item = Result< | ||||
|                     Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>, | ||||
|                 >, | ||||
|             >, | ||||
|         >, | ||||
|     >; | ||||
|  | ||||
|     /// Return all the tasks in the dump with a possible update file. | ||||
|     fn tasks( | ||||
|         &self, | ||||
|     ) -> Result<Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>>>>; | ||||
|         &mut self, | ||||
|     ) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>> + '_>; | ||||
|  | ||||
|     /// Return all the keys. | ||||
|     fn keys(&self) -> Result<Box<dyn Iterator<Item = Self::Key>>>; | ||||
|     fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_>; | ||||
| } | ||||
|  | ||||
| pub trait IndexReader { | ||||
| @@ -100,6 +115,6 @@ pub trait IndexReader { | ||||
|     type Settings; | ||||
|  | ||||
|     fn name(&self) -> &str; | ||||
|     fn documents(&self) -> Result<Box<dyn Iterator<Item = Self::Document>>>; | ||||
|     fn settings(&self) -> Result<Self::Settings>; | ||||
|     fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>>; | ||||
|     fn settings(&mut self) -> Result<Self::Settings>; | ||||
| } | ||||
|   | ||||
| @@ -5,7 +5,6 @@ use std::{ | ||||
|     path::Path, | ||||
| }; | ||||
|  | ||||
| use serde::Deserialize; | ||||
| use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| @@ -26,9 +25,9 @@ pub struct V1Reader { | ||||
|  | ||||
| struct V1IndexReader { | ||||
|     name: String, | ||||
|     documents: File, | ||||
|     settings: File, | ||||
|     updates: File, | ||||
|     documents: BufReader<File>, | ||||
|     settings: BufReader<File>, | ||||
|     updates: BufReader<File>, | ||||
|  | ||||
|     current_update: Option<UpdateStatus>, | ||||
| } | ||||
| @@ -37,9 +36,9 @@ impl V1IndexReader { | ||||
|     pub fn new(name: String, path: &Path) -> Result<Self> { | ||||
|         let mut ret = V1IndexReader { | ||||
|             name, | ||||
|             documents: File::open(path.join("documents.jsonl"))?, | ||||
|             settings: File::open(path.join("settings.json"))?, | ||||
|             updates: File::open(path.join("updates.jsonl"))?, | ||||
|             documents: BufReader::new(File::open(path.join("documents.jsonl"))?), | ||||
|             settings: BufReader::new(File::open(path.join("settings.json"))?), | ||||
|             updates: BufReader::new(File::open(path.join("updates.jsonl"))?), | ||||
|             current_update: None, | ||||
|         }; | ||||
|         ret.next_update(); | ||||
| @@ -48,10 +47,7 @@ impl V1IndexReader { | ||||
|     } | ||||
|  | ||||
|     pub fn next_update(&mut self) -> Result<Option<UpdateStatus>> { | ||||
|         let mut tasks = self.updates; | ||||
|         let mut reader = BufReader::new(&mut tasks); | ||||
|  | ||||
|         let current_update = if let Some(line) = reader.lines().next() { | ||||
|         let current_update = if let Some(line) = self.updates.lines().next() { | ||||
|             Some(serde_json::from_str(&line?)?) | ||||
|         } else { | ||||
|             None | ||||
| @@ -90,10 +86,6 @@ impl V1Reader { | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn date(&self) -> Result<Option<OffsetDateTime>> { | ||||
|         Ok(None) | ||||
|     } | ||||
|  | ||||
|     fn next_update(&mut self) -> Result<Option<UpdateStatus>> { | ||||
|         if let Some((idx, _)) = self | ||||
|             .indexes | ||||
| @@ -111,14 +103,14 @@ impl V1Reader { | ||||
| } | ||||
|  | ||||
| impl IndexReader for &V1IndexReader { | ||||
|     type Document = serde_json::Value; | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = settings::Settings; | ||||
|  | ||||
|     fn name(&self) -> &str { | ||||
|         todo!() | ||||
|     } | ||||
|  | ||||
|     fn documents(&self) -> Result<Box<dyn Iterator<Item = Self::Document>>> { | ||||
|     fn documents(&self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>>>> { | ||||
|         todo!() | ||||
|     } | ||||
|  | ||||
| @@ -128,16 +120,16 @@ impl IndexReader for &V1IndexReader { | ||||
| } | ||||
|  | ||||
| impl DumpReader for V1Reader { | ||||
|     type Document = serde_json::Value; | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = settings::Settings; | ||||
|  | ||||
|     type Task = update::UpdateStatus; | ||||
|     type UpdateFile = (); | ||||
|     type UpdateFile = Infallible; | ||||
|  | ||||
|     type Key = Infallible; | ||||
|  | ||||
|     fn date(&self) -> Result<Option<OffsetDateTime>> { | ||||
|         Ok(None) | ||||
|     fn date(&self) -> Option<OffsetDateTime> { | ||||
|         None | ||||
|     } | ||||
|  | ||||
|     fn version(&self) -> Version { | ||||
| @@ -149,29 +141,33 @@ impl DumpReader for V1Reader { | ||||
|     ) -> Result< | ||||
|         Box< | ||||
|             dyn Iterator< | ||||
|                 Item = Box< | ||||
|                     dyn super::IndexReader<Document = Self::Document, Settings = Self::Settings>, | ||||
|                 Item = Result< | ||||
|                     Box< | ||||
|                         dyn super::IndexReader< | ||||
|                             Document = Self::Document, | ||||
|                             Settings = Self::Settings, | ||||
|                         >, | ||||
|                     >, | ||||
|                 >, | ||||
|             >, | ||||
|         >, | ||||
|     > { | ||||
|         Ok(Box::new(self.indexes.iter().map(|index| { | ||||
|             Box::new(index) | ||||
|                 as Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>> | ||||
|             let index = Box::new(index) | ||||
|                 as Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>; | ||||
|             Ok(index) | ||||
|         }))) | ||||
|     } | ||||
|  | ||||
|     fn tasks( | ||||
|         &self, | ||||
|     ) -> Result<Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>>>> { | ||||
|         Ok(Box::new(std::iter::from_fn(|| { | ||||
|     fn tasks(&self) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>>> { | ||||
|         Box::new(std::iter::from_fn(|| { | ||||
|             self.next_update() | ||||
|                 .transpose() | ||||
|                 .map(|result| result.map(|task| (task, None))) | ||||
|         }))) | ||||
|         })) | ||||
|     } | ||||
|  | ||||
|     fn keys(&self) -> Result<Box<dyn Iterator<Item = Self::Key>>> { | ||||
|         Ok(Box::new(std::iter::empty())) | ||||
|     fn keys(&self) -> Box<dyn Iterator<Item = Result<Self::Key>>> { | ||||
|         Box::new(std::iter::empty()) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,16 +1,170 @@ | ||||
| use std::{ | ||||
|     fs::{self}, | ||||
|     fs::{self, File}, | ||||
|     io::{BufRead, BufReader}, | ||||
|     path::Path, | ||||
| }; | ||||
|  | ||||
| use index::Unchecked; | ||||
| use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use crate::Result; | ||||
| use crate::{Error, Result, Version}; | ||||
|  | ||||
| use super::{DumpReader, IndexReader}; | ||||
|  | ||||
| type Metadata = crate::Metadata; | ||||
|  | ||||
| pub fn date(dump: &Path) -> Result<OffsetDateTime> { | ||||
|     let metadata = fs::read(dump.join("metadata.json"))?; | ||||
|     let metadata: Metadata = serde_json::from_reader(metadata)?; | ||||
|     let metadata: Metadata = serde_json::from_reader(&*metadata)?; | ||||
|     Ok(metadata.dump_date) | ||||
| } | ||||
|  | ||||
/// Reader over a V6 dump stored as a directory of files.
///
/// Built by `V6Reader::open`, which parses `metadata.json` and opens the task
/// queue and key files found under the dump directory.
| pub struct V6Reader { | ||||
      /// Temporary directory holding the dump; `indexes` and `tasks` resolve paths under it.
|     dump: TempDir, | ||||
      /// Deserialized content of `metadata.json`.
|     metadata: Metadata, | ||||
      /// Buffered reader on `tasks/queue.jsonl`.
|     tasks: BufReader<File>, | ||||
      /// Buffered reader on `keys.jsonl`.
|     keys: BufReader<File>, | ||||
| } | ||||
|  | ||||
/// Reader over a single index directory of a V6 dump.
| struct V6IndexReader { | ||||
      /// Name given to the index at construction time.
|     name: String, | ||||
      /// Buffered reader on the index's `documents.jsonl`.
|     documents: BufReader<File>, | ||||
      /// Buffered reader on the index's `settings.json`.
|     settings: BufReader<File>, | ||||
| } | ||||
|  | ||||
/// Construction of a `V6IndexReader`.
| impl V6IndexReader { | ||||
      /// Opens `documents.jsonl` and `settings.json` under `path` for the index called `name`.
      ///
      /// # Errors
      ///
      /// Fails if either file cannot be opened; the I/O error is propagated with `?`.
|     pub fn new(name: String, path: &Path) -> Result<Self> { | ||||
|         let ret = V6IndexReader { | ||||
|             name, | ||||
|             documents: BufReader::new(File::open(path.join("documents.jsonl"))?), | ||||
|             settings: BufReader::new(File::open(path.join("settings.json"))?), | ||||
|         }; | ||||
|  | ||||
|         Ok(ret) | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Construction of a `V6Reader`.
| impl V6Reader { | ||||
      /// Builds a reader from the directory `dump`.
      ///
      /// Reads `metadata.json` entirely and deserializes it, then opens
      /// `tasks/queue.jsonl` and `keys.jsonl` relative to the dump path.
      ///
      /// # Errors
      ///
      /// Fails if any of the three files cannot be read or opened, or if the
      /// metadata cannot be deserialized; errors are propagated with `?`.
|     pub fn open(dump: TempDir) -> Result<Self> { | ||||
|         let meta_file = fs::read(dump.path().join("metadata.json"))?; | ||||
|         let metadata = serde_json::from_reader(&*meta_file)?; | ||||
|  | ||||
|         Ok(V6Reader { | ||||
|             metadata, | ||||
|             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), | ||||
|             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), | ||||
              // `dump` is moved last: the initializers above still borrow it through `dump.path()`.
|             dump, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
/// `DumpReader` implementation for the V6 dump layout.
| impl DumpReader for V6Reader { | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = index::Settings<Unchecked>; | ||||
|  | ||||
|     type Task = index_scheduler::TaskView; | ||||
|     type UpdateFile = File; | ||||
|  | ||||
|     type Key = meilisearch_auth::Key; | ||||
|  | ||||
      /// Always returns `Version::V6`.
|     fn version(&self) -> Version { | ||||
|         Version::V6 | ||||
|     } | ||||
|  | ||||
      /// Returns the `dump_date` stored in the metadata; this implementation never returns `None`.
|     fn date(&self) -> Option<OffsetDateTime> { | ||||
|         Some(self.metadata.dump_date) | ||||
|     } | ||||
|  | ||||
      /// Returns an iterator over the indexes found in the `indexes` directory of the dump.
      ///
      /// Each sub-directory yields a `V6IndexReader` named after the directory;
      /// entries that are not directories are skipped. Failures while inspecting
      /// an entry or opening its files are yielded as `Err` items.
      ///
      /// # Errors
      ///
      /// The outer `Result` fails if the `indexes` directory cannot be listed.
|     fn indexes( | ||||
|         &self, | ||||
|     ) -> Result< | ||||
|         Box< | ||||
|             dyn Iterator< | ||||
|                 Item = Result< | ||||
|                     Box< | ||||
|                         dyn super::IndexReader< | ||||
|                             Document = Self::Document, | ||||
|                             Settings = Self::Settings, | ||||
|                         >, | ||||
|                     >, | ||||
|                 >, | ||||
|             >, | ||||
|         >, | ||||
|     > { | ||||
|         let entries = fs::read_dir(self.dump.path().join("indexes"))?; | ||||
|         Ok(Box::new( | ||||
|             entries | ||||
                  // Map every entry to `Ok(Some(reader))`, `Ok(None)` (skipped) or `Err(..)`.
|                 .map(|entry| -> Result<Option<_>> { | ||||
|                     let entry = entry?; | ||||
|                     if entry.file_type()?.is_dir() { | ||||
|                         let index = Box::new(V6IndexReader::new( | ||||
                              // The directory name is the index name; `to_str` returning `None`
                              // is reported as `Error::BadIndexName`.
|                             entry | ||||
|                                 .file_name() | ||||
|                                 .to_str() | ||||
|                                 .ok_or(Error::BadIndexName)? | ||||
|                                 .to_string(), | ||||
|                             &entry.path(), | ||||
|                         )?) | ||||
|                             as Box< | ||||
|                                 dyn IndexReader< | ||||
|                                     Document = Self::Document, | ||||
|                                     Settings = Self::Settings, | ||||
|                                 >, | ||||
|                             >; | ||||
|                         Ok(Some(index)) | ||||
|                     } else { | ||||
|                         Ok(None) | ||||
|                     } | ||||
|                 }) | ||||
                  // `transpose` turns `Ok(None)` into `None`, dropping skipped entries while keeping errors.
|                 .filter_map(|entry| entry.transpose()), | ||||
|         )) | ||||
|     } | ||||
|  | ||||
      /// Returns an iterator over the tasks of the queue file, one task per line.
      ///
      /// Each line is deserialized as a `TaskView`. The task is paired with an open
      /// handle on `tasks/update_files/<uid>` when that path exists, and with `None`
      /// otherwise. Read, parse and open failures are yielded as `Err` items.
      ///
      /// NOTE(review): lines are read from the reader's current position and nothing
      /// rewinds it, so a later call presumably yields only the remaining lines — confirm.
|     fn tasks( | ||||
|         &mut self, | ||||
|     ) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>> + '_> { | ||||
|         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { | ||||
|             let task: index_scheduler::TaskView = serde_json::from_str(&line?)?; | ||||
|             let update_file_path = self | ||||
|                 .dump | ||||
|                 .path() | ||||
|                 .join("tasks") | ||||
|                 .join("update_files") | ||||
|                 .join(task.uid.to_string()); | ||||
|  | ||||
|             if update_file_path.exists() { | ||||
|                 Ok((task, Some(File::open(update_file_path)?))) | ||||
|             } else { | ||||
|                 Ok((task, None)) | ||||
|             } | ||||
|         })) | ||||
|     } | ||||
|  | ||||
      /// Returns an iterator over the keys file, deserializing one key per line.
      ///
      /// Read and parse failures are yielded as `Err` items.
|     fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_> { | ||||
|         Box::new( | ||||
|             (&mut self.keys) | ||||
|                 .lines() | ||||
|                 .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
/// `IndexReader` implementation for one index of a V6 dump.
| impl IndexReader for V6IndexReader { | ||||
|     type Document = serde_json::Map<String, serde_json::Value>; | ||||
|     type Settings = index::Settings<Unchecked>; | ||||
|  | ||||
      /// Returns the index name.
|     fn name(&self) -> &str { | ||||
|         &self.name | ||||
|     } | ||||
|  | ||||
      /// Returns an iterator over the documents file, deserializing one JSON object per line.
      ///
      /// Read and parse failures are yielded as `Err` items; the outer `Result`
      /// is always `Ok` in this implementation.
|     fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> { | ||||
|         Ok(Box::new((&mut self.documents).lines().map( | ||||
|             |line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }, | ||||
|         ))) | ||||
|     } | ||||
|  | ||||
      /// Deserializes the settings from the settings reader.
      ///
      /// NOTE(review): the reader is consumed from its current position and never
      /// rewound, so a second call would presumably start at end-of-file — confirm
      /// that callers invoke this only once.
|     fn settings(&mut self) -> Result<Self::Settings> { | ||||
|         Ok(serde_json::from_reader(&mut self.settings)?) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -5,15 +5,13 @@ use std::{ | ||||
| }; | ||||
|  | ||||
| use flate2::{write::GzEncoder, Compression}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde::Serialize; | ||||
| use tempfile::TempDir; | ||||
| use thiserror::Error; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::{Metadata, Result, CURRENT_DUMP_VERSION}; | ||||
|  | ||||
| #[must_use] | ||||
| pub struct DumpWriter { | ||||
|     dir: TempDir, | ||||
| } | ||||
| @@ -27,7 +25,7 @@ impl DumpWriter { | ||||
|         )?; | ||||
|  | ||||
|         let metadata = Metadata { | ||||
|             dump_version: CURRENT_DUMP_VERSION.to_string(), | ||||
|             dump_version: CURRENT_DUMP_VERSION, | ||||
|             db_version: env!("CARGO_PKG_VERSION").to_string(), | ||||
|             dump_date: OffsetDateTime::now_utc(), | ||||
|         }; | ||||
| @@ -45,17 +43,14 @@ impl DumpWriter { | ||||
|         IndexWriter::new(self.dir.path().join("indexes").join(index_name)) | ||||
|     } | ||||
|  | ||||
|     #[must_use] | ||||
|     pub fn create_keys(&self) -> Result<KeyWriter> { | ||||
|         KeyWriter::new(self.dir.path().to_path_buf()) | ||||
|     } | ||||
|  | ||||
|     #[must_use] | ||||
|     pub fn create_tasks_queue(&self) -> Result<TaskWriter> { | ||||
|         TaskWriter::new(self.dir.path().join("tasks")) | ||||
|     } | ||||
|  | ||||
|     #[must_use] | ||||
|     pub fn persist_to(self, mut writer: impl Write) -> Result<()> { | ||||
|         let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); | ||||
|         let mut tar_encoder = tar::Builder::new(gz_encoder); | ||||
| @@ -68,7 +63,6 @@ impl DumpWriter { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[must_use] | ||||
| pub struct KeyWriter { | ||||
|     file: File, | ||||
| } | ||||
| @@ -86,7 +80,6 @@ impl KeyWriter { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[must_use] | ||||
| pub struct TaskWriter { | ||||
|     queue: File, | ||||
|     update_files: PathBuf, | ||||
| @@ -124,7 +117,6 @@ impl TaskWriter { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[must_use] | ||||
| pub struct IndexWriter { | ||||
|     documents: File, | ||||
|     settings: File, | ||||
| @@ -149,7 +141,6 @@ impl IndexWriter { | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[must_use] | ||||
|     pub fn settings(mut self, settings: impl Serialize) -> Result<()> { | ||||
|         self.settings.write_all(&serde_json::to_vec(&settings)?)?; | ||||
|         Ok(()) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user