mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #2098
2098: feat(dump): Provide the same cli options as the snapshots r=MarinPostma a=irevoire Add two cli options for the dump: - `--ignore-missing-dump` - `--ignore-dump-if-db-exists` Fix #2087 Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
		| @@ -30,11 +30,15 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> { | ||||
|     meilisearch | ||||
|         .set_max_index_size(opt.max_index_size.get_bytes() as usize) | ||||
|         .set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize) | ||||
|         // snapshot | ||||
|         .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) | ||||
|         .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) | ||||
|         .set_dump_dst(opt.dumps_dir.clone()) | ||||
|         .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) | ||||
|         .set_snapshot_dir(opt.snapshot_dir.clone()); | ||||
|         .set_snapshot_dir(opt.snapshot_dir.clone()) | ||||
|         // dump | ||||
|         .set_ignore_missing_dump(opt.ignore_missing_dump) | ||||
|         .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists) | ||||
|         .set_dump_dst(opt.dumps_dir.clone()); | ||||
|  | ||||
|     if let Some(ref path) = opt.import_snapshot { | ||||
|         meilisearch.set_import_snapshot(path.clone()); | ||||
|   | ||||
| @@ -124,14 +124,22 @@ pub struct Opt { | ||||
|     #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h | ||||
|     pub snapshot_interval_sec: u64, | ||||
|  | ||||
|     /// Folder where dumps are created when the dump route is called. | ||||
|     #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] | ||||
|     pub dumps_dir: PathBuf, | ||||
|  | ||||
|     /// Import a dump from the specified path, must be a `.dump` file. | ||||
|     #[clap(long, conflicts_with = "import-snapshot")] | ||||
|     pub import_dump: Option<PathBuf>, | ||||
|  | ||||
|     /// If the dump doesn't exists, load or create the database specified by `db-path` instead. | ||||
|     #[clap(long, requires = "import-dump")] | ||||
|     pub ignore_missing_dump: bool, | ||||
|  | ||||
|     /// Ignore the dump if a database already exists, and load that database instead. | ||||
|     #[clap(long, requires = "import-dump")] | ||||
|     pub ignore_dump_if_db_exists: bool, | ||||
|  | ||||
|     /// Folder where dumps are created when the dump route is called. | ||||
|     #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] | ||||
|     pub dumps_dir: PathBuf, | ||||
|  | ||||
|     /// Set the log level | ||||
|     #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] | ||||
|     pub log_level: String, | ||||
|   | ||||
| @@ -148,6 +148,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt { | ||||
|         schedule_snapshot: false, | ||||
|         snapshot_interval_sec: 0, | ||||
|         import_dump: None, | ||||
|         ignore_missing_dump: false, | ||||
|         ignore_dump_if_db_exists: false, | ||||
|         indexer_options: IndexerOpts { | ||||
|             // memory has to be unlimited because several meilisearch are running in test context. | ||||
|             max_memory: MaxMemory::unlimited(), | ||||
|   | ||||
| @@ -1,14 +1,16 @@ | ||||
| use std::fs::File; | ||||
| use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use anyhow::bail; | ||||
| use chrono::{DateTime, Utc}; | ||||
| use log::{info, trace, warn}; | ||||
| use log::{info, trace}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| pub use actor::DumpActor; | ||||
| pub use handle_impl::*; | ||||
| use meilisearch_auth::AuthController; | ||||
| pub use message::DumpMsg; | ||||
| use tempfile::TempDir; | ||||
| use tokio::fs::create_dir_all; | ||||
| use tokio::sync::oneshot; | ||||
|  | ||||
| @@ -79,6 +81,47 @@ pub enum MetadataVersion { | ||||
| } | ||||
|  | ||||
| impl MetadataVersion { | ||||
|     pub fn load_dump( | ||||
|         self, | ||||
|         src: impl AsRef<Path>, | ||||
|         dst: impl AsRef<Path>, | ||||
|         index_db_size: usize, | ||||
|         meta_env_size: usize, | ||||
|         indexing_options: &IndexerOpts, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         match self { | ||||
|             MetadataVersion::V1(_meta) => { | ||||
|                 anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") | ||||
|             } | ||||
|             MetadataVersion::V2(meta) => v2::load_dump( | ||||
|                 meta, | ||||
|                 src, | ||||
|                 dst, | ||||
|                 index_db_size, | ||||
|                 meta_env_size, | ||||
|                 indexing_options, | ||||
|             )?, | ||||
|             MetadataVersion::V3(meta) => v3::load_dump( | ||||
|                 meta, | ||||
|                 src, | ||||
|                 dst, | ||||
|                 index_db_size, | ||||
|                 meta_env_size, | ||||
|                 indexing_options, | ||||
|             )?, | ||||
|             MetadataVersion::V4(meta) => v4::load_dump( | ||||
|                 meta, | ||||
|                 src, | ||||
|                 dst, | ||||
|                 index_db_size, | ||||
|                 meta_env_size, | ||||
|                 indexing_options, | ||||
|             )?, | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self { | ||||
|         let meta = Metadata::new(index_db_size, update_db_size); | ||||
|         Self::V4(meta) | ||||
| @@ -160,10 +203,46 @@ impl DumpInfo { | ||||
| pub fn load_dump( | ||||
|     dst_path: impl AsRef<Path>, | ||||
|     src_path: impl AsRef<Path>, | ||||
|     ignore_dump_if_db_exists: bool, | ||||
|     ignore_missing_dump: bool, | ||||
|     index_db_size: usize, | ||||
|     update_db_size: usize, | ||||
|     indexer_opts: &IndexerOpts, | ||||
| ) -> anyhow::Result<()> { | ||||
|     let empty_db = crate::is_empty_db(&dst_path); | ||||
|     let src_path_exists = src_path.as_ref().exists(); | ||||
|  | ||||
|     if empty_db && src_path_exists { | ||||
|         let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?; | ||||
|         meta.load_dump( | ||||
|             tmp_src.path(), | ||||
|             tmp_dst.path(), | ||||
|             index_db_size, | ||||
|             update_db_size, | ||||
|             indexer_opts, | ||||
|         )?; | ||||
|         persist_dump(&dst_path, tmp_dst)?; | ||||
|         Ok(()) | ||||
|     } else if !empty_db && !ignore_dump_if_db_exists { | ||||
|         bail!( | ||||
|             "database already exists at {:?}, try to delete it or rename it", | ||||
|             dst_path | ||||
|                 .as_ref() | ||||
|                 .canonicalize() | ||||
|                 .unwrap_or_else(|_| dst_path.as_ref().to_owned()) | ||||
|         ) | ||||
|     } else if !src_path_exists && !ignore_missing_dump { | ||||
|         bail!("dump doesn't exist at {:?}", src_path.as_ref()) | ||||
|     } else { | ||||
|         // there is nothing to do | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn extract_dump( | ||||
|     dst_path: impl AsRef<Path>, | ||||
|     src_path: impl AsRef<Path>, | ||||
| ) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> { | ||||
|     // Setup a temp directory path in the same path as the database, to prevent cross devices | ||||
|     // references. | ||||
|     let temp_path = dst_path | ||||
| @@ -201,40 +280,14 @@ pub fn load_dump( | ||||
|         meta.version() | ||||
|     ); | ||||
|  | ||||
|     match meta { | ||||
|         MetadataVersion::V1(_meta) => { | ||||
|             anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") | ||||
|         } | ||||
|         MetadataVersion::V2(meta) => v2::load_dump( | ||||
|             meta, | ||||
|             &tmp_src_path, | ||||
|             tmp_dst.path(), | ||||
|             index_db_size, | ||||
|             update_db_size, | ||||
|             indexer_opts, | ||||
|         )?, | ||||
|         MetadataVersion::V3(meta) => v3::load_dump( | ||||
|             meta, | ||||
|             &tmp_src_path, | ||||
|             tmp_dst.path(), | ||||
|             index_db_size, | ||||
|             update_db_size, | ||||
|             indexer_opts, | ||||
|         )?, | ||||
|         MetadataVersion::V4(meta) => v4::load_dump( | ||||
|             meta, | ||||
|             &tmp_src_path, | ||||
|             tmp_dst.path(), | ||||
|             index_db_size, | ||||
|             update_db_size, | ||||
|             indexer_opts, | ||||
|         )?, | ||||
|     } | ||||
|     Ok((tmp_src, tmp_dst, meta)) | ||||
| } | ||||
|  | ||||
| fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> { | ||||
|     let persisted_dump = tmp_dst.into_path(); | ||||
|  | ||||
|     // Delete everything in the `data.ms` except the tempdir. | ||||
|     if dst_path.as_ref().exists() { | ||||
|         warn!("Overwriting database at {}", dst_path.as_ref().display()); | ||||
|         for file in dst_path.as_ref().read_dir().unwrap() { | ||||
|             let file = file.unwrap().path(); | ||||
|             if file.file_name() == persisted_dump.file_name() { | ||||
|   | ||||
| @@ -150,6 +150,8 @@ pub struct IndexControllerBuilder { | ||||
|     schedule_snapshot: bool, | ||||
|     dump_src: Option<PathBuf>, | ||||
|     dump_dst: Option<PathBuf>, | ||||
|     ignore_dump_if_db_exists: bool, | ||||
|     ignore_missing_dump: bool, | ||||
| } | ||||
|  | ||||
| impl IndexControllerBuilder { | ||||
| @@ -186,6 +188,8 @@ impl IndexControllerBuilder { | ||||
|             load_dump( | ||||
|                 db_path.as_ref(), | ||||
|                 src_path, | ||||
|                 self.ignore_dump_if_db_exists, | ||||
|                 self.ignore_missing_dump, | ||||
|                 index_size, | ||||
|                 task_store_size, | ||||
|                 &indexer_options, | ||||
| @@ -296,18 +300,6 @@ impl IndexControllerBuilder { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's dump src. | ||||
|     pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { | ||||
|         self.dump_src.replace(dump_src); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's dump dst. | ||||
|     pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { | ||||
|         self.dump_dst.replace(dump_dst); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's import snapshot. | ||||
|     pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { | ||||
|         self.import_snapshot.replace(import_snapshot); | ||||
| @@ -325,6 +317,30 @@ impl IndexControllerBuilder { | ||||
|         self.schedule_snapshot = true; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's dump src. | ||||
|     pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { | ||||
|         self.dump_src.replace(dump_src); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's dump dst. | ||||
|     pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { | ||||
|         self.dump_dst.replace(dump_dst); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's ignore dump if db exists. | ||||
|     pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self { | ||||
|         self.ignore_dump_if_db_exists = ignore_dump_if_db_exists; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Set the index controller builder's ignore missing dump. | ||||
|     pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self { | ||||
|         self.ignore_missing_dump = ignore_missing_dump; | ||||
|         self | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<U, I> IndexController<U, I> | ||||
|   | ||||
| @@ -10,6 +10,8 @@ mod snapshot; | ||||
| pub mod tasks; | ||||
| mod update_file_store; | ||||
|  | ||||
| use std::path::Path; | ||||
|  | ||||
| pub use index_controller::MeiliSearch; | ||||
|  | ||||
| pub use milli; | ||||
| @@ -33,3 +35,19 @@ impl EnvSizer for heed::Env { | ||||
|             .fold(0, |acc, m| acc + m.len()) | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Checks whether the database at `db_path` is empty. It does not provide any information on
/// the validity of the data in it.
///
/// A database is considered empty when `db_path` does not exist or is a directory without any
/// entry. Anything else is considered non-empty: a directory with at least one entry, a plain
/// file, or a path whose content cannot be listed. Reporting those last two cases as non-empty
/// is the conservative choice, so callers never treat something they could not inspect as a
/// blank slate.
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // A single entry is enough to know the directory is not empty; no need to count them.
        Ok(mut entries) => entries.next().is_none(),
        // If we encounter an error or if the db is a file we consider the db non empty.
        Err(_) => false,
    }
}
|   | ||||
| @@ -49,7 +49,10 @@ pub fn load_snapshot( | ||||
|     ignore_snapshot_if_db_exists: bool, | ||||
|     ignore_missing_snapshot: bool, | ||||
| ) -> anyhow::Result<()> { | ||||
|     if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() { | ||||
|     let empty_db = crate::is_empty_db(&db_path); | ||||
|     let snapshot_path_exists = snapshot_path.as_ref().exists(); | ||||
|  | ||||
|     if empty_db && snapshot_path_exists { | ||||
|         match from_tar_gz(snapshot_path, &db_path) { | ||||
|             Ok(()) => Ok(()), | ||||
|             Err(e) => { | ||||
| @@ -58,7 +61,7 @@ pub fn load_snapshot( | ||||
|                 Err(e) | ||||
|             } | ||||
|         } | ||||
|     } else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists { | ||||
|     } else if !empty_db && !ignore_snapshot_if_db_exists { | ||||
|         bail!( | ||||
|             "database already exists at {:?}, try to delete it or rename it", | ||||
|             db_path | ||||
| @@ -66,14 +69,8 @@ pub fn load_snapshot( | ||||
|                 .canonicalize() | ||||
|                 .unwrap_or_else(|_| db_path.as_ref().to_owned()) | ||||
|         ) | ||||
|     } else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot { | ||||
|         bail!( | ||||
|             "snapshot doesn't exist at {:?}", | ||||
|             snapshot_path | ||||
|                 .as_ref() | ||||
|                 .canonicalize() | ||||
|                 .unwrap_or_else(|_| snapshot_path.as_ref().to_owned()) | ||||
|         ) | ||||
|     } else if !snapshot_path_exists && !ignore_missing_snapshot { | ||||
|         bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref()) | ||||
|     } else { | ||||
|         Ok(()) | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user