mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	feat(dump): Provide the same cli options as the snapshots
Add two cli options for the dump: - `--ignore-missing-dump` - `--ignore-dump-if-db-exists` Fix #2087
This commit is contained in:
		| @@ -30,11 +30,15 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> { | |||||||
|     meilisearch |     meilisearch | ||||||
|         .set_max_index_size(opt.max_index_size.get_bytes() as usize) |         .set_max_index_size(opt.max_index_size.get_bytes() as usize) | ||||||
|         .set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize) |         .set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize) | ||||||
|  |         // snapshot | ||||||
|         .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) |         .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) | ||||||
|         .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) |         .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) | ||||||
|         .set_dump_dst(opt.dumps_dir.clone()) |  | ||||||
|         .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) |         .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) | ||||||
|         .set_snapshot_dir(opt.snapshot_dir.clone()); |         .set_snapshot_dir(opt.snapshot_dir.clone()) | ||||||
|  |         // dump | ||||||
|  |         .set_ignore_missing_dump(opt.ignore_missing_dump) | ||||||
|  |         .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists) | ||||||
|  |         .set_dump_dst(opt.dumps_dir.clone()); | ||||||
|  |  | ||||||
|     if let Some(ref path) = opt.import_snapshot { |     if let Some(ref path) = opt.import_snapshot { | ||||||
|         meilisearch.set_import_snapshot(path.clone()); |         meilisearch.set_import_snapshot(path.clone()); | ||||||
|   | |||||||
| @@ -112,14 +112,22 @@ pub struct Opt { | |||||||
|     #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h |     #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h | ||||||
|     pub snapshot_interval_sec: u64, |     pub snapshot_interval_sec: u64, | ||||||
|  |  | ||||||
|     /// Folder where dumps are created when the dump route is called. |  | ||||||
|     #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] |  | ||||||
|     pub dumps_dir: PathBuf, |  | ||||||
|  |  | ||||||
|     /// Import a dump from the specified path, must be a `.dump` file. |     /// Import a dump from the specified path, must be a `.dump` file. | ||||||
|     #[clap(long, conflicts_with = "import-snapshot")] |     #[clap(long, conflicts_with = "import-snapshot")] | ||||||
|     pub import_dump: Option<PathBuf>, |     pub import_dump: Option<PathBuf>, | ||||||
|  |  | ||||||
|  |     /// If the dump doesn't exist, load or create the database specified by `db-path` instead. | ||||||
|  |     #[clap(long, requires = "import-dump")] | ||||||
|  |     pub ignore_missing_dump: bool, | ||||||
|  |  | ||||||
|  |     /// Ignore the dump if a database already exists, and load that database instead. | ||||||
|  |     #[clap(long, requires = "import-dump")] | ||||||
|  |     pub ignore_dump_if_db_exists: bool, | ||||||
|  |  | ||||||
|  |     /// Folder where dumps are created when the dump route is called. | ||||||
|  |     #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] | ||||||
|  |     pub dumps_dir: PathBuf, | ||||||
|  |  | ||||||
|     /// Set the log level |     /// Set the log level | ||||||
|     #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] |     #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] | ||||||
|     pub log_level: String, |     pub log_level: String, | ||||||
|   | |||||||
| @@ -148,6 +148,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt { | |||||||
|         schedule_snapshot: false, |         schedule_snapshot: false, | ||||||
|         snapshot_interval_sec: 0, |         snapshot_interval_sec: 0, | ||||||
|         import_dump: None, |         import_dump: None, | ||||||
|  |         ignore_missing_dump: false, | ||||||
|  |         ignore_dump_if_db_exists: false, | ||||||
|         indexer_options: IndexerOpts { |         indexer_options: IndexerOpts { | ||||||
|             // memory has to be unlimited because several meilisearch are running in test context. |             // memory has to be unlimited because several meilisearch are running in test context. | ||||||
|             max_memory: MaxMemory::unlimited(), |             max_memory: MaxMemory::unlimited(), | ||||||
|   | |||||||
| @@ -1,14 +1,16 @@ | |||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::path::{Path, PathBuf}; | use std::path::{Path, PathBuf}; | ||||||
|  |  | ||||||
|  | use anyhow::bail; | ||||||
| use chrono::{DateTime, Utc}; | use chrono::{DateTime, Utc}; | ||||||
| use log::{info, trace, warn}; | use log::{info, trace}; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| pub use actor::DumpActor; | pub use actor::DumpActor; | ||||||
| pub use handle_impl::*; | pub use handle_impl::*; | ||||||
| use meilisearch_auth::AuthController; | use meilisearch_auth::AuthController; | ||||||
| pub use message::DumpMsg; | pub use message::DumpMsg; | ||||||
|  | use tempfile::TempDir; | ||||||
| use tokio::fs::create_dir_all; | use tokio::fs::create_dir_all; | ||||||
| use tokio::sync::oneshot; | use tokio::sync::oneshot; | ||||||
|  |  | ||||||
| @@ -79,6 +81,47 @@ pub enum MetadataVersion { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl MetadataVersion { | impl MetadataVersion { | ||||||
|  |     pub fn load_dump( | ||||||
|  |         self, | ||||||
|  |         src: impl AsRef<Path>, | ||||||
|  |         dst: impl AsRef<Path>, | ||||||
|  |         index_db_size: usize, | ||||||
|  |         meta_env_size: usize, | ||||||
|  |         indexing_options: &IndexerOpts, | ||||||
|  |     ) -> anyhow::Result<()> { | ||||||
|  |         match self { | ||||||
|  |             MetadataVersion::V1(_meta) => { | ||||||
|  |                 anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") | ||||||
|  |             } | ||||||
|  |             MetadataVersion::V2(meta) => v2::load_dump( | ||||||
|  |                 meta, | ||||||
|  |                 src, | ||||||
|  |                 dst, | ||||||
|  |                 index_db_size, | ||||||
|  |                 meta_env_size, | ||||||
|  |                 indexing_options, | ||||||
|  |             )?, | ||||||
|  |             MetadataVersion::V3(meta) => v3::load_dump( | ||||||
|  |                 meta, | ||||||
|  |                 src, | ||||||
|  |                 dst, | ||||||
|  |                 index_db_size, | ||||||
|  |                 meta_env_size, | ||||||
|  |                 indexing_options, | ||||||
|  |             )?, | ||||||
|  |             MetadataVersion::V4(meta) => v4::load_dump( | ||||||
|  |                 meta, | ||||||
|  |                 src, | ||||||
|  |                 dst, | ||||||
|  |                 index_db_size, | ||||||
|  |                 meta_env_size, | ||||||
|  |                 indexing_options, | ||||||
|  |             )?, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self { |     pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self { | ||||||
|         let meta = Metadata::new(index_db_size, update_db_size); |         let meta = Metadata::new(index_db_size, update_db_size); | ||||||
|         Self::V4(meta) |         Self::V4(meta) | ||||||
| @@ -160,10 +203,46 @@ impl DumpInfo { | |||||||
| pub fn load_dump( | pub fn load_dump( | ||||||
|     dst_path: impl AsRef<Path>, |     dst_path: impl AsRef<Path>, | ||||||
|     src_path: impl AsRef<Path>, |     src_path: impl AsRef<Path>, | ||||||
|  |     ignore_dump_if_db_exists: bool, | ||||||
|  |     ignore_missing_dump: bool, | ||||||
|     index_db_size: usize, |     index_db_size: usize, | ||||||
|     update_db_size: usize, |     update_db_size: usize, | ||||||
|     indexer_opts: &IndexerOpts, |     indexer_opts: &IndexerOpts, | ||||||
| ) -> anyhow::Result<()> { | ) -> anyhow::Result<()> { | ||||||
|  |     let empty_db = crate::is_empty_db(&dst_path); | ||||||
|  |     let src_path_exists = src_path.as_ref().exists(); | ||||||
|  |  | ||||||
|  |     if empty_db && src_path_exists { | ||||||
|  |         let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?; | ||||||
|  |         meta.load_dump( | ||||||
|  |             tmp_src.path(), | ||||||
|  |             tmp_dst.path(), | ||||||
|  |             index_db_size, | ||||||
|  |             update_db_size, | ||||||
|  |             indexer_opts, | ||||||
|  |         )?; | ||||||
|  |         persist_dump(&dst_path, tmp_dst)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } else if !empty_db && !ignore_dump_if_db_exists { | ||||||
|  |         bail!( | ||||||
|  |             "database already exists at {:?}, try to delete it or rename it", | ||||||
|  |             dst_path | ||||||
|  |                 .as_ref() | ||||||
|  |                 .canonicalize() | ||||||
|  |                 .unwrap_or_else(|_| dst_path.as_ref().to_owned()) | ||||||
|  |         ) | ||||||
|  |     } else if !src_path_exists && !ignore_missing_dump { | ||||||
|  |         bail!("dump doesn't exist at {:?}", src_path.as_ref()) | ||||||
|  |     } else { | ||||||
|  |         // there is nothing to do | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn extract_dump( | ||||||
|  |     dst_path: impl AsRef<Path>, | ||||||
|  |     src_path: impl AsRef<Path>, | ||||||
|  | ) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> { | ||||||
|     // Setup a temp directory path in the same path as the database, to prevent cross devices |     // Setup a temp directory path in the same path as the database, to prevent cross devices | ||||||
|     // references. |     // references. | ||||||
|     let temp_path = dst_path |     let temp_path = dst_path | ||||||
| @@ -201,40 +280,14 @@ pub fn load_dump( | |||||||
|         meta.version() |         meta.version() | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     match meta { |     Ok((tmp_src, tmp_dst, meta)) | ||||||
|         MetadataVersion::V1(_meta) => { |  | ||||||
|             anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") |  | ||||||
|         } |  | ||||||
|         MetadataVersion::V2(meta) => v2::load_dump( |  | ||||||
|             meta, |  | ||||||
|             &tmp_src_path, |  | ||||||
|             tmp_dst.path(), |  | ||||||
|             index_db_size, |  | ||||||
|             update_db_size, |  | ||||||
|             indexer_opts, |  | ||||||
|         )?, |  | ||||||
|         MetadataVersion::V3(meta) => v3::load_dump( |  | ||||||
|             meta, |  | ||||||
|             &tmp_src_path, |  | ||||||
|             tmp_dst.path(), |  | ||||||
|             index_db_size, |  | ||||||
|             update_db_size, |  | ||||||
|             indexer_opts, |  | ||||||
|         )?, |  | ||||||
|         MetadataVersion::V4(meta) => v4::load_dump( |  | ||||||
|             meta, |  | ||||||
|             &tmp_src_path, |  | ||||||
|             tmp_dst.path(), |  | ||||||
|             index_db_size, |  | ||||||
|             update_db_size, |  | ||||||
|             indexer_opts, |  | ||||||
|         )?, |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> { | ||||||
|     let persisted_dump = tmp_dst.into_path(); |     let persisted_dump = tmp_dst.into_path(); | ||||||
|  |  | ||||||
|     // Delete everything in the `data.ms` except the tempdir. |     // Delete everything in the `data.ms` except the tempdir. | ||||||
|     if dst_path.as_ref().exists() { |     if dst_path.as_ref().exists() { | ||||||
|         warn!("Overwriting database at {}", dst_path.as_ref().display()); |  | ||||||
|         for file in dst_path.as_ref().read_dir().unwrap() { |         for file in dst_path.as_ref().read_dir().unwrap() { | ||||||
|             let file = file.unwrap().path(); |             let file = file.unwrap().path(); | ||||||
|             if file.file_name() == persisted_dump.file_name() { |             if file.file_name() == persisted_dump.file_name() { | ||||||
|   | |||||||
| @@ -150,6 +150,8 @@ pub struct IndexControllerBuilder { | |||||||
|     schedule_snapshot: bool, |     schedule_snapshot: bool, | ||||||
|     dump_src: Option<PathBuf>, |     dump_src: Option<PathBuf>, | ||||||
|     dump_dst: Option<PathBuf>, |     dump_dst: Option<PathBuf>, | ||||||
|  |     ignore_dump_if_db_exists: bool, | ||||||
|  |     ignore_missing_dump: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl IndexControllerBuilder { | impl IndexControllerBuilder { | ||||||
| @@ -186,6 +188,8 @@ impl IndexControllerBuilder { | |||||||
|             load_dump( |             load_dump( | ||||||
|                 db_path.as_ref(), |                 db_path.as_ref(), | ||||||
|                 src_path, |                 src_path, | ||||||
|  |                 self.ignore_dump_if_db_exists, | ||||||
|  |                 self.ignore_missing_dump, | ||||||
|                 index_size, |                 index_size, | ||||||
|                 task_store_size, |                 task_store_size, | ||||||
|                 &indexer_options, |                 &indexer_options, | ||||||
| @@ -296,18 +300,6 @@ impl IndexControllerBuilder { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Set the index controller builder's dump src. |  | ||||||
|     pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { |  | ||||||
|         self.dump_src.replace(dump_src); |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Set the index controller builder's dump dst. |  | ||||||
|     pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { |  | ||||||
|         self.dump_dst.replace(dump_dst); |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Set the index controller builder's import snapshot. |     /// Set the index controller builder's import snapshot. | ||||||
|     pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { |     pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { | ||||||
|         self.import_snapshot.replace(import_snapshot); |         self.import_snapshot.replace(import_snapshot); | ||||||
| @@ -325,6 +317,30 @@ impl IndexControllerBuilder { | |||||||
|         self.schedule_snapshot = true; |         self.schedule_snapshot = true; | ||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Set the index controller builder's dump src. | ||||||
|  |     pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { | ||||||
|  |         self.dump_src.replace(dump_src); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Set the index controller builder's dump dst. | ||||||
|  |     pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { | ||||||
|  |         self.dump_dst.replace(dump_dst); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Set the index controller builder's ignore dump if db exists. | ||||||
|  |     pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self { | ||||||
|  |         self.ignore_dump_if_db_exists = ignore_dump_if_db_exists; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Set the index controller builder's ignore missing dump. | ||||||
|  |     pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self { | ||||||
|  |         self.ignore_missing_dump = ignore_missing_dump; | ||||||
|  |         self | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<U, I> IndexController<U, I> | impl<U, I> IndexController<U, I> | ||||||
|   | |||||||
| @@ -10,6 +10,8 @@ mod snapshot; | |||||||
| pub mod tasks; | pub mod tasks; | ||||||
| mod update_file_store; | mod update_file_store; | ||||||
|  |  | ||||||
|  | use std::path::Path; | ||||||
|  |  | ||||||
| pub use index_controller::MeiliSearch; | pub use index_controller::MeiliSearch; | ||||||
|  |  | ||||||
| pub use milli; | pub use milli; | ||||||
| @@ -33,3 +35,19 @@ impl EnvSizer for heed::Env { | |||||||
|             .fold(0, |acc, m| acc + m.len()) |             .fold(0, |acc, m| acc + m.len()) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
///
/// A database is considered empty when `db_path` does not exist or is a
/// directory without any entry. It is considered non empty when it is a
/// directory holding at least one entry, and also when `db_path` exists but
/// cannot be listed as a directory (for example because it is a regular file
/// or because reading it fails): in doubt, the existing path must not be
/// treated as free to overwrite.
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // One entry is enough to know the directory is not empty; there is no
        // need to walk the whole listing.
        Ok(mut entries) => entries.next().is_none(),
        // The path exists but is not a readable directory: report it as non
        // empty so that callers never import over it.
        Err(_) => false,
    }
}
|   | |||||||
| @@ -49,7 +49,10 @@ pub fn load_snapshot( | |||||||
|     ignore_snapshot_if_db_exists: bool, |     ignore_snapshot_if_db_exists: bool, | ||||||
|     ignore_missing_snapshot: bool, |     ignore_missing_snapshot: bool, | ||||||
| ) -> anyhow::Result<()> { | ) -> anyhow::Result<()> { | ||||||
|     if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() { |     let empty_db = crate::is_empty_db(&db_path); | ||||||
|  |     let snapshot_path_exists = snapshot_path.as_ref().exists(); | ||||||
|  |  | ||||||
|  |     if empty_db && snapshot_path_exists { | ||||||
|         match from_tar_gz(snapshot_path, &db_path) { |         match from_tar_gz(snapshot_path, &db_path) { | ||||||
|             Ok(()) => Ok(()), |             Ok(()) => Ok(()), | ||||||
|             Err(e) => { |             Err(e) => { | ||||||
| @@ -58,7 +61,7 @@ pub fn load_snapshot( | |||||||
|                 Err(e) |                 Err(e) | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists { |     } else if !empty_db && !ignore_snapshot_if_db_exists { | ||||||
|         bail!( |         bail!( | ||||||
|             "database already exists at {:?}, try to delete it or rename it", |             "database already exists at {:?}, try to delete it or rename it", | ||||||
|             db_path |             db_path | ||||||
| @@ -66,14 +69,8 @@ pub fn load_snapshot( | |||||||
|                 .canonicalize() |                 .canonicalize() | ||||||
|                 .unwrap_or_else(|_| db_path.as_ref().to_owned()) |                 .unwrap_or_else(|_| db_path.as_ref().to_owned()) | ||||||
|         ) |         ) | ||||||
|     } else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot { |     } else if !snapshot_path_exists && !ignore_missing_snapshot { | ||||||
|         bail!( |         bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref()) | ||||||
|             "snapshot doesn't exist at {:?}", |  | ||||||
|             snapshot_path |  | ||||||
|                 .as_ref() |  | ||||||
|                 .canonicalize() |  | ||||||
|                 .unwrap_or_else(|_| snapshot_path.as_ref().to_owned()) |  | ||||||
|         ) |  | ||||||
|     } else { |     } else { | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user