	Merge #5235
5235: Introduce a compaction subcommand in meilitool r=dureuill a=Kerollmops

This PR proposes a change to the meilitool helper, introducing the `compact-index` subcommand to reduce the size of the indexes. While working on this tool, I discovered that the current heed `Env::copy_to_file` API is not very temp-file friendly and [could be improved](https://github.com/meilisearch/heed/issues/306).

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
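For reference, given the clap definitions in the diff below, the subcommand should be invocable roughly as `meilitool compact-index <INDEX_NAME>`, with the database location taken from meilitool's existing `--db-path` option and the Meilisearch server stopped beforehand (this invocation is inferred from the diff, not quoted from the PR).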
@@ -1,14 +1,17 @@
 use std::fs::{read_dir, read_to_string, remove_file, File};
 use std::io::BufWriter;
 use std::path::PathBuf;
+use std::time::Instant;
 
-use anyhow::Context;
+use anyhow::{bail, Context};
 use clap::{Parser, Subcommand};
 use dump::{DumpWriter, IndexMetadata};
 use file_store::FileStore;
 use meilisearch_auth::AuthController;
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
+use meilisearch_types::heed::{
+    CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
+};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
 use meilisearch_types::milli::{obkv_to_json, BEU32};
 use meilisearch_types::tasks::{Status, Task};
@@ -78,6 +81,27 @@ enum Command {
         #[arg(long)]
         target_version: String,
     },
+
+    /// Compact the index by using LMDB.
+    ///
+    /// You must run this command while Meilisearch is off. The reason is that Meilisearch keeps
+    /// the indexes opened and this compaction operation writes into another file: Meilisearch
+    /// will not switch to the new file on its own.
+    ///
+    /// **Another possibility** is to keep Meilisearch running to serve search requests, run the
+    /// compaction, and, once done, close and immediately reopen Meilisearch. This way Meilisearch
+    /// will reopen the data.mdb file when rebooting and see the newly compacted file, ignoring
+    /// the previous non-compacted data.
+    ///
+    /// Note that the compaction will open the index, copy and compact it into another file
+    /// **on the same disk as the index**, and replace the previous index with the newly compacted
+    /// one. This means that the disk must have enough room for up to two times the index size.
+    ///
+    /// To make sure not to lose any data, this tool takes a mutable transaction on the index
+    /// before running the copy and compaction. This way, any ongoing indexation must finish
+    /// before the compaction can start. Once the compaction is done, the big index is replaced
+    /// by the compacted one and the mutable transaction is released.
+    CompactIndex { index_name: String },
 }
 
 fn main() -> anyhow::Result<()> {
@@ -94,6 +118,7 @@ fn main() -> anyhow::Result<()> {
             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
             OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
         }
+        Command::CompactIndex { index_name } => compact_index(db_path, &index_name),
     }
 }
 
@@ -347,3 +372,74 @@ fn export_a_dump(
 
     Ok(())
 }
+
+fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
+    let index_scheduler_path = db_path.join("tasks");
+    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
+        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
+
+    let rtxn = env.read_txn()?;
+    let index_mapping: Database<Str, UuidCodec> =
+        try_opening_database(&env, &rtxn, "index-mapping")?;
+
+    for result in index_mapping.iter(&rtxn)? {
+        let (uid, uuid) = result?;
+
+        if uid != index_name {
+            eprintln!("Found index {uid} and skipping it");
+            continue;
+        } else {
+            eprintln!("Found index {uid} 🎉");
+        }
+
+        let index_path = db_path.join("indexes").join(uuid.to_string());
+        let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
+            format!("While trying to open the index at path {:?}", index_path.display())
+        })?;
+
+        eprintln!("Waiting for a mutable transaction...");
+        let _wtxn = index.write_txn().context("While waiting for a write transaction")?;
+
+        // The path of the current data file and of the compacted copy we are about to write.
+        let non_compacted_index_file_path = index_path.join("data.mdb");
+        let compacted_index_file_path = index_path.join("data.mdb.cpy");
+
+        eprintln!("Compacting the index...");
+        let before_compaction = Instant::now();
+        let new_file = index
+            .copy_to_file(&compacted_index_file_path, CompactionOption::Enabled)
+            .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?;
+
+        let after_size = new_file.metadata()?.len();
+        let before_size = std::fs::metadata(&non_compacted_index_file_path)
+            .with_context(|| {
+                format!(
+                    "While retrieving the metadata of {}",
+                    non_compacted_index_file_path.display(),
+                )
+            })?
+            .len();
+
+        let reduction = before_size as f64 / after_size as f64;
+        println!("Compaction successful. Took around {:.2?}", before_compaction.elapsed());
+        eprintln!("The index went from {before_size} bytes to {after_size} bytes ({reduction:.2}x reduction)");
+
+        eprintln!("Replacing the non-compacted index by the compacted one...");
+        std::fs::rename(&compacted_index_file_path, &non_compacted_index_file_path).with_context(
+            || {
+                format!(
+                    "While renaming {} into {}",
+                    compacted_index_file_path.display(),
+                    non_compacted_index_file_path.display(),
+                )
+            },
+        )?;
+
+        drop(new_file);
+
+        println!("Everything's done 🎉");
+        return Ok(());
+    }
+
+    bail!("Target index {index_name} not found!")
+}
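The PR description's remark about heed's `Env::copy_to_file` is visible in the code above: the API takes a destination path, creates that file itself, and hands back the resulting `File`, so the tool has to materialize a `data.mdb.cpy` next to the index and rename it over `data.mdb` rather than compacting into an anonymous temporary file. Below is a minimal sketch of that copy-then-rename pattern against heed directly; the `compact_lmdb_env` helper, the directory layout, and the absence of a guarding write transaction are illustrative assumptions, not code from this PR.

```rust
use std::path::Path;

use heed::{CompactionOption, EnvOpenOptions};

// Illustrative only: compacts a standalone LMDB environment the same way
// meilitool's `compact-index` subcommand does, minus the write transaction
// the tool takes to block concurrent indexation.
fn compact_lmdb_env(env_dir: &Path) -> anyhow::Result<()> {
    // Safety contract of heed: the same environment must not be opened
    // twice within one process.
    let env = unsafe { EnvOpenOptions::new().open(env_dir)? };

    // `copy_to_file` wants a destination *path* (it creates the file itself),
    // so the compacted copy lands next to the original data.mdb.
    let compacted_path = env_dir.join("data.mdb.cpy");
    let compacted_file = env.copy_to_file(&compacted_path, CompactionOption::Enabled)?;
    eprintln!("compacted copy is {} bytes", compacted_file.metadata()?.len());

    // Swap the compacted copy in place of the original data file.
    std::fs::rename(&compacted_path, env_dir.join("data.mdb"))?;
    Ok(())
}
```

`CompactionOption::Enabled` corresponds to LMDB's `MDB_CP_COMPACT` flag, which omits free pages while copying; the compacted copy can still grow up to the size of the original environment on the same disk, which is where the "two times the index size" warning in the subcommand's help text comes from.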