mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Merge #5235
5235: Introduce a compaction subcommand in meilitool r=dureuill a=Kerollmops This PR proposes a change to the meilitool helper, introducing the `compact-index` subcommand to reduce the size of the indexes. While working on this tool, I discovered that the current heed `Env::copy_to_file` API is not very temp file friendly and [could be improved](https://github.com/meilisearch/heed/issues/306). Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
		| @@ -1,14 +1,17 @@ | ||||
| use std::fs::{read_dir, read_to_string, remove_file, File}; | ||||
| use std::io::BufWriter; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use anyhow::{bail, Context}; | ||||
| use clap::{Parser, Subcommand}; | ||||
| use dump::{DumpWriter, IndexMetadata}; | ||||
| use file_store::FileStore; | ||||
| use meilisearch_auth::AuthController; | ||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; | ||||
| use meilisearch_types::heed::{ | ||||
|     CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, | ||||
| }; | ||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; | ||||
| use meilisearch_types::milli::{obkv_to_json, BEU32}; | ||||
| use meilisearch_types::tasks::{Status, Task}; | ||||
| @@ -78,6 +81,27 @@ enum Command { | ||||
|         #[arg(long)] | ||||
|         target_version: String, | ||||
|     }, | ||||
|  | ||||
|     /// Compact the index by using LMDB. | ||||
|     /// | ||||
|     /// You must run this command while Meilisearch is off. The reason is that Meilisearch keeps the | ||||
|     /// indexes opened and this compaction operation writes into another file. Meilisearch will not | ||||
|     /// switch to the new file. | ||||
|     /// | ||||
|     /// **Another possibility** is to keep Meilisearch running to serve search requests, run the | ||||
|     /// compaction and once done, close and immediately reopen Meilisearch. This way Meilisearch | ||||
|     /// will reopen the data.mdb file when rebooting and see the newly compacted file, ignoring | ||||
|     /// the previous non-compacted data. | ||||
|     /// | ||||
|     /// Note that the compaction will open the index, copy and compact the index into another file | ||||
|     /// **on the same disk as the index** and replace the previous index with the newly compacted | ||||
|     /// one. This means that the disk must have enough room for at most two times the index size. | ||||
|     /// | ||||
|     /// To make sure not to lose any data, this tool takes a mutable transaction on the index | ||||
|     /// before running the copy and compaction. This way the current indexation must finish before | ||||
|     /// the compaction operation can start. Once the compaction is done, the big index is replaced | ||||
|     /// by the compacted one and the mutable transaction is released. | ||||
|     CompactIndex { index_name: String }, | ||||
| } | ||||
|  | ||||
| fn main() -> anyhow::Result<()> { | ||||
| @@ -94,6 +118,7 @@ fn main() -> anyhow::Result<()> { | ||||
|             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; | ||||
|             OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() | ||||
|         } | ||||
|         Command::CompactIndex { index_name } => compact_index(db_path, &index_name), | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -347,3 +372,74 @@ fn export_a_dump( | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { | ||||
|     let index_scheduler_path = db_path.join("tasks"); | ||||
|     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||
|         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||
|  | ||||
|     let rtxn = env.read_txn()?; | ||||
|     let index_mapping: Database<Str, UuidCodec> = | ||||
|         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||
|  | ||||
|     for result in index_mapping.iter(&rtxn)? { | ||||
|         let (uid, uuid) = result?; | ||||
|  | ||||
|         if uid != index_name { | ||||
|             eprintln!("Found index {uid} and skipping it"); | ||||
|             continue; | ||||
|         } else { | ||||
|             eprintln!("Found index {uid} 🎉"); | ||||
|         } | ||||
|  | ||||
|         let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||
|         let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||
|             format!("While trying to open the index at path {:?}", index_path.display()) | ||||
|         })?; | ||||
|  | ||||
|         eprintln!("Awaiting for a mutable transaction..."); | ||||
|         let _wtxn = index.write_txn().context("While awaiting for a write transaction")?; | ||||
|  | ||||
|         // We create and immediately drop the file because the | ||||
|         let non_compacted_index_file_path = index_path.join("data.mdb"); | ||||
|         let compacted_index_file_path = index_path.join("data.mdb.cpy"); | ||||
|  | ||||
|         eprintln!("Compacting the index..."); | ||||
|         let before_compaction = Instant::now(); | ||||
|         let new_file = index | ||||
|             .copy_to_file(&compacted_index_file_path, CompactionOption::Enabled) | ||||
|             .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; | ||||
|  | ||||
|         let after_size = new_file.metadata()?.len(); | ||||
|         let before_size = std::fs::metadata(&non_compacted_index_file_path) | ||||
|             .with_context(|| { | ||||
|                 format!( | ||||
|                     "While retrieving the metadata of {}", | ||||
|                     non_compacted_index_file_path.display(), | ||||
|                 ) | ||||
|             })? | ||||
|             .len(); | ||||
|  | ||||
|         let reduction = before_size as f64 / after_size as f64; | ||||
|         println!("Compaction successful. Took around {:.2?}", before_compaction.elapsed()); | ||||
|         eprintln!("The index went from {before_size} bytes to {after_size} bytes ({reduction:.2}x reduction)"); | ||||
|  | ||||
|         eprintln!("Replacing the non-compacted index by the compacted one..."); | ||||
|         std::fs::rename(&compacted_index_file_path, &non_compacted_index_file_path).with_context( | ||||
|             || { | ||||
|                 format!( | ||||
|                     "While renaming {} into {}", | ||||
|                     compacted_index_file_path.display(), | ||||
|                     non_compacted_index_file_path.display(), | ||||
|                 ) | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|         drop(new_file); | ||||
|  | ||||
|         println!("Everything's done 🎉"); | ||||
|         return Ok(()); | ||||
|     } | ||||
|  | ||||
|     bail!("Target index {index_name} not found!") | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user