mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-17 00:56:27 +00:00
Merge pull request #5946 from meilisearch/fix-compaction-issues
Improve compaction behaviors
This commit is contained in:
@@ -199,7 +199,7 @@ impl IndexMapper {
|
||||
let uuid = Uuid::new_v4();
|
||||
self.index_mapping.put(&mut wtxn, name, &uuid)?;
|
||||
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
fs::create_dir_all(&index_path)?;
|
||||
|
||||
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
|
||||
@@ -286,7 +286,7 @@ impl IndexMapper {
|
||||
};
|
||||
|
||||
let index_map = self.index_map.clone();
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
let index_name = name.to_string();
|
||||
thread::Builder::new()
|
||||
.name(String::from("index_deleter"))
|
||||
@@ -408,7 +408,7 @@ impl IndexMapper {
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
// take the lock to reopen the environment.
|
||||
reopen
|
||||
.reopen(&mut self.index_map.write().unwrap(), &index_path)
|
||||
@@ -425,7 +425,7 @@ impl IndexMapper {
|
||||
// if it's not already there.
|
||||
match index_map.get(&uuid) {
|
||||
Missing => {
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
|
||||
break index_map
|
||||
.create(
|
||||
@@ -452,6 +452,14 @@ impl IndexMapper {
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
/// Returns the path of the index.
|
||||
///
|
||||
/// The folder located at this path is containing the data.mdb,
|
||||
/// the lock.mdb and an optional data.mdb.cpy file.
|
||||
pub fn index_path(&self, uuid: Uuid) -> PathBuf {
|
||||
self.base_path.join(uuid.to_string())
|
||||
}
|
||||
|
||||
pub fn rollback_index(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
@@ -492,7 +500,7 @@ impl IndexMapper {
|
||||
};
|
||||
}
|
||||
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
Index::rollback(milli::heed::EnvOpenOptions::new().read_txn_without_tls(), index_path, to)
|
||||
.map_err(|err| crate::Error::from_milli(err, Some(name.to_string())))
|
||||
}
|
||||
|
@@ -75,6 +75,7 @@ make_enum_progress! {
|
||||
pub enum TaskCancelationProgress {
|
||||
RetrievingTasks,
|
||||
CancelingUpgrade,
|
||||
CleaningCompactionLeftover,
|
||||
UpdatingTasks,
|
||||
}
|
||||
}
|
||||
|
@@ -25,7 +25,6 @@ enum AutobatchKind {
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
IndexCompaction,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
@@ -69,14 +68,14 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::IndexCompaction { .. } => AutobatchKind::IndexCompaction,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
KindWithContent::IndexCompaction { .. }
|
||||
| KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -120,9 +119,6 @@ pub enum BatchKind {
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexCompaction {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
@@ -188,13 +184,6 @@ impl BatchKind {
|
||||
)),
|
||||
false,
|
||||
),
|
||||
K::IndexCompaction => (
|
||||
Break((
|
||||
BatchKind::IndexCompaction { id: task_id },
|
||||
BatchStopReason::TaskCannotBeBatched { kind, id: task_id },
|
||||
)),
|
||||
false,
|
||||
),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
@@ -300,7 +289,7 @@ impl BatchKind {
|
||||
|
||||
match (self, autobatch_kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::IndexCompaction) => {
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => {
|
||||
Break((this, BatchStopReason::TaskCannotBeBatched { kind, id }))
|
||||
},
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
@@ -497,7 +486,6 @@ impl BatchKind {
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::IndexCompaction { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
|
@@ -437,12 +437,6 @@ impl IndexScheduler {
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexSwap { task }))
|
||||
}
|
||||
BatchKind::IndexCompaction { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexCompaction { index_uid, task }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -525,17 +519,33 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the export.
|
||||
// 3. we get the next task to compact
|
||||
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
|
||||
if let Some(task_id) = to_compact.min() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
|
||||
kind: Kind::IndexCompaction,
|
||||
id: task_id,
|
||||
});
|
||||
let index_uid =
|
||||
task.index_uid().expect("Compaction task must have an index uid").to_owned();
|
||||
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the export.
|
||||
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
|
||||
if !to_export.is_empty() {
|
||||
let task_id = to_export.iter().next().expect("There must be at least one export task");
|
||||
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
|
||||
current_batch.processing([&mut task]);
|
||||
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
|
||||
current_batch
|
||||
.reason(BatchStopReason::TaskCannotBeBatched { kind: Kind::Export, id: task_id });
|
||||
return Ok(Some((Batch::Export { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the snapshot.
|
||||
// 5. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
@@ -545,7 +555,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 5. we batch the dumps.
|
||||
// 6. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
@@ -558,7 +568,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
@@ -1,5 +1,6 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::io::{Seek, SeekFrom};
|
||||
use std::fs::{remove_file, File};
|
||||
use std::io::{ErrorKind, Seek, SeekFrom};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
@@ -13,7 +14,7 @@ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status
|
||||
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::PersistError;
|
||||
use tempfile::{PersistError, TempPath};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
@@ -28,6 +29,9 @@ use crate::utils::{
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
/// The name of the copy of the data.mdb file used during compaction.
|
||||
const DATA_MDB_COPY_NAME: &str = "data.mdb.cpy";
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessBatchInfo {
|
||||
/// The write channel congestion. None when unavailable: settings update.
|
||||
@@ -558,11 +562,12 @@ impl IndexScheduler {
|
||||
.set_currently_updating_index(Some((index_uid.to_string(), index.clone())));
|
||||
|
||||
progress.update_progress(IndexCompaction::CreateTemporaryFile);
|
||||
let pre_size = std::fs::metadata(index.path().join("data.mdb"))?.len();
|
||||
let mut file = tempfile::Builder::new()
|
||||
.suffix("data.")
|
||||
.prefix(".mdb.cpy")
|
||||
.tempfile_in(index.path())?;
|
||||
let src_path = index.path().join("data.mdb");
|
||||
let pre_size = std::fs::metadata(&src_path)?.len();
|
||||
|
||||
let dst_path = TempPath::from_path(index.path().join(DATA_MDB_COPY_NAME));
|
||||
let file = File::create(&dst_path)?;
|
||||
let mut file = tempfile::NamedTempFile::from_parts(file, dst_path);
|
||||
|
||||
// 3. We copy the index data to the temporary file
|
||||
progress.update_progress(IndexCompaction::CopyAndCompactTheIndex);
|
||||
@@ -574,7 +579,7 @@ impl IndexScheduler {
|
||||
|
||||
// 4. We replace the index data file with the temporary file
|
||||
progress.update_progress(IndexCompaction::PersistTheCompactedIndex);
|
||||
match file.persist(index.path().join("data.mdb")) {
|
||||
match file.persist(src_path) {
|
||||
Ok(file) => file.sync_all()?,
|
||||
// TODO see if we have a _resource busy_ error and probably handle this by:
|
||||
// 1. closing the index, 2. replacing and 3. reopening it
|
||||
@@ -910,9 +915,10 @@ impl IndexScheduler {
|
||||
|
||||
let enqueued_tasks = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
|
||||
// 0. Check if any upgrade task was matched.
|
||||
// 0. Check if any upgrade or compaction tasks were matched.
|
||||
// If so, we cancel all the failed or enqueued upgrade tasks.
|
||||
let upgrade_tasks = &self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)?;
|
||||
let compaction_tasks = &self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)?;
|
||||
let is_canceling_upgrade = !matched_tasks.is_disjoint(upgrade_tasks);
|
||||
if is_canceling_upgrade {
|
||||
let failed_tasks = self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
@@ -977,7 +983,33 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
// 3. We now have a list of tasks to cancel, cancel them
|
||||
// 3. If we are cancelling a compaction task, remove the tempfiles after incomplete compactions
|
||||
for compaction_task in &tasks_to_cancel & compaction_tasks {
|
||||
progress.update_progress(TaskCancelationProgress::CleaningCompactionLeftover);
|
||||
let task = self.queue.tasks.get_task(rtxn, compaction_task)?.unwrap();
|
||||
let Some(Details::IndexCompaction {
|
||||
index_uid,
|
||||
pre_compaction_size: _,
|
||||
post_compaction_size: _,
|
||||
}) = task.details
|
||||
else {
|
||||
unreachable!("wrong details for compaction task {compaction_task}")
|
||||
};
|
||||
|
||||
let index_path = match self.index_mapper.index_mapping.get(rtxn, &index_uid)? {
|
||||
Some(index_uuid) => self.index_mapper.index_path(index_uuid),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if let Err(e) = remove_file(index_path.join(DATA_MDB_COPY_NAME)) {
|
||||
match e.kind() {
|
||||
ErrorKind::NotFound => (),
|
||||
_ => return Err(Error::IoError(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. We now have a list of tasks to cancel, cancel them
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
|
Reference in New Issue
Block a user