add the batch_id to the tasks

This commit is contained in:
Tamo
2024-11-13 11:27:12 +01:00
parent 057fcb3993
commit 6062914654
126 changed files with 755 additions and 158 deletions

View File

@ -24,6 +24,7 @@ use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -279,18 +280,22 @@ impl IndexScheduler {
rtxn: &RoTxn,
index_uid: String,
batch: BatchKind,
batch_id: BatchId,
must_create_index: bool,
) -> Result<Option<Batch>> {
match batch {
BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentClear {
tasks: self.get_existing_tasks(rtxn, ids)?,
tasks: self.get_existing_tasks_with_batch_id(rtxn, batch_id, ids)?,
index_uid,
},
must_create_index,
})),
BatchKind::DocumentEdition { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
let task = self
.get_task(rtxn, id)?
.ok_or(Error::CorruptedTaskQueue)?
.with_batch_id(batch_id);
match &task.kind {
KindWithContent::DocumentEdition { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
@ -305,7 +310,7 @@ impl IndexScheduler {
}
}
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let tasks = self.get_existing_tasks_with_batch_id(rtxn, batch_id, operation_ids)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
@ -352,7 +357,7 @@ impl IndexScheduler {
}))
}
BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
let tasks = self.get_existing_tasks_with_batch_id(rtxn, batch_id, deletion_ids)?;
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentDeletion { index_uid, tasks },
@ -360,7 +365,7 @@ impl IndexScheduler {
}))
}
BatchKind::Settings { settings_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, settings_ids)?;
let tasks = self.get_existing_tasks_with_batch_id(rtxn, batch_id, settings_ids)?;
let mut settings = Vec::new();
for task in &tasks {
@ -383,6 +388,7 @@ impl IndexScheduler {
rtxn,
index_uid,
BatchKind::Settings { settings_ids, allow_index_creation },
batch_id,
must_create_index,
)?
.unwrap()
@ -398,6 +404,7 @@ impl IndexScheduler {
rtxn,
index_uid,
BatchKind::DocumentClear { ids: other },
batch_id,
must_create_index,
)?
.unwrap()
@ -430,6 +437,7 @@ impl IndexScheduler {
rtxn,
index_uid.clone(),
BatchKind::Settings { settings_ids, allow_index_creation },
batch_id,
must_create_index,
)?;
@ -442,6 +450,7 @@ impl IndexScheduler {
primary_key,
operation_ids,
},
batch_id,
must_create_index,
)?;
@ -479,7 +488,10 @@ impl IndexScheduler {
}
}
BatchKind::IndexCreation { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
let task = self
.get_task(rtxn, id)?
.ok_or(Error::CorruptedTaskQueue)?
.with_batch_id(batch_id);
let (index_uid, primary_key) = match &task.kind {
KindWithContent::IndexCreation { index_uid, primary_key } => {
(index_uid.clone(), primary_key.clone())
@ -489,7 +501,10 @@ impl IndexScheduler {
Ok(Some(Batch::IndexCreation { index_uid, primary_key, task }))
}
BatchKind::IndexUpdate { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
let task = self
.get_task(rtxn, id)?
.ok_or(Error::CorruptedTaskQueue)?
.with_batch_id(batch_id);
let primary_key = match &task.kind {
KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(),
_ => unreachable!(),
@ -499,10 +514,13 @@ impl IndexScheduler {
BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion {
index_uid,
index_has_been_created: must_create_index,
tasks: self.get_existing_tasks(rtxn, ids)?,
tasks: self.get_existing_tasks_with_batch_id(rtxn, batch_id, ids)?,
})),
BatchKind::IndexSwap { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
let task = self
.get_task(rtxn, id)?
.ok_or(Error::CorruptedTaskQueue)?
.with_batch_id(batch_id);
Ok(Some(Batch::IndexSwap { task }))
}
}
@ -515,10 +533,11 @@ impl IndexScheduler {
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<(Batch, BatchId)>> {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
let batch_id = self.next_batch_id(rtxn)?;
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@ -526,39 +545,65 @@ impl IndexScheduler {
if let Some(task_id) = to_cancel.max() {
// We retrieve the tasks that were processing before this task cancelation started.
// We must *not* reset the processing tasks before calling this method.
let ProcessingTasks { started_at, processing } =
// Displaying the `batch_id` would make a strange error message since this task cancelation is going to
// replace the canceled batch. It's better to avoid mentioning it in the error message.
let ProcessingTasks { started_at, batch_id: _, processing } =
&*self.processing_tasks.read().unwrap();
return Ok(Some(Batch::TaskCancelation {
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
previous_started_at: *started_at,
previous_processing_tasks: processing.clone(),
}));
return Ok(Some((
Batch::TaskCancelation {
task: self
.get_task(rtxn, task_id)?
.ok_or(Error::CorruptedTaskQueue)?
.with_batch_id(batch_id),
previous_started_at: *started_at,
previous_processing_tasks: processing.clone(),
},
batch_id,
)));
}
// 2. we get the next task to delete
let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() {
let tasks = self.get_existing_tasks(rtxn, to_delete)?;
return Ok(Some(Batch::TaskDeletions(tasks)));
let tasks = self
.get_existing_tasks(rtxn, to_delete)?
.into_iter()
.map(|task| task.with_batch_id(batch_id))
.collect();
return Ok(Some((Batch::TaskDeletions(tasks), batch_id)));
}
// 3. we batch the snapshot.
let to_snapshot = self.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() {
return Ok(Some(Batch::SnapshotCreation(self.get_existing_tasks(rtxn, to_snapshot)?)));
return Ok(Some((
Batch::SnapshotCreation(
self.get_existing_tasks(rtxn, to_snapshot)?
.into_iter()
.map(|task| task.with_batch_id(batch_id))
.collect(),
),
batch_id,
)));
}
// 4. we batch the dumps.
let to_dump = self.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() {
return Ok(Some(Batch::Dump(
self.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?,
return Ok(Some((
Batch::Dump(
self.get_task(rtxn, to_dump)?
.ok_or(Error::CorruptedTaskQueue)?
.with_batch_id(batch_id),
),
batch_id,
)));
}
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
let task =
self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?.with_batch_id(batch_id);
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
@ -568,7 +613,7 @@ impl IndexScheduler {
index_name
} else {
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
return Ok(Some(Batch::IndexSwap { task }));
return Ok(Some((Batch::IndexSwap { task }, batch_id)));
};
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
@ -599,12 +644,15 @@ impl IndexScheduler {
if let Some((batchkind, create_index)) =
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
return self.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
create_index,
);
return Ok(self
.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
batch_id,
create_index,
)?
.map(|batch| (batch, batch_id)));
}
// If we found no tasks then we were notified for something that got autobatched