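Enhance the index update functionality to support renaming an index by adding an optional `new_uid` field (stored as `new_index_uid` on `KindWithContent::IndexUpdate` and reported as `new_uid` in `Details::IndexInfo`). Update the related structures and methods to handle the new index UID during updates, folding the separate `IndexRename` batching and processing path into `IndexUpdate`, while ensuring backward compatibility with existing index operations.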

This commit is contained in:
Quentin de Quelen
2025-08-05 19:18:05 +02:00
committed by Tamo
parent 0f1c78b185
commit ae2d0a67a4
15 changed files with 547 additions and 119 deletions

View File

@ -197,9 +197,10 @@ impl<'a> Dump<'a> {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
primary_key,
},
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
KindDump::IndexUpdate { primary_key, new_uid } => KindWithContent::IndexUpdate {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
primary_key,
new_index_uid: new_uid,
},
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
KindDump::TaskCancelation { query, tasks } => {

View File

@ -274,8 +274,8 @@ fn snapshot_details(d: &Details) -> String {
Details::SettingsUpdate { settings } => {
format!("{{ settings: {settings:?} }}")
}
Details::IndexInfo { primary_key } => {
format!("{{ primary_key: {primary_key:?} }}")
Details::IndexInfo { primary_key, new_uid } => {
format!("{{ primary_key: {primary_key:?}, new_uid: {new_uid:?} }}")
}
Details::DocumentDeletion {
provided_ids: received_document_ids,

View File

@ -125,12 +125,6 @@ make_enum_progress! {
}
}
make_enum_progress! {
pub enum RenameIndexProgress {
RenamingTheIndex,
}
}
make_enum_progress! {
pub enum DeleteIndexProgress {
DeletingTheIndex,

View File

@ -24,7 +24,6 @@ enum AutobatchKind {
IndexCreation,
IndexDeletion,
IndexUpdate,
IndexRename,
IndexSwap,
}
@ -68,7 +67,6 @@ impl From<KindWithContent> for AutobatchKind {
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
KindWithContent::IndexRename { .. } => AutobatchKind::IndexRename,
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
@ -117,9 +115,6 @@ pub enum BatchKind {
IndexUpdate {
id: TaskId,
},
IndexRename {
id: TaskId,
},
IndexSwap {
id: TaskId,
},
@ -181,13 +176,6 @@ impl BatchKind {
)),
false,
),
K::IndexRename => (
Break((
BatchKind::IndexRename { id: task_id },
BatchStopReason::TaskCannotBeBatched { kind, id: task_id },
)),
false,
),
K::IndexSwap => (
Break((
BatchKind::IndexSwap { id: task_id },
@ -299,8 +287,8 @@ impl BatchKind {
};
match (self, autobatch_kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexRename | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break((this, BatchStopReason::IndexCreationMismatch { id }))

View File

@ -75,7 +75,11 @@ fn idx_create() -> KindWithContent {
}
fn idx_update() -> KindWithContent {
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
KindWithContent::IndexUpdate {
index_uid: String::from("doggo"),
primary_key: None,
new_index_uid: None,
}
}
fn idx_del() -> KindWithContent {

View File

@ -38,11 +38,7 @@ pub(crate) enum Batch {
IndexUpdate {
index_uid: String,
primary_key: Option<String>,
task: Task,
},
IndexRename {
index_uid: String,
new_index_uid: String,
new_index_uid: Option<String>,
task: Task,
},
IndexDeletion {
@ -113,8 +109,7 @@ impl Batch {
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::Export { task }
| Batch::IndexUpdate { task, .. }
| Batch::IndexRename { task, .. } => {
| Batch::IndexUpdate { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::SnapshotCreation(tasks)
@ -159,7 +154,6 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexRename { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
@ -178,7 +172,6 @@ impl fmt::Display for Batch {
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexRename { .. } => f.write_str("IndexRename")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::Export { .. } => f.write_str("Export")?,
@ -413,21 +406,13 @@ impl IndexScheduler {
let mut task =
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
let primary_key = match &task.kind {
KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(),
let (primary_key, new_index_uid) = match &task.kind {
KindWithContent::IndexUpdate { primary_key, new_index_uid, .. } => {
(primary_key.clone(), new_index_uid.clone())
}
_ => unreachable!(),
};
Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task }))
}
BatchKind::IndexRename { id } => {
let mut task =
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
let (new_uid) = match &task.kind {
KindWithContent::IndexRename { new_index_uid, .. } => new_index_uid.clone(),
_ => unreachable!(),
};
Ok(Some(Batch::IndexRename { index_uid, new_index_uid: new_uid, task }))
Ok(Some(Batch::IndexUpdate { index_uid, primary_key, new_index_uid, task }))
}
BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion {
index_uid,

View File

@ -15,7 +15,7 @@ use super::create_batch::Batch;
use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress, RenameIndexProgress,
UpdateIndexProgress,
};
use crate::utils::{
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
@ -224,38 +224,47 @@ impl IndexScheduler {
self.index_mapper.create_index(wtxn, &index_uid, None)?;
self.process_batch(
Batch::IndexUpdate { index_uid, primary_key, task },
Batch::IndexUpdate { index_uid, primary_key, new_index_uid: None, task },
current_batch,
progress,
)
}
Batch::IndexRename { index_uid, new_index_uid, mut task } => {
progress.update_progress(RenameIndexProgress::RenamingTheIndex);
let mut wtxn = self.env.write_txn()?;
self.index_mapper.rename(&mut wtxn, &index_uid, &new_index_uid)?;
self.queue.tasks.update_index(&mut wtxn, &new_index_uid, |bm| {
let old = self.queue.tasks.index_tasks(&wtxn, &index_uid).unwrap_or_default();
*bm |= &old;
})?;
self.queue.tasks.update_index(&mut wtxn, &index_uid, |bm| bm.clear())?;
wtxn.commit()?;
task.status = Status::Succeeded;
task.details = Some(Details::IndexRename(IndexRenameDetails { old_uid: index_uid, new_uid: new_index_uid }));
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
Batch::IndexUpdate { index_uid, primary_key, new_index_uid, mut task } => {
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
let rtxn = self.env.read_txn()?;
let index = self.index_mapper.index(&rtxn, &index_uid)?;
if let Some(primary_key) = primary_key.clone() {
// Handle rename if new_index_uid is provided
let final_index_uid = if let Some(new_uid) = &new_index_uid {
let mut wtxn = self.env.write_txn()?;
// Rename the index
self.index_mapper.rename(&mut wtxn, &index_uid, new_uid)?;
// Update the task index mappings
let old_tasks =
self.queue.tasks.index_tasks(&wtxn, &index_uid).unwrap_or_default();
self.queue.tasks.update_index(&mut wtxn, new_uid, |bm| {
*bm |= &old_tasks;
})?;
self.queue.tasks.update_index(&mut wtxn, &index_uid, |bm| bm.clear())?;
wtxn.commit()?;
new_uid.clone()
} else {
index_uid.clone()
};
// Get the index (renamed or not)
let rtxn = self.env.read_txn()?;
let index = self.index_mapper.index(&rtxn, &final_index_uid)?;
// Handle primary key update if provided
if let Some(ref primary_key) = primary_key {
let mut index_wtxn = index.write_txn()?;
let mut builder = MilliSettings::new(
&mut index_wtxn,
&index,
self.index_mapper.indexer_config(),
);
builder.set_primary_key(primary_key);
builder.set_primary_key(primary_key.clone());
let must_stop_processing = self.scheduler.must_stop_processing.clone();
builder
@ -264,7 +273,7 @@ impl IndexScheduler {
&progress,
current_batch.embedder_stats.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
.map_err(|e| Error::from_milli(e, Some(final_index_uid.to_string())))?;
index_wtxn.commit()?;
}
@ -272,7 +281,10 @@ impl IndexScheduler {
rtxn.commit()?;
task.status = Status::Succeeded;
task.details = Some(Details::IndexInfo { primary_key });
task.details = Some(Details::IndexInfo {
primary_key: primary_key.clone(),
new_uid: new_index_uid.clone(),
});
// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
@ -282,8 +294,8 @@ impl IndexScheduler {
let mut wtxn = self.env.write_txn()?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
.map_err(|e| Error::from_milli(e, Some(final_index_uid.clone())))?;
self.index_mapper.store_stats_of(&mut wtxn, &final_index_uid, &stats)?;
wtxn.commit()?;
Ok(())
}();

View File

@ -264,7 +264,12 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
K::IndexCreation { index_uid, .. } => index_uids.push(index_uid),
K::IndexUpdate { index_uid, .. } => index_uids.push(index_uid),
K::IndexUpdate { index_uid, new_index_uid, .. } => {
index_uids.push(index_uid);
if let Some(new_uid) = new_index_uid {
index_uids.push(new_uid);
}
}
K::IndexSwap { swaps } => {
for IndexSwap { indexes: (lhs, rhs) } in swaps.iter_mut() {
if lhs == swap.0 || lhs == swap.1 {
@ -496,9 +501,9 @@ impl crate::IndexScheduler {
Details::SettingsUpdate { settings: _ } => {
assert_eq!(kind.as_kind(), Kind::SettingsUpdate);
}
Details::IndexInfo { primary_key: pk1 } => match &kind {
Details::IndexInfo { primary_key: pk1, .. } => match &kind {
KindWithContent::IndexCreation { index_uid, primary_key: pk2 }
| KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => {
| KindWithContent::IndexUpdate { index_uid, primary_key: pk2, .. } => {
self.queue
.tasks
.index_tasks