diff --git a/Cargo.lock b/Cargo.lock index b2f935f58..b456a1219 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3206,6 +3206,7 @@ dependencies = [ "enum-iterator", "file-store", "flate2", + "hashbrown 0.15.5", "indexmap", "insta", "maplit", @@ -4123,6 +4124,7 @@ dependencies = [ "flate2", "fst", "insta", + "itertools 0.14.0", "meili-snap", "memmap2", "milli", diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index 025e2e441..73bea6478 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -9,8 +9,9 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::keys::Key; use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::settings::Unchecked; +use meilisearch_types::tasks::enterprise_edition::network::{NetworkTopologyChange, DbTaskNetwork}; use meilisearch_types::tasks::{ - Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork, + Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, }; use meilisearch_types::InstanceUid; use roaring::RoaringBitmap; @@ -95,7 +96,7 @@ pub struct TaskDump { )] pub finished_at: Option<OffsetDateTime>, #[serde(default, skip_serializing_if = "Option::is_none")] - pub network: Option<TaskNetwork>, + pub network: Option<DbTaskNetwork>, #[serde(default, skip_serializing_if = "Option::is_none")] pub custom_metadata: Option<String>, } @@ -163,6 +164,7 @@ pub enum KindDump { IndexCompaction { index_uid: String, }, + NetworkTopologyChange(NetworkTopologyChange), } impl From<Task> for TaskDump { @@ -249,6 +251,9 @@ impl From<KindWithContent> for KindDump { KindWithContent::IndexCompaction { index_uid } => { KindDump::IndexCompaction { index_uid } } + KindWithContent::NetworkTopologyChange(network_topology_change) => { + KindDump::NetworkTopologyChange(network_topology_change) + } } } } @@ -560,7 +565,8 @@ pub(crate) mod test { Network { local: Some("myself".to_string()), remotes: maplit::btreemap!
{"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }}, - sharding: false, + leader: None, + version: Default::default(), } } diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index fb8b6ff7b..36bd7cf12 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -24,6 +24,7 @@ dump = { path = "../dump" } enum-iterator = "2.1.0" file-store = { path = "../file-store" } flate2 = "1.1.2" +hashbrown = "0.15.4" indexmap = "2.9.0" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } diff --git a/crates/index-scheduler/src/dump.rs b/crates/index-scheduler/src/dump.rs index eb6b8f207..4a345e78a 100644 --- a/crates/index-scheduler/src/dump.rs +++ b/crates/index-scheduler/src/dump.rs @@ -238,6 +238,9 @@ impl<'a> Dump<'a> { KindDump::IndexCompaction { index_uid } => { KindWithContent::IndexCompaction { index_uid } } + KindDump::NetworkTopologyChange(network_topology_change) => { + KindWithContent::NetworkTopologyChange(network_topology_change) + } }, }; diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index 11d6f6e4c..74c794047 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -3,10 +3,13 @@ use std::fmt::Display; use meilisearch_types::batches::BatchId; use meilisearch_types::error::{Code, ErrorCode}; use meilisearch_types::milli::index::RollbackOutcome; +use meilisearch_types::milli::DocumentId; +use meilisearch_types::tasks::enterprise_edition::network::ReceiveTaskError; use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::{heed, milli}; use reqwest::StatusCode; use thiserror::Error; +use uuid::Uuid; use crate::TaskId; @@ -191,6 +194,15 @@ pub enum Error { #[error(transparent)] HeedTransaction(heed::Error), + #[error("No network topology change task is currently enqueued or processing")] + ImportTaskWithoutNetworkTask, + #[error("The network task version (`{network_task}`) does not match the import task version (`{import_task}`)")] + NetworkVersionMismatch { network_task: Uuid, import_task: Uuid }, + #[error("The import task emanates from an unknown remote `{0}`")] + ImportTaskUnknownRemote(String), + #[error("The import task with key `{0}` was already received")] + ImportTaskAlreadyReceived(DocumentId), + #[cfg(test)] #[error("Planned failure for tests.")] PlannedFailure, @@ -248,6 +260,10 @@ impl Error { | Error::Persist(_) | Error::FeatureNotEnabled(_) | Error::Export(_) + | Error::ImportTaskWithoutNetworkTask + | Error::NetworkVersionMismatch { .. } + | Error::ImportTaskAlreadyReceived(_) + | Error::ImportTaskUnknownRemote(_) | Error::Anyhow(_) => true, Error::CreateBatch(_) | Error::CorruptedTaskQueue @@ -307,6 +323,10 @@ impl ErrorCode for Error { Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters, Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters, Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, + Error::ImportTaskWithoutNetworkTask => Code::ImportTaskWithoutNetworkTask, + Error::NetworkVersionMismatch { .. } => Code::NetworkVersionMismatch, + Error::ImportTaskAlreadyReceived(_) => Code::ImportTaskAlreadyReceived, + Error::ImportTaskUnknownRemote(_) => Code::ImportTaskUnknownRemote, Error::S3Error { status, .. 
} if status.is_client_error() => { Code::InvalidS3SnapshotRequest } @@ -345,3 +365,12 @@ impl ErrorCode for Error { } } } + +impl From<ReceiveTaskError> for Error { + fn from(value: ReceiveTaskError) -> Self { + match value { + ReceiveTaskError::UnknownRemote(unknown) => Error::ImportTaskUnknownRemote(unknown), + ReceiveTaskError::DuplicateTask(dup) => Error::ImportTaskAlreadyReceived(dup), + } + } +} diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index b64b75e4c..7d29c4262 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -361,6 +361,11 @@ impl IndexMapper { Ok(()) } + /// The number of indexes in the database + pub fn index_count(&self, rtxn: &RoTxn) -> Result<u64> { + Ok(self.index_mapping.len(rtxn)?) + } + /// Return an index, may open it if it wasn't already opened. pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> { if let Some((current_name, current_index)) = diff --git a/crates/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs index 2f554adf4..cf3caaa34 100644 --- a/crates/index-scheduler/src/insta_snapshot.rs +++ b/crates/index-scheduler/src/insta_snapshot.rs @@ -325,6 +325,9 @@ fn snapshot_details(d: &Details) -> String { Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => { format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}") } + Details::NetworkTopologyChange { moved_documents, received_documents, message } => { + format!("{{ moved_documents: {moved_documents:?}, received_documents: {received_documents:?}, message: {message:?} }}") + } } } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 759a1ae6e..07033389f 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -68,10 +68,12 @@ use meilisearch_types::milli::vector::{ }; use meilisearch_types::milli::{self, Index}; use meilisearch_types::task_view::TaskView; -use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork}; +use meilisearch_types::tasks::enterprise_edition::network::{ + DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork, +}; +use meilisearch_types::tasks::{KindWithContent, Task}; use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView}; use milli::vector::db::IndexEmbeddingConfig; -use processing::ProcessingTasks; pub use queue::Query; use queue::Queue; use roaring::RoaringBitmap; @@ -82,6 +84,7 @@ use uuid::Uuid; use versioning::Versioning; use crate::index_mapper::IndexMapper; +use crate::processing::ProcessingTasks; use crate::utils::clamp_to_page_size; pub(crate) type BEI128 = I128<BE>; @@ -700,14 +703,14 @@ impl IndexScheduler { self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing) } - pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> { + pub fn set_task_network(&self, task_id: TaskId, network: DbTaskNetwork) -> Result<Task> { let mut wtxn = self.env.write_txn()?; let mut task = self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?; task.network = Some(network); self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?; wtxn.commit()?; - Ok(()) + Ok(task) } /// Return the batches matching the query from the user's point of view along @@ -757,18 +760,30 @@ impl IndexScheduler { task_id: Option<TaskId>, dry_run: bool, ) -> Result<Task> { - self.register_with_custom_metadata(kind,
task_id, None, dry_run) + self.register_with_custom_metadata(kind, task_id, None, dry_run, None) } /// Register a new task in the scheduler, with metadata. /// /// If it fails and data was associated with the task, it tries to delete the associated data. + /// + /// # Parameters + /// + /// - task_network: network of the task to check. + /// + /// If the task is an import task, only accept it if: + /// + /// 1. There is an ongoing network topology change task + /// 2. The task to register matches the network version of the network topology change task + /// + /// Always accept the task if it is not an import task. pub fn register_with_custom_metadata( &self, kind: KindWithContent, task_id: Option, custom_metadata: Option, dry_run: bool, + task_network: Option, ) -> Result { // if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty()) @@ -779,13 +794,29 @@ impl IndexScheduler { } let mut wtxn = self.env.write_txn()?; - let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?; + + if let Some(TaskNetwork::Import { import_from, network_change, metadata }) = &task_network { + self.update_network_task(&mut wtxn, import_from, network_change, metadata)?; + } + + let task = self.queue.register( + &mut wtxn, + &kind, + task_id, + custom_metadata, + dry_run, + task_network.map(DbTaskNetwork::from), + )?; // If the registered task is a task cancelation // we inform the processing tasks to stop (if necessary). if let KindWithContent::TaskCancelation { tasks, .. } = kind { let tasks_to_cancel = RoaringBitmap::from_iter(tasks); - if self.processing_tasks.read().unwrap().must_cancel_processing_tasks(&tasks_to_cancel) + if self + .processing_tasks + .read() + .unwrap() + .must_cancel_processing_tasks(&tasks_to_cancel) { self.scheduler.must_stop_processing.must_stop(); } @@ -801,6 +832,91 @@ impl IndexScheduler { Ok(task) } + pub fn network_no_index_for_remote( + &self, + remote_name: String, + origin: Origin, + ) -> Result<(), Error> { + let mut wtxn = self.env.write_txn()?; + + self.update_network_task( + &mut wtxn, + &ImportData { remote_name, index_name: "null".into(), document_count: 0 }, + &origin, + &ImportMetadata { index_count: 0, task_key: 0, total_index_documents: 0 }, + )?; + + wtxn.commit()?; + + // wake up the scheduler as the task state has changed + self.scheduler.wake_up.signal(); + + Ok(()) + } + + fn update_network_task( + &self, + wtxn: &mut heed::RwTxn<'_>, + import_from: &ImportData, + network_change: &Origin, + metadata: &ImportMetadata, + ) -> Result<(), Error> { + let mut network_tasks = self + .queue + .tasks + .get_kind(&*wtxn, meilisearch_types::tasks::Kind::NetworkTopologyChange)?; + if network_tasks.is_empty() { + return Err(Error::ImportTaskWithoutNetworkTask); + } + let network_task = { + let processing = self.processing_tasks.read().unwrap().processing.clone(); + if processing.is_disjoint(&network_tasks) { + let enqueued = self + .queue + .tasks + .get_status(&*wtxn, meilisearch_types::tasks::Status::Enqueued)?; + + network_tasks &= enqueued; + if let Some(network_task) = network_tasks.into_iter().next() { + network_task + } else { + return Err(Error::ImportTaskWithoutNetworkTask); + } + } else { + network_tasks &= &*processing; + network_tasks.into_iter().next().unwrap() + } + }; + let mut network_task = self.queue.tasks.get_task(&*wtxn, 
network_task)?.unwrap(); + let network_task_version = network_task + .network + .as_ref() + .map(|network| network.network_version()) + .unwrap_or_default(); + if network_task_version != network_change.network_version { + return Err(Error::NetworkVersionMismatch { + network_task: network_task_version, + import_task: network_change.network_version, + }); + } + let KindWithContent::NetworkTopologyChange(network_topology_change) = + &mut network_task.kind + else { + tracing::error!("unexpected network kind for network task while registering task"); + return Err(Error::CorruptedTaskQueue); + }; + network_topology_change.receive_remote_task( + &import_from.remote_name, + &import_from.index_name, + metadata.task_key, + import_from.document_count, + metadata.index_count, + metadata.total_index_documents, + )?; + self.queue.tasks.update_task(wtxn, &mut network_task)?; + Ok(()) + } + /// Register a new task coming from a dump in the scheduler. /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. pub fn register_dumped_task(&mut self) -> Result> { diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 16de63244..476eb6dfe 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -42,12 +42,10 @@ impl ProcessingTasks { /// Set the processing tasks to an empty list pub fn stop_processing(&mut self) -> Self { - self.progress = None; - Self { batch: std::mem::take(&mut self.batch), processing: std::mem::take(&mut self.processing), - progress: None, + progress: std::mem::take(&mut self.progress), } } diff --git a/crates/index-scheduler/src/queue/mod.rs b/crates/index-scheduler/src/queue/mod.rs index 6a2d38960..ff8064636 100644 --- a/crates/index-scheduler/src/queue/mod.rs +++ b/crates/index-scheduler/src/queue/mod.rs @@ -15,6 +15,7 @@ use file_store::FileStore; use meilisearch_types::batches::BatchId; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::enterprise_edition::network::DbTaskNetwork; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use roaring::RoaringBitmap; use time::format_description::well_known::Rfc3339; @@ -259,6 +260,7 @@ impl Queue { task_id: Option, custom_metadata: Option, dry_run: bool, + network: Option, ) -> Result { let next_task_id = self.tasks.next_task_id(wtxn)?; @@ -280,7 +282,7 @@ impl Queue { details: kind.default_details(), status: Status::Enqueued, kind: kind.clone(), - network: None, + network, custom_metadata, }; // For deletion and cancelation tasks, we want to make extra sure that they @@ -348,6 +350,7 @@ impl Queue { None, None, false, + None, )?; Ok(()) diff --git a/crates/index-scheduler/src/queue/tasks.rs b/crates/index-scheduler/src/queue/tasks.rs index 83c698ebe..853fd6cc6 100644 --- a/crates/index-scheduler/src/queue/tasks.rs +++ b/crates/index-scheduler/src/queue/tasks.rs @@ -3,7 +3,8 @@ use std::ops::{Bound, RangeBounds}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; -use meilisearch_types::tasks::{Kind, Status, Task}; +use meilisearch_types::tasks::enterprise_edition::network::DbTaskNetwork; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use roaring::{MultiOps, RoaringBitmap}; 
use time::OffsetDateTime; @@ -143,6 +144,17 @@ impl TaskQueue { })?; } + // Avoids rewriting part of the network topology change because of TOCTOU errors + if let ( + KindWithContent::NetworkTopologyChange(old_state), + KindWithContent::NetworkTopologyChange(new_state), + ) = (old_task.kind, &mut task.kind) + { + new_state.merge(old_state); + // the state possibly just changed, rewrite the details + task.details = Some(new_state.to_details()); + } + assert_eq!( old_task.enqueued_at, task.enqueued_at, "Cannot update a task's enqueued_at time" @@ -175,7 +187,16 @@ impl TaskQueue { task.network = match (old_task.network, task.network.take()) { (None, None) => None, (None, Some(network)) | (Some(network), None) => Some(network), - (Some(_), Some(network)) => Some(network), + (Some(left), Some(right)) => Some(match (left, right) { + ( + DbTaskNetwork::Remotes { remote_tasks: mut left, network_version: _ }, + DbTaskNetwork::Remotes { remote_tasks: mut right, network_version }, + ) => { + left.append(&mut right); + DbTaskNetwork::Remotes { remote_tasks: left, network_version } + } + (_, right) => right, + }), }; self.all_tasks.put(wtxn, &task.uid, task)?; diff --git a/crates/index-scheduler/src/queue/test.rs b/crates/index-scheduler/src/queue/test.rs index 7582da0d6..7ff62d2c1 100644 --- a/crates/index-scheduler/src/queue/test.rs +++ b/crates/index-scheduler/src/queue/test.rs @@ -203,26 +203,30 @@ fn test_disable_auto_deletion_of_tasks() { ) .unwrap(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); - drop(rtxn); - drop(proc); + { + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + } // now we're above the max number of tasks // and if we try to advance in the tick function no new task deletion should be enqueued handle.advance_till([Start, BatchCreated]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); - drop(rtxn); - drop(proc); + { + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" 
=> "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); + } } #[test] @@ -267,59 +271,69 @@ fn test_auto_deletion_of_tasks() { ) .unwrap(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); - drop(rtxn); - drop(proc); + { + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + } - // now we're above the max number of tasks - // and if we try to advance in the tick function a new task deletion should be enqueued - handle.advance_till([Start, BatchCreated]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); - drop(rtxn); - drop(proc); + { + // now we're above the max number of tasks + // and if we try to advance in the tick function a new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); + } - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); - drop(rtxn); - drop(proc); + { + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = 
index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); + } handle.advance_one_failed_batch(); // a new task deletion has been enqueued handle.advance_one_successful_batch(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); - drop(rtxn); - drop(proc); + { + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); + } handle.advance_one_failed_batch(); handle.advance_one_successful_batch(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap(); - let tasks = - index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); - let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed"); - drop(rtxn); - drop(proc); + { + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = index_scheduler + .queue + .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc) + .unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed"); + } } #[test] diff --git a/crates/index-scheduler/src/scheduler/autobatcher.rs b/crates/index-scheduler/src/scheduler/autobatcher.rs index 87818c5aa..7d7b0cbc9 100644 --- a/crates/index-scheduler/src/scheduler/autobatcher.rs +++ b/crates/index-scheduler/src/scheduler/autobatcher.rs @@ -74,6 +74,7 @@ impl From for AutobatchKind { | KindWithContent::DumpCreation { .. } | KindWithContent::Export { .. } | KindWithContent::UpgradeDatabase { .. 
} + | KindWithContent::NetworkTopologyChange(_) | KindWithContent::SnapshotCreation => { panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.") } diff --git a/crates/index-scheduler/src/scheduler/create_batch.rs b/crates/index-scheduler/src/scheduler/create_batch.rs index 24a2340cb..f79b03de0 100644 --- a/crates/index-scheduler/src/scheduler/create_batch.rs +++ b/crates/index-scheduler/src/scheduler/create_batch.rs @@ -4,6 +4,7 @@ use std::io::ErrorKind; use meilisearch_types::heed::RoTxn; use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::settings::{Settings, Unchecked}; +use meilisearch_types::tasks::enterprise_edition::network::NetworkTopologyState; use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task}; use roaring::RoaringBitmap; use uuid::Uuid; @@ -59,6 +60,13 @@ pub(crate) enum Batch { index_uid: String, task: Task, }, + NetworkIndexBatch { + network_task: Task, + inner_batch: Box, + }, + NetworkReady { + task: Task, + }, } #[derive(Debug)] @@ -140,9 +148,14 @@ impl Batch { .. } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)), }, - Batch::IndexSwap { task } => { + Batch::IndexSwap { task } | Batch::NetworkReady { task } => { RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() } + Batch::NetworkIndexBatch { network_task, inner_batch } => { + let mut tasks = inner_batch.ids(); + tasks.insert(network_task.uid); + tasks + } } } @@ -156,12 +169,14 @@ impl Batch { | Dump(_) | Export { .. } | UpgradeDatabase { .. } + | NetworkReady { .. } | IndexSwap { .. } => None, IndexOperation { op, .. } => Some(op.index_uid()), IndexCreation { index_uid, .. } | IndexUpdate { index_uid, .. } | IndexDeletion { index_uid, .. } | IndexCompaction { index_uid, .. } => Some(index_uid), + NetworkIndexBatch { network_task: _, inner_batch } => inner_batch.index_uid(), } } } @@ -184,6 +199,8 @@ impl fmt::Display for Batch { Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?, Batch::Export { .. } => f.write_str("Export")?, Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?, + Batch::NetworkIndexBatch { .. } => f.write_str("NetworkTopologyChange")?, + Batch::NetworkReady { .. } => f.write_str("NetworkTopologyChange")?, }; match index_uid { Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), @@ -452,6 +469,7 @@ impl IndexScheduler { pub(crate) fn create_next_batch( &self, rtxn: &RoTxn, + processing_network_tasks: &RoaringBitmap, ) -> Result> { #[cfg(test)] self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?; @@ -460,7 +478,6 @@ impl IndexScheduler { let mut current_batch = ProcessingBatch::new(batch_id); let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?; - let count_total_enqueued = enqueued.len(); let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?; // 0. we get the last task to cancel. @@ -509,7 +526,15 @@ impl IndexScheduler { ))); } - // 2. we get the next task to delete + // 2. Check for enqueued network topology changes + let network_changes = self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)? + & (enqueued | processing_network_tasks); + if let Some(task_id) = network_changes.iter().next() { + let task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap(); + return self.start_processing_network(rtxn, task, enqueued, current_batch); + } + + // 3. 
we get the next task to delete let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; if !to_delete.is_empty() { let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?; @@ -519,7 +544,7 @@ impl IndexScheduler { return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); } - // 3. we get the next task to compact + // 4. we get the next task to compact let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued; if let Some(task_id) = to_compact.min() { let mut task = @@ -534,7 +559,7 @@ impl IndexScheduler { return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch))); } - // 4. we batch the export. + // 5. we batch the export. let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued; if !to_export.is_empty() { let task_id = to_export.iter().next().expect("There must be at least one export task"); @@ -545,7 +570,7 @@ impl IndexScheduler { return Ok(Some((Batch::Export { task }, current_batch))); } - // 5. we batch the snapshot. + // 6. we batch the snapshot. let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; if !to_snapshot.is_empty() { let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?; @@ -555,7 +580,7 @@ impl IndexScheduler { return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); } - // 6. we batch the dumps. + // 7. we batch the dumps. let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued; if let Some(to_dump) = to_dump.min() { let mut task = @@ -568,25 +593,64 @@ impl IndexScheduler { return Ok(Some((Batch::Dump(task), current_batch))); } - // 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. - let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; - let mut task = - self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + let network = self.network(); - // If the task is not associated with any index, verify that it is an index swap and - // create the batch directly. Otherwise, get the index name associated with the task - // and use the autobatcher to batch the enqueued tasks associated with it + // 8. We make a batch from the unprioritised tasks. 
+ let (batch, current_batch) = + self.create_next_batch_unprioritized(rtxn, &enqueued, current_batch, |task| { + let is_task_from_the_future = task + .network + .as_ref() + .map(|task_network| task_network.network_version() >= network.version) + // tasks without versions are not from the future + .unwrap_or_default(); - let index_name = if let Some(&index_name) = task.indexes().first() { - index_name - } else { - assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); - current_batch.processing(Some(&mut task)); - current_batch.reason(BatchStopReason::TaskCannotBeBatched { - kind: Kind::IndexSwap, - id: task.uid, - }); - return Ok(Some((Batch::IndexSwap { task }, current_batch))); + is_task_from_the_future + })?; + Ok(batch.map(|batch| (batch, current_batch))) + } + + fn create_next_batch_unprioritized( + &self, + rtxn: &RoTxn, + enqueued: &RoaringBitmap, + mut current_batch: ProcessingBatch, + mut skip_if: F, + ) -> Result<(Option, ProcessingBatch)> + where + F: FnMut(&Task) -> bool, + { + let count_total_enqueued = enqueued.len(); + + let mut enqueued_it = enqueued.iter(); + let mut task; + let index_name = loop { + let Some(task_id) = enqueued_it.next() else { + return Ok((None, current_batch)); + }; + task = + self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + if skip_if(&task) { + continue; + } + // If the task is not associated with any index, verify that it is an index swap and + // create the batch directly. Otherwise, get the index name associated with the task + // and use the autobatcher to batch the enqueued tasks associated with it + + if let Some(&index_name) = task.indexes().first() { + break index_name; + } else { + assert!( + matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()) + ); + current_batch.processing(Some(&mut task)); + current_batch.reason(BatchStopReason::TaskCannotBeBatched { + kind: Kind::IndexSwap, + id: task.uid, + }); + return Ok((Some(Batch::IndexSwap { task }), current_batch)); + }; }; let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; @@ -621,6 +685,10 @@ impl IndexScheduler { .get_task(rtxn, task_id) .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?; + if skip_if(&task) { + continue; + } + if let Some(uuid) = task.content_uuid() { let content_size = match self.queue.file_store.compute_size(uuid) { Ok(content_size) => content_size, @@ -651,7 +719,7 @@ impl IndexScheduler { autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref()) { current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason)); - return Ok(self + let batch = self .create_next_batch_index( rtxn, index_name.to_string(), @@ -659,11 +727,103 @@ impl IndexScheduler { &mut current_batch, create_index, )? - .map(|batch| (batch, current_batch))); + .map(|batch| batch); + return Ok((batch, current_batch)); } // If we found no tasks then we were notified for something that got autobatched // somehow and there is nothing to do. 
- Ok(None) + Ok((None, current_batch)) + } + + fn start_processing_network( + &self, + rtxn: &RoTxn, + mut task: Task, + enqueued: &RoaringBitmap, + mut current_batch: ProcessingBatch, + ) -> Result> { + current_batch.processing(Some(&mut task)); + + let change_version = + task.network.as_ref().map(|network| network.network_version()).unwrap_or_default(); + let KindWithContent::NetworkTopologyChange(network_topology_change) = &task.kind else { + panic!("inconsistent kind with content") + }; + + match network_topology_change.state() { + NetworkTopologyState::WaitingForOlderTasks => { + let res = + self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| { + let has_index = task.index_uid().is_some(); + + if !has_index { + return true; + } + + let has_older_network_version = task + .network + .as_ref() + .map(|network| network.network_version() < change_version) + // if there is no version, we never retain the task + .unwrap_or_default(); + + !has_older_network_version + }); + + let (batch, current_batch) = res?; + + let batch = match batch { + Some(batch) => { + let inner_batch = Box::new(batch); + + Batch::NetworkIndexBatch { network_task: task, inner_batch } + } + None => Batch::NetworkReady { task }, + }; + + Ok(Some((batch, current_batch))) + } + NetworkTopologyState::ImportingDocuments => { + // if the import is done we need to go to the next state + if network_topology_change.is_import_finished() { + return Ok(Some((Batch::NetworkReady { task }, current_batch))); + } + + let res = + self.create_next_batch_unprioritized(rtxn, &enqueued, current_batch, |task| { + let has_index = task.index_uid().is_some(); + + if !has_index { + return true; + } + + let is_import_task = task + .network + .as_ref() + .map(|network| { + network.network_version() == change_version + && network.import_data().is_some() + }) + // if there is no version, we never retain the task + .unwrap_or_default(); + + !is_import_task + }); + + let (batch, current_batch) = res?; + + let batch = batch.map(|batch| { + let inner_batch = Box::new(batch); + + (Batch::NetworkIndexBatch { network_task: task, inner_batch }, current_batch) + }); + + Ok(batch) + } + NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => { + Ok(Some((Batch::NetworkReady { task }, current_batch))) + } + } } } diff --git a/crates/index-scheduler/src/scheduler/enterprise_edition/mod.rs b/crates/index-scheduler/src/scheduler/enterprise_edition/mod.rs new file mode 100644 index 000000000..823a5b033 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/enterprise_edition/mod.rs @@ -0,0 +1,268 @@ +use std::collections::BTreeMap; +use std::time::Duration; + +use bumpalo::Bump; +use meilisearch_types::enterprise_edition::network::Remote; +use meilisearch_types::milli::documents::PrimaryKey; +use meilisearch_types::milli::progress::{EmbedderStats, Progress}; +use meilisearch_types::milli::update::new::indexer; +use meilisearch_types::milli::update::new::indexer::enterprise_edition::sharding::Shards; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::enterprise_edition::network::{NetworkTopologyState, Origin}; +use meilisearch_types::tasks::{KindWithContent, Status, Task}; +use roaring::RoaringBitmap; + +use super::create_batch::Batch; +use crate::scheduler::process_batch::ProcessBatchInfo; +use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance}; +use crate::utils::ProcessingBatch; +use crate::{Error, IndexScheduler, Result}; + +impl IndexScheduler { + pub(super) fn 
process_network_index_batch( + &self, + mut network_task: Task, + inner_batch: Box<Batch>, + current_batch: &mut ProcessingBatch, + progress: Progress, + ) -> Result<(Vec<Task>, ProcessBatchInfo)> { + let (mut tasks, info) = self.process_batch(*inner_batch, current_batch, progress)?; + let KindWithContent::NetworkTopologyChange(network_topology_change) = + &mut network_task.kind + else { + tracing::error!("unexpected network kind for network task while processing batch"); + return Err(Error::CorruptedTaskQueue); + }; + for task in &tasks { + let Some(network) = task.network.as_ref() else { + continue; + }; + let Some(import) = network.import_data() else { + continue; + }; + network_topology_change.process_remote_tasks( + &import.remote_name, + &import.index_name, + import.document_count, + ); + } + network_task.details = Some(network_topology_change.to_details()); + + tasks.push(network_task); + Ok((tasks, info)) + } + + pub(super) fn process_network_ready( + &self, + mut task: Task, + progress: Progress, + ) -> Result<(Vec<Task>, ProcessBatchInfo)> { + let KindWithContent::NetworkTopologyChange(network_topology_change) = &mut task.kind else { + tracing::error!("network topology change task has the wrong kind with content"); + return Err(Error::CorruptedTaskQueue); + }; + + let Some(task_network) = &task.network else { + tracing::error!("network topology change task has no network"); + return Err(Error::CorruptedTaskQueue); + }; + + let origin; + let origin = match task_network.origin() { + Some(origin) => origin, + None => { + let myself = network_topology_change.in_name().expect("origin is not the leader"); + origin = Origin { + remote_name: myself.to_string(), + task_uid: task.uid, + network_version: task_network.network_version(), + }; + &origin + } + }; + + if let Some((remotes, out_name)) = network_topology_change.export_to_process() { + self.balance_documents( + remotes, + out_name, + network_topology_change.in_name(), + origin, + &progress, + &self.scheduler.must_stop_processing, + )?; + } + network_topology_change.update_state(); + if network_topology_change.state() == NetworkTopologyState::Finished { + task.status = Status::Succeeded; + } + + task.details = Some(network_topology_change.to_details()); + Ok((vec![task], Default::default())) + } + + fn balance_documents( + &self, + remotes: &BTreeMap<String, Remote>, + out_name: &str, + in_name: Option<&str>, + network_change_origin: &Origin, + progress: &Progress, + must_stop_processing: &crate::scheduler::MustStopProcessing, + ) -> crate::Result<()> { + let new_shards = Shards::from_remotes_local( + remotes.keys().map(String::as_str).chain(in_name.into_iter()), + in_name, + ); + + // TECHDEBT: this spawns a `ureq` agent in addition to `reqwest`. We probably want to harmonize all of this. + let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build(); + + let mut indexer_alloc = Bump::new(); + + let scheduler_rtxn = self.env.read_txn()?; + + let index_count = self.index_mapper.index_count(&scheduler_rtxn)?; + + // when the instance is empty, we still need to signal that to the remotes, as they cannot know of that fact.
+ if index_count == 0 { + for remote in remotes.values() { + let target = TargetInstance { + base_url: &remote.url, + api_key: remote.write_api_key.as_deref(), + }; + + self.export_no_index( + target, + out_name, + network_change_origin, + &agent, + must_stop_processing, + )?; + } + return Ok(()); + } + + let _: Vec<()> = self.index_mapper.try_for_each_index( + &scheduler_rtxn, + |index_uid, index| -> crate::Result<()> { + indexer_alloc.reset(); + let err = |err| Error::from_milli(err, Some(index_uid.to_string())); + let index_rtxn = index.read_txn()?; + let all_docids = index.external_documents_ids(); + let mut documents_to_move_to: hashbrown::HashMap = + hashbrown::HashMap::new(); + let mut documents_to_delete = RoaringBitmap::new(); + + for res in all_docids.iter(&index_rtxn)? { + let (external_docid, docid) = res?; + match new_shards.processing_shard(external_docid) { + Some(shard) if shard.is_own => continue, + Some(shard) => { + documents_to_move_to.entry_ref(&shard.name).or_default().insert(docid); + } + None => { + documents_to_delete.insert(docid); + } + } + } + + let fields_ids_map = index.fields_ids_map(&index_rtxn)?; + + for (remote_name, remote) in remotes { + let documents_to_move = + documents_to_move_to.remove(remote_name).unwrap_or_default(); + + let target = TargetInstance { + base_url: &remote.url, + api_key: remote.write_api_key.as_deref(), + }; + let options = ExportOptions { + index_uid, + payload_size: None, + override_settings: false, + export_mode: super::process_export::ExportMode::NetworkBalancing { + index_count, + export_old_remote_name: out_name, + network_change_origin, + }, + }; + let ctx = ExportContext { + index, + index_rtxn: &index_rtxn, + universe: &documents_to_move, + progress, + agent: &agent, + must_stop_processing, + }; + + let res = self.export_one_index(target, options, ctx); + + match res { + Ok(_) =>{ documents_to_delete |= documents_to_move;} + Err(err) => { + tracing::warn!("Could not export documents to `{remote_name}` due to error: {err}\n - Note: Documents will be kept"); + } + } + + + } + + if documents_to_delete.is_empty() { + return Ok(()); + } + + let mut new_fields_ids_map = fields_ids_map.clone(); + + // candidates not empty => index not empty => a primary key is set + let primary_key = index.primary_key(&index_rtxn)?.unwrap(); + + let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + .map_err(milli::Error::from) + .map_err(err)?; + + let mut index_wtxn = index.write_txn()?; + + let mut indexer = indexer::DocumentDeletion::new(); + indexer.delete_documents_by_docids(documents_to_delete); + let document_changes = indexer.into_changes(&indexer_alloc, primary_key); + let embedders = index + .embedding_configs() + .embedding_configs(&index_wtxn) + .map_err(milli::Error::from) + .map_err(err)?; + let embedders = self.embedders(index_uid.to_string(), embedders)?; + let indexer_config = self.index_mapper.indexer_config(); + let pool = &indexer_config.thread_pool; + + indexer::index( + &mut index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &fields_ids_map, + new_fields_ids_map, + None, // document deletion never changes primary key + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + &EmbedderStats::default(), + ) + .map_err(err)?; + + // update stats + let mut mapper_wtxn = self.env.write_txn()?; + let stats = + crate::index_mapper::IndexStats::new(&index, &index_wtxn).map_err(err)?; + self.index_mapper.store_stats_of(&mut mapper_wtxn, index_uid, &stats)?; + + 
index_wtxn.commit()?; + // update stats after committing changes to index + mapper_wtxn.commit()?; + + Ok(()) + }, + )?; + Ok(()) + } +} diff --git a/crates/index-scheduler/src/scheduler/mod.rs b/crates/index-scheduler/src/scheduler/mod.rs index bfbab3869..8409c49c1 100644 --- a/crates/index-scheduler/src/scheduler/mod.rs +++ b/crates/index-scheduler/src/scheduler/mod.rs @@ -8,6 +8,7 @@ mod process_export; mod process_index_operation; mod process_snapshot_creation; mod process_upgrade; +mod enterprise_edition; #[cfg(test)] mod test; #[cfg(test)] @@ -21,7 +22,6 @@ use std::path::PathBuf; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::Arc; -use convert_case::{Case, Casing as _}; use meilisearch_types::error::ResponseError; use meilisearch_types::heed::{Env, WithoutTls}; use meilisearch_types::milli; @@ -178,6 +178,9 @@ impl IndexScheduler { self.breakpoint(crate::test_utils::Breakpoint::Start); } + let previous_processing_batch = + self.processing_tasks.write().unwrap().stop_processing(); + if self.cleanup_enabled { let mut wtxn = self.env.write_txn()?; self.queue.cleanup_task_queue(&mut wtxn)?; @@ -185,11 +188,16 @@ impl IndexScheduler { } let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; - let (batch, mut processing_batch) = - match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { - Some(batch) => batch, - None => return Ok(TickOutcome::WaitForSignal), - }; + let (batch, mut processing_batch) = match self + .create_next_batch(&rtxn, &previous_processing_batch.processing) + .map_err(|e| Error::CreateBatch(Box::new(e)))? + { + Some(batch) => batch, + None => { + *self.processing_tasks.write().unwrap() = previous_processing_batch; + return Ok(TickOutcome::WaitForSignal); + } + }; let index_uid = batch.index_uid().map(ToOwned::to_owned); drop(rtxn); @@ -260,7 +268,14 @@ impl IndexScheduler { self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?; progress.update_progress(BatchProgress::WritingTasksToDisk); + processing_batch.finished(); + // whether the batch made progress. + // a batch make progress if it failed or if it contains at least one fully processed (or cancelled) task. + // + // if a batch did not make progress, it means that all of its tasks are waiting on the scheduler to make progress, + // and so we must wait for new tasks. Such a batch is not persisted to DB, and is resumed on the next tick. + let mut batch_made_progress = false; let mut stop_scheduler_forever = false; let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; let mut canceled = RoaringBitmap::new(); @@ -281,7 +296,11 @@ impl IndexScheduler { #[allow(unused_variables)] for (i, mut task) in tasks.into_iter().enumerate() { task_progress.fetch_add(1, Ordering::Relaxed); - processing_batch.update(&mut task); + processing_batch.update_from_task(&task); + if !matches!(task.status, Status::Processing | Status::Enqueued) { + batch_made_progress = true; + processing_batch.finish_task(&mut task); + } if task.status == Status::Canceled { canceled.insert(task.uid); canceled_by = task.canceled_by; @@ -348,6 +367,9 @@ impl IndexScheduler { } // In case of a failure we must get back and patch all the tasks with the error. 
Err(err) => { + // always persist failed batches + batch_made_progress = true; + #[cfg(test)] self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed); let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); @@ -371,7 +393,10 @@ impl IndexScheduler { task.status = Status::Failed; task.error = Some(error.clone()); task.details = task.details.map(|d| d.to_failed()); - processing_batch.update(&mut task); + processing_batch.update_from_task(&task); + if !matches!(task.status, Status::Processing | Status::Enqueued) { + processing_batch.finish_task(&mut task); + } #[cfg(test)] self.maybe_fail( @@ -394,44 +419,12 @@ impl IndexScheduler { let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } = process_batch_info; - processing_batch.stats.progress_trace = - progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect(); - processing_batch.stats.write_channel_congestion = congestion.map(|congestion| { - let mut congestion_info = serde_json::Map::new(); - congestion_info.insert("attempts".into(), congestion.attempts.into()); - congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into()); - congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into()); - congestion_info - }); - processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes - .iter() - .flat_map(|(dbname, pre_size)| { - post_commit_dabases_sizes - .get(dbname) - .map(|post_size| { - use std::cmp::Ordering::{Equal, Greater, Less}; - - use byte_unit::Byte; - use byte_unit::UnitType::Binary; - - let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary); - let diff_size = post_size.abs_diff(*pre_size) as u64; - let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary); - let sign = match post_size.cmp(pre_size) { - Equal => return None, - Greater => "+", - Less => "-", - }; - - Some(( - dbname.to_case(Case::Camel), - format!("{post:#.2} ({sign}{diff:#.2})").into(), - )) - }) - .into_iter() - .flatten() - }) - .collect(); + processing_batch.write_stats( + &progress, + congestion, + pre_commit_dabases_sizes, + post_commit_dabases_sizes, + ); if let Some(congestion) = congestion { tracing::debug!( @@ -444,46 +437,49 @@ impl IndexScheduler { tracing::debug!("call trace: {:?}", progress.accumulated_durations()); - self.queue.write_batch(&mut wtxn, processing_batch, &ids)?; + if batch_made_progress { + self.queue.write_batch(&mut wtxn, processing_batch, &ids)?; + } #[cfg(test)] self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?; wtxn.commit().map_err(Error::HeedTransaction)?; - // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task - // and then become « not found » for some time until the commit everything is written and the final commit is made. - self.processing_tasks.write().unwrap().stop_processing(); + if batch_made_progress { + // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task + // and then become « not found » for some time until the commit everything is written and the final commit is made. 
+ self.processing_tasks.write().unwrap().stop_processing(); - // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart - tracing::debug!("Deleting the update files"); + // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart + tracing::debug!("Deleting the update files"); - //We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap - let idx = AtomicU32::new(0); - (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { - let rtxn = self.read_txn()?; - while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { - let task = self - .queue - .tasks - .get_task(&rtxn, id) - .map_err(|e| Error::UnrecoverableError(Box::new(e)))? - .ok_or(Error::CorruptedTaskQueue)?; - if let Err(e) = self.queue.delete_persisted_task_data(&task) { - tracing::error!( + //We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap + let idx = AtomicU32::new(0); + (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { + let rtxn = self.read_txn()?; + while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { + let task = self + .queue + .tasks + .get_task(&rtxn, id) + .map_err(|e| Error::UnrecoverableError(Box::new(e)))? + .ok_or(Error::CorruptedTaskQueue)?; + if let Err(e) = self.queue.delete_persisted_task_data(&task) { + tracing::error!( "Failure to delete the content files associated with task {}. Error: {e}", task.uid ); + } } - } - Ok(()) - })?; + Ok(()) + })?; - self.notify_webhooks(ids); + self.notify_webhooks(ids); + } #[cfg(test)] self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing); - if stop_scheduler_forever { Ok(TickOutcome::StopProcessingForever) } else { diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs index 318b01736..966fca9aa 100644 --- a/crates/index-scheduler/src/scheduler/process_batch.rs +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -539,6 +539,10 @@ impl IndexScheduler { Ok((tasks, ProcessBatchInfo::default())) } + Batch::NetworkIndexBatch { network_task, inner_batch } => { + self.process_network_index_batch(network_task, inner_batch, current_batch, progress) + } + Batch::NetworkReady { task } => self.process_network_ready(task, progress), } } diff --git a/crates/index-scheduler/src/scheduler/process_export.rs b/crates/index-scheduler/src/scheduler/process_export.rs index 4d6211523..7a34e2d48 100644 --- a/crates/index-scheduler/src/scheduler/process_export.rs +++ b/crates/index-scheduler/src/scheduler/process_export.rs @@ -1,5 +1,6 @@ use std::collections::BTreeMap; use std::io::{self, Write as _}; +use std::ops::ControlFlow; use std::sync::atomic; use std::time::Duration; @@ -7,6 +8,7 @@ use backoff::ExponentialBackoff; use byte_unit::Byte; use flate2::write::GzEncoder; use flate2::Compression; +use meilisearch_types::error::Code; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::index::EmbeddingsWithMetadata; @@ -15,7 +17,11 @@ use meilisearch_types::milli::update::{request_threads, Setting}; use 
meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError}; use meilisearch_types::settings::{self, SecretPolicy}; +use meilisearch_types::tasks::enterprise_edition::network::{ + headers, ImportData, ImportMetadata, Origin, +}; use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings}; +use roaring::RoaringBitmap; use serde::Deserialize; use ureq::{json, Response}; @@ -50,6 +56,7 @@ impl IndexScheduler { let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build(); let must_stop_processing = self.scheduler.must_stop_processing.clone(); for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() { + let err = |err| Error::from_milli(err, Some(uid.to_string())); if must_stop_processing.get() { return Err(Error::AbortedTask); } @@ -61,104 +68,31 @@ impl IndexScheduler { )); let ExportIndexSettings { filter, override_settings } = export_settings; + let index = self.index(uid)?; let index_rtxn = index.read_txn()?; - let bearer = api_key.map(|api_key| format!("Bearer {api_key}")); - - // First, check if the index already exists - let url = format!("{base_url}/indexes/{uid}"); - let response = retry(&must_stop_processing, || { - let mut request = agent.get(&url); - if let Some(bearer) = &bearer { - request = request.set("Authorization", bearer); - } - - request.send_bytes(Default::default()).map_err(into_backoff_error) - }); - let index_exists = match response { - Ok(response) => response.status() == 200, - Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => { - false - } - Err(e) => return Err(e), - }; - - let primary_key = index - .primary_key(&index_rtxn) - .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; - - // Create the index - if !index_exists { - let url = format!("{base_url}/indexes"); - retry(&must_stop_processing, || { - let mut request = agent.post(&url); - if let Some(bearer) = &bearer { - request = request.set("Authorization", bearer); - } - let index_param = json!({ "uid": uid, "primaryKey": primary_key }); - request.send_json(&index_param).map_err(into_backoff_error) - })?; - } - - // Patch the index primary key - if index_exists && *override_settings { - let url = format!("{base_url}/indexes/{uid}"); - retry(&must_stop_processing, || { - let mut request = agent.patch(&url); - if let Some(bearer) = &bearer { - request = request.set("Authorization", bearer); - } - let index_param = json!({ "primaryKey": primary_key }); - request.send_json(&index_param).map_err(into_backoff_error) - })?; - } - - // Send the index settings - if !index_exists || *override_settings { - let mut settings = - settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - // Remove the experimental chat setting if not enabled - if self.features().check_chat_completions("exporting chat settings").is_err() { - settings.chat = Setting::NotSet; - } - // Retry logic for sending settings - let url = format!("{base_url}/indexes/{uid}/settings"); - retry(&must_stop_processing, || { - let mut request = agent.patch(&url); - if let Some(bearer) = bearer.as_ref() { - request = request.set("Authorization", bearer); - } - request.send_json(settings.clone()).map_err(into_backoff_error) - })?; - } - - let filter = filter - .as_ref() - .map(Filter::from_json) - .transpose() - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? 
- .flatten(); - - let filter_universe = filter - .map(|f| f.evaluate(&index_rtxn, &index)) - .transpose() - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - let whole_universe = index - .documents_ids(&index_rtxn) - .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten(); + let filter_universe = + filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?; + let whole_universe = + index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?; let universe = filter_universe.unwrap_or(whole_universe); - - let fields_ids_map = index.fields_ids_map(&index_rtxn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - - // We don't need to keep this one alive as we will - // spawn many threads to process the documents - drop(index_rtxn); - - let total_documents = universe.len() as u32; - let (step, progress_step) = AtomicDocumentStep::new(total_documents); - progress.update_progress(progress_step); + let target = TargetInstance { base_url, api_key }; + let ctx = ExportContext { + index: &index, + index_rtxn: &index_rtxn, + universe: &universe, + progress: &progress, + agent: &agent, + must_stop_processing: &must_stop_processing, + }; + let options = ExportOptions { + index_uid: uid, + payload_size, + override_settings: *override_settings, + export_mode: ExportMode::ExportRoute, + }; + let total_documents = self.export_one_index(target, options, ctx)?; output.insert( IndexUidPattern::new_unchecked(uid.clone()), @@ -167,155 +101,415 @@ impl IndexScheduler { matched_documents: Some(total_documents as u64), }, ); - - let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB - let documents_url = format!("{base_url}/indexes/{uid}/documents"); - - let results = request_threads() - .broadcast(|ctx| { - let index_rtxn = index - .read_txn() - .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; - - let mut buffer = Vec::new(); - let mut tmp_buffer = Vec::new(); - let mut compressed_buffer = Vec::new(); - for (i, docid) in universe.iter().enumerate() { - if i % ctx.num_threads() != ctx.index() { - continue; - } - - let document = index - .document(&index_rtxn, docid) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - let mut document = obkv_to_json(&all_fields, &fields_ids_map, document) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - // TODO definitely factorize this code - 'inject_vectors: { - let embeddings = index - .embeddings(&index_rtxn, docid) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - if embeddings.is_empty() { - break 'inject_vectors; - } - - let vectors = document - .entry(RESERVED_VECTORS_FIELD_NAME) - .or_insert(serde_json::Value::Object(Default::default())); - - let serde_json::Value::Object(vectors) = vectors else { - return Err(Error::from_milli( - milli::Error::UserError( - milli::UserError::InvalidVectorsMapType { - document_id: { - if let Ok(Some(Ok(index))) = index - .external_id_of( - &index_rtxn, - std::iter::once(docid), - ) - .map(|it| it.into_iter().next()) - { - index - } else { - format!("internal docid={docid}") - } - }, - value: vectors.clone(), - }, - ), - Some(uid.to_string()), - )); - }; - - for ( - embedder_name, - EmbeddingsWithMetadata { embeddings, regenerate, has_fragments }, - ) in embeddings - { - let embeddings = ExplicitVectors { - embeddings: Some( - 
VectorOrArrayOfVectors::from_array_of_vectors(embeddings), - ), - regenerate: regenerate && - // Meilisearch does not handle well dumps with fragments, because as the fragments - // are marked as user-provided, - // all embeddings would be regenerated on any settings change or document update. - // To prevent this, we mark embeddings has non regenerate in this case. - !has_fragments, - }; - vectors.insert( - embedder_name, - serde_json::to_value(embeddings).unwrap(), - ); - } - } - - tmp_buffer.clear(); - serde_json::to_writer(&mut tmp_buffer, &document) - .map_err(milli::InternalError::from) - .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; - - // Make sure we put at least one document in the buffer even - // though we might go above the buffer limit before sending - if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit { - // We compress the documents before sending them - let mut encoder = - GzEncoder::new(&mut compressed_buffer, Compression::default()); - encoder - .write_all(&buffer) - .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?; - encoder - .finish() - .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?; - - retry(&must_stop_processing, || { - let mut request = agent.post(&documents_url); - request = request.set("Content-Type", "application/x-ndjson"); - request = request.set("Content-Encoding", "gzip"); - if let Some(bearer) = &bearer { - request = request.set("Authorization", bearer); - } - request.send_bytes(&compressed_buffer).map_err(into_backoff_error) - })?; - buffer.clear(); - compressed_buffer.clear(); - } - buffer.extend_from_slice(&tmp_buffer); - - if i > 0 && i % 100 == 0 { - step.fetch_add(100, atomic::Ordering::Relaxed); - } - } - - retry(&must_stop_processing, || { - let mut request = agent.post(&documents_url); - request = request.set("Content-Type", "application/x-ndjson"); - if let Some(bearer) = &bearer { - request = request.set("Authorization", bearer); - } - request.send_bytes(&buffer).map_err(into_backoff_error) - })?; - - Ok(()) - }) - .map_err(|e| { - Error::from_milli( - milli::Error::InternalError(InternalError::PanicInThreadPool(e)), - Some(uid.to_string()), - ) - })?; - for result in results { - result?; - } - - step.store(total_documents, atomic::Ordering::Relaxed); } Ok(output) } + + pub(super) fn export_one_index( + &self, + target: TargetInstance<'_>, + options: ExportOptions<'_>, + ctx: ExportContext<'_>, + ) -> Result { + let err = |err| Error::from_milli(err, Some(options.index_uid.to_string())); + + let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}")); + let url = format!( + "{base_url}/indexes/{index_uid}", + base_url = target.base_url, + index_uid = options.index_uid + ); + let response = retry(ctx.must_stop_processing, || { + let mut request = ctx.agent.get(&url); + if let Some(bearer) = &bearer { + request = request.set("Authorization", bearer); + } + + request.send_bytes(Default::default()).map_err(into_backoff_error) + }); + let index_exists = match response { + Ok(response) => response.status() == 200, + Err(Error::FromRemoteWhenExporting { code, .. 
}) + if code == Code::IndexNotFound.name() => + { + false + } + Err(e) => return Err(e), + }; + let primary_key = + ctx.index.primary_key(&ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?; + if !index_exists { + let url = format!("{base_url}/indexes", base_url = target.base_url); + retry(ctx.must_stop_processing, || { + let mut request = ctx.agent.post(&url); + if let Some(bearer) = &bearer { + request = request.set("Authorization", bearer); + } + let index_param = json!({ "uid": options.index_uid, "primaryKey": primary_key }); + request.send_json(&index_param).map_err(into_backoff_error) + })?; + } + if index_exists && options.override_settings { + retry(ctx.must_stop_processing, || { + let mut request = ctx.agent.patch(&url); + if let Some(bearer) = &bearer { + request = request.set("Authorization", bearer); + } + let index_param = json!({ "primaryKey": primary_key }); + request.send_json(&index_param).map_err(into_backoff_error) + })?; + } + if !index_exists || options.override_settings { + /// TODO: attach a version to the settings + let mut settings = + settings::settings(&ctx.index, &ctx.index_rtxn, SecretPolicy::RevealSecrets) + .map_err(err)?; + // Remove the experimental chat setting if not enabled + if self.features().check_chat_completions("exporting chat settings").is_err() { + settings.chat = Setting::NotSet; + } + // Retry logic for sending settings + let url = format!( + "{base_url}/indexes/{index_uid}/settings", + base_url = target.base_url, + index_uid = options.index_uid + ); + retry(ctx.must_stop_processing, || { + let mut request = ctx.agent.patch(&url); + if let Some(bearer) = bearer.as_ref() { + request = request.set("Authorization", bearer); + } + request.send_json(settings.clone()).map_err(into_backoff_error) + })?; + } + + let fields_ids_map = ctx.index.fields_ids_map(&ctx.index_rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + let total_documents = ctx.universe.len() as u32; + let (step, progress_step) = AtomicDocumentStep::new(total_documents); + ctx.progress.update_progress(progress_step); + + let limit = options.payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); + let documents_url = format!( + "{base_url}/indexes/{index_uid}/documents", + base_url = target.base_url, + index_uid = options.index_uid + ); + + // no document to send, but we must still send a task when performing network balancing + if ctx.universe.is_empty() { + if let ExportMode::NetworkBalancing { + index_count, + export_old_remote_name, + network_change_origin, + } = options.export_mode + { + let mut compressed_buffer = Vec::new(); + // ignore control flow, we're returning anyway + let _ = send_buffer( + &[b' '], // needs something otherwise meili complains about missing payload + &mut compressed_buffer, + ctx.must_stop_processing, + ctx.agent, + &documents_url, + bearer.as_deref(), + Some(&( + ImportData { + remote_name: export_old_remote_name.to_string(), + index_name: options.index_uid.to_string(), + document_count: 0, + }, + network_change_origin.clone(), + ImportMetadata { + index_count, + task_key: 0, + total_index_documents: ctx.universe.len(), + }, + )), + &err, + )?; + } + return Ok(0); + } + + let results = request_threads() + .broadcast(|broadcast| { + let mut task_network = if let ExportMode::NetworkBalancing { + index_count, + export_old_remote_name, + network_change_origin, + } = options.export_mode + { + Some(( + ImportData { + remote_name: export_old_remote_name.to_string(), + index_name: 
options.index_uid.to_string(), + document_count: 0, + }, + network_change_origin.clone(), + ImportMetadata { + index_count, + task_key: 0, + total_index_documents: ctx.universe.len(), + }, + )) + } else { + None + }; + + let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?; + + let mut buffer = Vec::new(); + let mut tmp_buffer = Vec::new(); + let mut compressed_buffer = Vec::new(); + for (i, docid) in ctx.universe.iter().enumerate() { + if i % broadcast.num_threads() != broadcast.index() { + continue; + } + if let Some((import_data, _, metadata)) = &mut task_network { + import_data.document_count += 1; + metadata.task_key = docid; + } + + let document = ctx.index.document(&index_rtxn, docid).map_err(err)?; + + let mut document = + obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?; + + // TODO definitely factorize this code + 'inject_vectors: { + let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?; + + if embeddings.is_empty() { + break 'inject_vectors; + } + + let vectors = document + .entry(RESERVED_VECTORS_FIELD_NAME) + .or_insert(serde_json::Value::Object(Default::default())); + + let serde_json::Value::Object(vectors) = vectors else { + return Err(err(milli::Error::UserError( + milli::UserError::InvalidVectorsMapType { + document_id: { + if let Ok(Some(Ok(index))) = ctx + .index + .external_id_of(&index_rtxn, std::iter::once(docid)) + .map(|it| it.into_iter().next()) + { + index + } else { + format!("internal docid={docid}") + } + }, + value: vectors.clone(), + }, + ))); + }; + + for ( + embedder_name, + EmbeddingsWithMetadata { embeddings, regenerate, has_fragments }, + ) in embeddings + { + let embeddings = ExplicitVectors { + embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( + embeddings, + )), + regenerate: regenerate && + // Meilisearch does not handle well dumps with fragments, because as the fragments + // are marked as user-provided, + // all embeddings would be regenerated on any settings change or document update. + // To prevent this, we mark embeddings has non regenerate in this case. 
+ !has_fragments, + }; + vectors + .insert(embedder_name, serde_json::to_value(embeddings).unwrap()); + } + } + + tmp_buffer.clear(); + serde_json::to_writer(&mut tmp_buffer, &document) + .map_err(milli::InternalError::from) + .map_err(milli::Error::from) + .map_err(err)?; + + // Make sure we put at least one document in the buffer even + // though we might go above the buffer limit before sending + if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit { + let control_flow = send_buffer( + &buffer, + &mut compressed_buffer, + &ctx.must_stop_processing, + &ctx.agent, + &documents_url, + bearer.as_deref(), + task_network.as_ref(), + &err, + )?; + buffer.clear(); + compressed_buffer.clear(); + if let Some((import_data, _, metadata)) = &mut task_network { + import_data.document_count = 0; + metadata.task_key = 0; + } + if control_flow.is_break() { + return Ok(()); + } + } + buffer.extend_from_slice(&tmp_buffer); + + if i > 0 && i % 100 == 0 { + step.fetch_add(100, atomic::Ordering::Relaxed); + } + } + + // send the last buffered documents if any + if !buffer.is_empty() { + // ignore control flow here + let _ = send_buffer( + &buffer, + &mut compressed_buffer, + ctx.must_stop_processing, + ctx.agent, + &documents_url, + bearer.as_deref(), + task_network.as_ref(), + &err, + )?; + } + + Ok(()) + }) + .map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?; + for result in results { + result?; + } + step.store(total_documents, atomic::Ordering::Relaxed); + Ok(total_documents as u64) + } + + pub(super) fn export_no_index( + &self, + target: TargetInstance<'_>, + export_old_remote_name: &str, + network_change_origin: &Origin, + agent: &ureq::Agent, + must_stop_processing: &MustStopProcessing, + ) -> Result<(), Error> { + let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}")); + let url = format!("{base_url}/network", base_url = target.base_url,); + + { + let _ = handle_response(retry(must_stop_processing, || { + let request = agent.patch(&url); + let mut request = set_network_ureq_headers( + request, + &ImportData { + remote_name: export_old_remote_name.to_string(), + index_name: "null".to_string(), + document_count: 0, + }, + &network_change_origin, + &ImportMetadata { index_count: 0, task_key: 0, total_index_documents: 0 }, + ); + request = request.set("Content-Type", "application/json"); + if let Some(bearer) = &bearer { + request = request.set("Authorization", bearer); + } + request + .send_json( + // empty payload that will be disregarded + serde_json::Value::Object(Default::default()), + ) + .map_err(into_backoff_error) + }))?; + } + + Ok(()) + } +} + +fn set_network_ureq_headers( + request: ureq::Request, + import_data: &ImportData, + origin: &Origin, + metadata: &ImportMetadata, +) -> ureq::Request { + request + .set(headers::PROXY_ORIGIN_REMOTE_HEADER, &origin.remote_name) + .set(headers::PROXY_ORIGIN_TASK_UID_HEADER, &origin.task_uid.to_string()) + .set(headers::PROXY_ORIGIN_NETWORK_VERSION_HEADER, &origin.network_version.to_string()) + .set(headers::PROXY_IMPORT_REMOTE_HEADER, &import_data.remote_name) + .set(headers::PROXY_IMPORT_INDEX_HEADER, &import_data.index_name) + .set(headers::PROXY_IMPORT_TASK_KEY_HEADER, &metadata.task_key.to_string()) + .set(headers::PROXY_IMPORT_DOCS_HEADER, &import_data.document_count.to_string()) + .set(headers::PROXY_IMPORT_INDEX_COUNT_HEADER, &metadata.index_count.to_string()) + .set( + headers::PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, + &metadata.total_index_documents.to_string(), + ) +} + +fn 
send_buffer<'a, 'b>( + buffer: &'a [u8], + mut compressed_buffer: &'a mut Vec, + must_stop_processing: &MustStopProcessing, + agent: &ureq::Agent, + documents_url: &'a str, + bearer: Option<&'a str>, + task_network: Option<&(ImportData, Origin, ImportMetadata)>, + err: &'a impl Fn(milli::Error) -> crate::Error, +) -> Result> { + // We compress the documents before sending them + let mut encoder: GzEncoder<&mut &mut Vec> = + GzEncoder::new(&mut compressed_buffer, Compression::default()); + encoder.write_all(&buffer).map_err(milli::Error::from).map_err(err)?; + encoder.finish().map_err(milli::Error::from).map_err(err)?; + + let res = retry(must_stop_processing, || { + let mut request = agent.post(documents_url); + request = request.set("Content-Type", "application/x-ndjson"); + request = request.set("Content-Encoding", "gzip"); + if let Some(bearer) = bearer { + request = request.set("Authorization", bearer); + } + if let Some((import_data, origin, metadata)) = task_network { + request = set_network_ureq_headers(request, import_data, origin, metadata); + } + request.send_bytes(&compressed_buffer).map_err(into_backoff_error) + }); + + handle_response(res) +} + +fn handle_response(res: Result) -> Result> { + match res { + Ok(_response) => Ok(ControlFlow::Continue(())), + Err(Error::FromRemoteWhenExporting { code, .. }) + if code == Code::ImportTaskAlreadyReceived.name() => + { + Ok(ControlFlow::Continue(())) + } + Err(Error::FromRemoteWhenExporting { code, message, .. }) + if code == Code::ImportTaskUnknownRemote.name() => + { + tracing::warn!("remote answered with: {message}"); + Ok(ControlFlow::Break(())) + } + // note: there has already been many attempts to get this due to exponential backoff + Err(Error::FromRemoteWhenExporting { code, message, .. 
}) + if code == Code::ImportTaskWithoutNetworkTask.name() => + { + tracing::warn!("remote answered with: {message}"); + Ok(ControlFlow::Break(())) + } + Err(e) => { + tracing::warn!("error while exporting: {e}"); + return Err(e); + } + } } fn retry(must_stop_processing: &MustStopProcessing, send_request: F) -> Result @@ -374,4 +568,37 @@ fn ureq_error_into_error(error: ureq::Error) -> Error { } } +// export_one_index arguments +pub(super) struct TargetInstance<'a> { + pub(super) base_url: &'a str, + pub(super) api_key: Option<&'a str>, +} + +pub(super) struct ExportOptions<'a> { + pub(super) index_uid: &'a str, + pub(super) payload_size: Option<&'a Byte>, + pub(super) override_settings: bool, + pub(super) export_mode: ExportMode<'a>, +} + +pub(super) struct ExportContext<'a> { + pub(super) index: &'a meilisearch_types::milli::Index, + pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>, + pub(super) universe: &'a RoaringBitmap, + pub(super) progress: &'a Progress, + pub(super) agent: &'a ureq::Agent, + pub(super) must_stop_processing: &'a MustStopProcessing, +} + +pub(super) enum ExportMode<'a> { + ExportRoute, + NetworkBalancing { + index_count: u64, + + export_old_remote_name: &'a str, + network_change_origin: &'a Origin, + }, +} + +// progress related enum ExportIndex {} diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 18799027c..138791b78 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -4,9 +4,11 @@ use std::collections::{BTreeSet, HashSet}; use std::ops::Bound; use std::sync::Arc; +use convert_case::{Case, Casing as _}; use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats}; use meilisearch_types::heed::{Database, RoTxn, RwTxn}; -use meilisearch_types::milli::CboRoaringBitmapCodec; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::{CboRoaringBitmapCodec, ChannelCongestion}; use meilisearch_types::task_view::DetailsView; use meilisearch_types::tasks::{ BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status, @@ -119,17 +121,8 @@ impl ProcessingBatch { self.stats.total_nb_tasks = 0; } - /// Update the timestamp of the tasks and the inner structure of this structure. - pub fn update(&mut self, task: &mut Task) { - // We must re-set this value in case we're dealing with a task that has been added between - // the `processing` and `finished` state - // We must re-set this value in case we're dealing with a task that has been added between - // the `processing` and `finished` state or that failed. - task.batch_uid = Some(self.uid); - // Same - task.started_at = Some(self.started_at); - task.finished_at = self.finished_at; - + /// Update batch task from a processed task + pub fn update_from_task(&mut self, task: &Task) { self.statuses.insert(task.status); // Craft an aggregation of the details of all the tasks encountered in this batch. @@ -144,6 +137,63 @@ impl ProcessingBatch { } } + /// Update the timestamp of the tasks after they're done + pub fn finish_task(&self, task: &mut Task) { + // We must re-set this value in case we're dealing with a task that has been added between + // the `processing` and `finished` state or that failed. 
+ task.batch_uid = Some(self.uid); + // Same + task.started_at = Some(self.started_at); + task.finished_at = self.finished_at; + } + + pub fn write_stats( + &mut self, + progress: &Progress, + congestion: Option, + pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>, + post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>, + ) { + self.stats.progress_trace = + progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect(); + self.stats.write_channel_congestion = congestion.map(|congestion| { + let mut congestion_info = serde_json::Map::new(); + congestion_info.insert("attempts".into(), congestion.attempts.into()); + congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into()); + congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into()); + congestion_info + }); + self.stats.internal_database_sizes = pre_commit_dabases_sizes + .iter() + .flat_map(|(dbname, pre_size)| { + post_commit_dabases_sizes + .get(dbname) + .map(|post_size| { + use std::cmp::Ordering::{Equal, Greater, Less}; + + use byte_unit::Byte; + use byte_unit::UnitType::Binary; + + let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary); + let diff_size = post_size.abs_diff(*pre_size) as u64; + let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary); + let sign = match post_size.cmp(pre_size) { + Equal => return None, + Greater => "+", + Less => "-", + }; + + Some(( + dbname.to_case(Case::Camel), + format!("{post:#.2} ({sign}{diff:#.2})").into(), + )) + }) + .into_iter() + .flatten() + }) + .collect(); + } + pub fn to_batch(&self) -> Batch { Batch { uid: self.uid, @@ -286,6 +336,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { | K::DumpCreation { .. } | K::Export { .. } | K::UpgradeDatabase { .. 
} + | K::NetworkTopologyChange(_) | K::SnapshotCreation => (), }; if let Some(Details::IndexSwap { swaps }) = &mut task.details { @@ -627,6 +678,13 @@ impl crate::IndexScheduler { } => { assert_eq!(kind.as_kind(), Kind::IndexCompaction); } + Details::NetworkTopologyChange { + moved_documents: _, + received_documents: _, + message: _, + } => { + assert_eq!(kind.as_kind(), Kind::NetworkTopologyChange); + } } } diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index f3279a094..6dbbd6bb7 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -24,6 +24,7 @@ enum-iterator = "2.1.0" file-store = { path = "../file-store" } flate2 = "1.1.2" fst = "0.4.7" +itertools = "0.14.0" memmap2 = "0.9.7" milli = { path = "../milli" } roaring = { version = "0.10.12", features = ["serde"] } diff --git a/crates/meilisearch-types/src/enterprise_edition/network.rs b/crates/meilisearch-types/src/enterprise_edition/network.rs index 9d5c51e25..576503b1d 100644 --- a/crates/meilisearch-types/src/enterprise_edition/network.rs +++ b/crates/meilisearch-types/src/enterprise_edition/network.rs @@ -7,6 +7,7 @@ use std::collections::BTreeMap; use milli::update::new::indexer::enterprise_edition::sharding::Shards; use serde::{Deserialize, Serialize}; +use uuid::Uuid; #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] #[serde(rename_all = "camelCase")] @@ -16,20 +17,18 @@ pub struct Network { #[serde(default)] pub remotes: BTreeMap, #[serde(default)] - pub sharding: bool, + pub leader: Option, + #[serde(default)] + pub version: Uuid, } impl Network { pub fn shards(&self) -> Option { - if self.sharding { - let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`"); - let others = self - .remotes - .keys() - .filter(|name| name.as_str() != this) - .map(|name| name.to_owned()) - .collect(); - Some(Shards { own: vec![this.to_owned()], others }) + if self.leader.is_some() { + Some(Shards::from_remotes_local( + self.remotes.keys().map(String::as_str), + self.local.as_deref(), + )) } else { None } diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 408359a01..8f78dfd13 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -156,7 +156,7 @@ macro_rules! 
make_error_codes { } /// return error name, used as error code - fn name(&self) -> String { + pub fn name(&self) -> String { match self { $( Code::$code_ident => stringify!($code_ident).to_case(convert_case::Case::Snake) @@ -214,6 +214,9 @@ ImmutableApiKeyUid , InvalidRequest , BAD_REQU ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST; ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST; ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST; +ImportTaskAlreadyReceived , InvalidRequest , PRECONDITION_FAILED; +ImportTaskUnknownRemote , InvalidRequest , PRECONDITION_FAILED; +ImportTaskWithoutNetworkTask , InvalidRequest , SERVICE_UNAVAILABLE; IndexAlreadyExists , InvalidRequest , CONFLICT ; IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR; IndexNotFound , InvalidRequest , NOT_FOUND; @@ -270,9 +273,9 @@ InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQU InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; +InvalidNetworkLeader , InvalidRequest , BAD_REQUEST ; InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ; InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ; -InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ; InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ; InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ; InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ; @@ -377,7 +380,9 @@ MissingPayload , InvalidRequest , BAD_REQU MissingSearchHybrid , InvalidRequest , BAD_REQUEST ; MissingSwapIndexes , InvalidRequest , BAD_REQUEST ; MissingTaskFilters , InvalidRequest , BAD_REQUEST ; +NetworkVersionMismatch , InvalidRequest , PRECONDITION_FAILED ; NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY; +NotLeader , InvalidRequest , BAD_REQUEST ; PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ; RemoteBadResponse , System , BAD_GATEWAY ; RemoteBadRequest , InvalidRequest , BAD_REQUEST ; @@ -391,6 +396,8 @@ TaskFileNotFound , InvalidRequest , NOT_FOUN BatchNotFound , InvalidRequest , NOT_FOUND ; TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ; TooManyVectors , InvalidRequest , BAD_REQUEST ; +UnexpectedNetworkPreviousRemotes , InvalidRequest , BAD_REQUEST ; +NetworkVersionTooOld , InvalidRequest , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; diff --git a/crates/meilisearch-types/src/task_view.rs b/crates/meilisearch-types/src/task_view.rs index 38b236dc7..6b7290fdb 100644 --- a/crates/meilisearch-types/src/task_view.rs +++ b/crates/meilisearch-types/src/task_view.rs @@ -9,9 +9,9 @@ use utoipa::ToSchema; use crate::batches::BatchId; use crate::error::ResponseError; use crate::settings::{Settings, Unchecked}; +use crate::tasks::enterprise_edition::network::DbTaskNetwork; use crate::tasks::{ serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId, - TaskNetwork, }; #[derive(Debug, Clone, PartialEq, Serialize, ToSchema)] @@ -54,7 +54,7 @@ pub struct TaskView { pub finished_at: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub network: Option, + pub network: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub custom_metadata: Option, @@ -151,6 +151,11 @@ pub struct DetailsView { pub pre_compaction_size: Option, #[serde(skip_serializing_if = "Option::is_none")] pub post_compaction_size: Option, + // network 
topology change + #[serde(skip_serializing_if = "Option::is_none")] + pub moved_documents: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, } impl DetailsView { @@ -161,6 +166,17 @@ impl DetailsView { (None, Some(doc)) | (Some(doc), None) => Some(doc), (Some(left), Some(right)) => Some(left + right), }, + moved_documents: match (self.moved_documents, other.moved_documents) { + (None, None) => None, + (None, Some(doc)) | (Some(doc), None) => Some(doc), + (Some(left), Some(right)) => Some(left + right), + }, + message: match (&mut self.message, &other.message) { + (None, None) => None, + (None, Some(message)) => Some(message.clone()), + (Some(message), None) => Some(std::mem::take(message)), + (Some(message), Some(_)) => Some(std::mem::take(message)), + }, indexed_documents: match (self.indexed_documents, other.indexed_documents) { (None, None) => None, (None, Some(None)) | (Some(None), None) | (Some(None), Some(None)) => Some(None), @@ -451,6 +467,14 @@ impl From
for DetailsView { ..Default::default() } } + Details::NetworkTopologyChange { moved_documents, received_documents, message } => { + DetailsView { + moved_documents: Some(moved_documents), + received_documents: Some(received_documents), + message: Some(message), + ..Default::default() + } + } } } } diff --git a/crates/meilisearch-types/src/tasks/enterprise_edition/mod.rs b/crates/meilisearch-types/src/tasks/enterprise_edition/mod.rs new file mode 100644 index 000000000..47047de48 --- /dev/null +++ b/crates/meilisearch-types/src/tasks/enterprise_edition/mod.rs @@ -0,0 +1,6 @@ +// Copyright © 2025 Meilisearch Some Rights Reserved +// This file is part of Meilisearch Enterprise Edition (EE). +// Use of this source code is governed by the Business Source License 1.1, +// as found in the LICENSE-EE file or at + +pub mod network; diff --git a/crates/meilisearch-types/src/tasks/enterprise_edition/network.rs b/crates/meilisearch-types/src/tasks/enterprise_edition/network.rs new file mode 100644 index 000000000..bf4e0ddfb --- /dev/null +++ b/crates/meilisearch-types/src/tasks/enterprise_edition/network.rs @@ -0,0 +1,663 @@ +// Copyright © 2025 Meilisearch Some Rights Reserved +// This file is part of Meilisearch Enterprise Edition (EE). +// Use of this source code is governed by the Business Source License 1.1, +// as found in the LICENSE-EE file or at + +use std::collections::{BTreeMap, BTreeSet}; + +use itertools::{EitherOrBoth, Itertools as _}; +use milli::DocumentId; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; +use uuid::Uuid; + +use crate::enterprise_edition::network::{Network, Remote}; +use crate::error::ResponseError; +use crate::tasks::{Details, TaskId}; + +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] +#[serde(untagged, rename_all = "camelCase")] +// This type is used in the database, care should be taken when modifying it. +pub enum DbTaskNetwork { + /// Tasks that were duplicated from `origin` + Origin { origin: Origin }, + /// Tasks that were duplicated as `remote_tasks` + Remotes { + remote_tasks: BTreeMap, + #[serde(default)] + network_version: Uuid, + }, + /// Document import tasks sent in the context of `network_change` + Import { import_from: ImportData, network_change: Origin }, +} + +impl DbTaskNetwork { + pub fn network_version(&self) -> Uuid { + match self { + DbTaskNetwork::Origin { origin } => origin.network_version, + DbTaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version, + DbTaskNetwork::Import { import_from: _, network_change } => { + network_change.network_version + } + } + } + + pub fn import_data(&self) -> Option<&ImportData> { + match self { + DbTaskNetwork::Origin { .. } | DbTaskNetwork::Remotes { .. } => None, + DbTaskNetwork::Import { import_from, .. } => Some(import_from), + } + } + + pub fn origin(&self) -> Option<&Origin> { + match self { + DbTaskNetwork::Origin { origin } => Some(origin), + DbTaskNetwork::Remotes { .. } => None, + DbTaskNetwork::Import { network_change, .. 
} => Some(network_change), + } + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum TaskNetwork { + /// Tasks that were duplicated from `origin` + Origin { origin: Origin }, + /// Tasks that were duplicated as `remote_tasks` + Remotes { remote_tasks: BTreeMap, network_version: Uuid }, + /// Document import tasks sent in the context of `network_change` + Import { import_from: ImportData, network_change: Origin, metadata: ImportMetadata }, +} + +impl TaskNetwork { + pub fn network_version(&self) -> Uuid { + match self { + TaskNetwork::Origin { origin } => origin.network_version, + TaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version, + TaskNetwork::Import { import_from: _, network_change, metadata: _ } => { + network_change.network_version + } + } + } +} + +impl From for DbTaskNetwork { + fn from(value: TaskNetwork) -> Self { + match value { + TaskNetwork::Origin { origin } => DbTaskNetwork::Origin { origin }, + TaskNetwork::Remotes { remote_tasks, network_version } => { + DbTaskNetwork::Remotes { remote_tasks, network_version } + } + TaskNetwork::Import { import_from, network_change, metadata: _ } => { + DbTaskNetwork::Import { import_from, network_change } + } + } + } +} + +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct Origin { + pub remote_name: String, + pub task_uid: u32, + pub network_version: Uuid, +} + +/// Import data stored in a task +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ImportData { + /// Remote that this task is imported from + pub remote_name: String, + /// Index relevant to this task + pub index_name: String, + /// Number of documents in this task + pub document_count: u64, +} + +/// Import metadata associated with a task but not stored in the task +#[derive(Debug, PartialEq, Clone)] +pub struct ImportMetadata { + /// Total number of indexes to import from this host + pub index_count: u64, + /// Key unique to this (network_change, index, host, key). + /// + /// In practice, an internal document id of one of the documents to import. + pub task_key: DocumentId, + /// Total number of documents to import for this index from this host. + pub total_index_documents: u64, +} + +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RemoteTask { + #[serde(skip_serializing_if = "Option::is_none")] + task_uid: Option, + error: Option, +} + +impl From> for RemoteTask { + fn from(res: Result) -> RemoteTask { + match res { + Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None }, + Err(err) => RemoteTask { task_uid: None, error: Some(err) }, + } + } +} + +/// Contains the full state of a network topology change. +/// +/// A network topology change task is unique in that it can be processed in multiple different batches, as its resolution +/// depends on various document additions tasks being processed. +/// +/// A network topology task has 4 states: +/// +/// 1. Processing any task that was meant for an earlier version of the network. This is necessary to know that we have the right version of +/// documents. +/// 2. Sending all documents that must be moved to other remotes. +/// 3. Processing any task coming from the remotes. +/// 4. Finished. 
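+///
+/// These states map, in order, to the [`NetworkTopologyState`] variants
+/// `WaitingForOlderTasks`, `ExportingDocuments`, `ImportingDocuments` and `Finished`.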
+/// +/// Furthermore, it maintains some stats +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NetworkTopologyChange { + state: NetworkTopologyState, + // in name, `None` if the node is no longer part of the network + #[serde(skip_serializing_if = "Option::is_none")] + in_name: Option, + // out name, `None` if the node is new to the network + #[serde(skip_serializing_if = "Option::is_none")] + out_name: Option, + out_remotes: BTreeMap, + in_remotes: BTreeMap, + stats: NetworkTopologyStats, +} + +impl NetworkTopologyChange { + pub fn new(old_network: Network, new_network: Network) -> Self { + // we use our old name as export name + let out_name = old_network.local; + // we use our new name as import name + let in_name = new_network.local; + // we export to the new network + let mut out_remotes = new_network.remotes; + // don't export to ourselves + if let Some(in_name) = &in_name { + out_remotes.remove(in_name); + } + let in_remotes = old_network + .remotes + .into_keys() + // don't await imports from ourselves + .filter(|name| Some(name.as_str()) != out_name.as_deref()) + .map(|name| (name, InRemote::new())) + .collect(); + Self { + state: NetworkTopologyState::WaitingForOlderTasks, + in_name, + out_name, + out_remotes, + in_remotes, + stats: NetworkTopologyStats { received_documents: 0, moved_documents: 0 }, + } + } + + pub fn state(&self) -> NetworkTopologyState { + self.state + } + + pub fn out_name(&self) -> Option<&str> { + // unwrap: one of out name or in_name must be defined + self.out_name.as_deref() + } + + pub fn in_name(&self) -> Option<&str> { + self.in_name.as_deref() + } + + pub fn export_to_process(&self) -> Option<(&BTreeMap, &str)> { + if self.state != NetworkTopologyState::ExportingDocuments { + return None; + } + + if self.out_remotes.is_empty() { + return None; + } + + let out_name = self.out_name()?; + Some((&self.out_remotes, out_name)) + } + + /// Compute the next state from the current state of the task. 
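+    ///
+    /// Transitions are forward-only: `WaitingForOlderTasks` always advances to `ExportingDocuments`;
+    /// `ExportingDocuments` and `ImportingDocuments` advance to `Finished` once `is_import_finished()`
+    /// returns `true`, and otherwise move to (or remain in) `ImportingDocuments`; `Finished` is terminal.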
+ pub fn update_state(&mut self) { + self.state = match self.state { + NetworkTopologyState::WaitingForOlderTasks => { + // no more older tasks, so finished waiting + NetworkTopologyState::ExportingDocuments + } + NetworkTopologyState::ExportingDocuments => { + // processed all exported documents + if self.is_import_finished() { + NetworkTopologyState::Finished + } else { + NetworkTopologyState::ImportingDocuments + } + } + NetworkTopologyState::ImportingDocuments => { + if self.is_import_finished() { + NetworkTopologyState::Finished + } else { + NetworkTopologyState::ImportingDocuments + } + } + NetworkTopologyState::Finished => NetworkTopologyState::Finished, + }; + } + + pub fn receive_remote_task( + &mut self, + remote_name: &str, + index_name: &str, + task_key: DocumentId, + document_count: u64, + total_indexes: u64, + total_index_documents: u64, + ) -> Result<(), ReceiveTaskError> { + let remote = self + .in_remotes + .get_mut(remote_name) + .ok_or_else(|| ReceiveTaskError::UnknownRemote(remote_name.to_string()))?; + remote.import_state = match std::mem::take(&mut remote.import_state) { + ImportState::WaitingForInitialTask => { + if total_indexes == 0 { + ImportState::Finished { total_indexes, total_documents: 0 } + } else { + let mut task_keys = BTreeSet::new(); + task_keys.insert(task_key); + let mut import_index_state = BTreeMap::new(); + import_index_state.insert( + index_name.to_owned(), + ImportIndexState::Ongoing { + total_documents: total_index_documents, + received_documents: document_count, + task_keys, + processed_documents: 0, + }, + ); + ImportState::Ongoing { import_index_state, total_indexes } + } + } + ImportState::Ongoing { mut import_index_state, total_indexes } => { + if let Some((index_name, mut index_state)) = + import_index_state.remove_entry(index_name) + { + index_state = match index_state { + ImportIndexState::Ongoing { + total_documents, + received_documents: previously_received, + processed_documents, + mut task_keys, + } => { + if !task_keys.insert(task_key) { + return Err(ReceiveTaskError::DuplicateTask(task_key)); + } + + ImportIndexState::Ongoing { + total_documents, + received_documents: previously_received + document_count, + processed_documents, + task_keys, + } + } + ImportIndexState::Finished { total_documents } => { + ImportIndexState::Finished { total_documents } + } + }; + import_index_state.insert(index_name, index_state); + } else { + let mut task_keys = BTreeSet::new(); + task_keys.insert(task_key); + let state = ImportIndexState::Ongoing { + total_documents: total_index_documents, + received_documents: document_count, + processed_documents: 0, + task_keys, + }; + import_index_state.insert(index_name.to_string(), state); + } + ImportState::Ongoing { import_index_state, total_indexes: total_indexes } + } + ImportState::Finished { total_indexes, total_documents } => { + ImportState::Finished { total_indexes, total_documents } + } + }; + Ok(()) + } + + pub fn process_remote_tasks( + &mut self, + remote_name: &str, + index_name: &str, + document_count: u64, + ) { + /// FIXME: unwraps and panics + let remote = self.in_remotes.get_mut(remote_name).unwrap(); + remote.import_state = match std::mem::take(&mut remote.import_state) { + ImportState::WaitingForInitialTask => panic!("no task received yet one processed"), + ImportState::Ongoing { mut import_index_state, total_indexes } => { + let (index_name, mut index_state) = + import_index_state.remove_entry(index_name).unwrap(); + index_state = match index_state { + ImportIndexState::Ongoing { + 
total_documents, + received_documents, + processed_documents: previously_processed, + task_keys, + } => { + let newly_processed_documents = previously_processed + document_count; + if newly_processed_documents >= total_documents { + ImportIndexState::Finished { total_documents } + } else { + ImportIndexState::Ongoing { + total_documents, + received_documents, + processed_documents: newly_processed_documents, + task_keys, + } + } + } + ImportIndexState::Finished { total_documents } => { + ImportIndexState::Finished { total_documents } + } + }; + import_index_state.insert(index_name, index_state); + if import_index_state.len() as u64 == total_indexes + && import_index_state.values().all(|index| index.is_finished()) + { + let total_documents = + import_index_state.values().map(|index| index.total_documents()).sum(); + ImportState::Finished { total_indexes, total_documents } + } else { + ImportState::Ongoing { import_index_state, total_indexes } + } + } + ImportState::Finished { total_indexes, total_documents } => { + ImportState::Finished { total_indexes, total_documents } + } + } + } + + pub fn to_details(&self) -> Details { + let message = match self.state { + NetworkTopologyState::WaitingForOlderTasks => { + "Waiting for tasks enqueued before the network change to finish processing".into() + } + NetworkTopologyState::ExportingDocuments => "Exporting documents".into(), + NetworkTopologyState::ImportingDocuments => { + let mut finished_count = 0; + let mut first_ongoing = None; + let mut ongoing_total_indexes = 0; + let mut ongoing_processed_documents = 0; + let mut ongoing_missing_documents = 0; + let mut ongoing_total_documents = 0; + let mut other_ongoing_count = 0; + let mut first_waiting = None; + let mut other_waiting_count = 0; + for (remote_name, in_remote) in &self.in_remotes { + match &in_remote.import_state { + ImportState::WaitingForInitialTask => { + first_waiting = match first_waiting { + None => Some(remote_name), + first_waiting => { + other_waiting_count += 1; + first_waiting + } + }; + } + ImportState::Ongoing { import_index_state, total_indexes } => { + first_ongoing = match first_ongoing { + None => { + ongoing_total_indexes = *total_indexes; + Some(remote_name) + } + first_ongoing => { + other_ongoing_count += 1; + first_ongoing + } + }; + for import_state in import_index_state.values() { + match import_state { + ImportIndexState::Ongoing { + total_documents, + processed_documents, + received_documents, + task_keys: _, + } => { + ongoing_total_documents += total_documents; + ongoing_processed_documents += processed_documents; + ongoing_missing_documents += + total_documents.saturating_sub(*received_documents); + } + ImportIndexState::Finished { total_documents } => { + ongoing_total_documents += total_documents; + ongoing_processed_documents += total_documents; + } + } + } + } + ImportState::Finished { total_indexes, total_documents } => { + finished_count += 1; + ongoing_total_indexes = *total_indexes; + ongoing_total_documents += *total_documents; + ongoing_processed_documents += *total_documents; + } + } + } + format!( + "Importing documents from {total} remotes{waiting}{ongoing}{finished}", + total = self.in_remotes.len(), + waiting = if let Some(first_waiting) = first_waiting { + &format!( + ", waiting on first task from `{}`{others}", + first_waiting, + others = if other_waiting_count > 0 { + &format!(" and {other_waiting_count} other remotes") + } else { + "" + } + ) + } else { + "" + }, + ongoing = if let Some(first_ongoing) = first_ongoing { + &format!(", 
awaiting {ongoing_missing_documents} and processed {ongoing_processed_documents} out of {ongoing_total_documents} documents in {ongoing_total_indexes} indexes from `{first_ongoing}`{others}", + others=if other_ongoing_count > 0 {&format!(" and {other_ongoing_count} other remotes")} else {""}) + } else { + "" + }, + finished = if finished_count >= 0 { + &format!(", {finished_count} remotes finished processing") + } else { + "" + } + ) + } + NetworkTopologyState::Finished => "Finished".into(), + }; + Details::NetworkTopologyChange { + moved_documents: self.stats.moved_documents, + received_documents: self.stats.received_documents, + message, + } + } + + pub fn is_import_finished(&self) -> bool { + self.in_remotes.values().all(|remote| remote.is_finished()) + } + + pub fn merge(&mut self, other: NetworkTopologyChange) { + // The topology change has a guarantee of forward progress, so for each field we're going to keep the "most advanced" values. + let Self { state, in_name: _, out_name: _, out_remotes: _, in_remotes, stats } = self; + + *state = Ord::max(*state, other.state); + *stats = Ord::max(*stats, other.stats); + + for (old_value, new_value) in other.in_remotes.into_values().zip(in_remotes.values_mut()) { + new_value.import_state = match (old_value.import_state, std::mem::take(&mut new_value.import_state)) { + // waiting for initial task is always older + (ImportState::WaitingForInitialTask, newer) + | (newer, ImportState::WaitingForInitialTask) + + // finished is always newer + | (_, newer @ ImportState::Finished { .. }) + | (newer @ ImportState::Finished { .. }, _) => newer, + ( + ImportState::Ongoing { import_index_state: left_import, total_indexes: left_total_indexes }, + ImportState::Ongoing { import_index_state: right_import, total_indexes: right_total_indexes }, + ) => { + let import_index_state = left_import.into_iter().merge_join_by(right_import.into_iter(), |(k,_), (x, _)|k.cmp(x)).map(|eob| + match eob { + EitherOrBoth::Both((name, left), (_, right)) => { + let newer = merge_import_index_state(left, right); + (name, newer) + }, + EitherOrBoth::Left(import) | + EitherOrBoth::Right(import) => import, + } + ).collect(); + + ImportState::Ongoing{ import_index_state, total_indexes : u64::max(left_total_indexes, right_total_indexes) } + }, + } + } + } +} + +fn merge_import_index_state(left: ImportIndexState, right: ImportIndexState) -> ImportIndexState { + let newer = match (left, right) { + (_, newer @ ImportIndexState::Finished { .. }) => newer, + (newer @ ImportIndexState::Finished { .. 
}, _) => newer, + ( + ImportIndexState::Ongoing { + total_documents: left_total_documents, + received_documents: left_received_documents, + processed_documents: left_processed_documents, + task_keys: mut left_task_keys, + }, + ImportIndexState::Ongoing { + total_documents: right_total_documents, + received_documents: right_received_documents, + processed_documents: right_processed_documents, + task_keys: mut right_task_keys, + }, + ) => { + let total_documents = u64::max(left_total_documents, right_total_documents); + let received_documents = u64::max(left_received_documents, right_received_documents); + let processed_documents = u64::max(left_processed_documents, right_processed_documents); + left_task_keys.append(&mut right_task_keys); + let task_keys = left_task_keys; + + ImportIndexState::Ongoing { + total_documents, + received_documents, + processed_documents, + task_keys, + } + } + }; + newer +} + +pub enum ReceiveTaskError { + UnknownRemote(String), + DuplicateTask(DocumentId), +} + +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)] +#[serde(rename_all = "camelCase")] +pub enum NetworkTopologyState { + WaitingForOlderTasks, + ExportingDocuments, + ImportingDocuments, + Finished, +} + +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)] +#[serde(rename_all = "camelCase")] +pub struct NetworkTopologyStats { + pub received_documents: u64, + pub moved_documents: u64, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct InRemote { + import_state: ImportState, +} + +impl InRemote { + pub fn new() -> Self { + Self { import_state: ImportState::WaitingForInitialTask } + } + + pub fn is_finished(&self) -> bool { + matches!(self.import_state, ImportState::Finished { .. }) + } +} + +#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +enum ImportState { + /// Initially Meilisearch doesn't know how many documents it should expect from a remote. + /// The first task for each remote contains the information of how many indexes will be imported, + /// and the first task for each index contains the number of documents to import for that index. + #[default] + WaitingForInitialTask, + Ongoing { + import_index_state: BTreeMap, + total_indexes: u64, + }, + Finished { + total_indexes: u64, + total_documents: u64, + }, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +enum ImportIndexState { + Ongoing { + total_documents: u64, + received_documents: u64, + processed_documents: u64, + task_keys: BTreeSet, + }, + Finished { + total_documents: u64, + }, +} + +impl ImportIndexState { + pub fn is_finished(&self) -> bool { + matches!(self, ImportIndexState::Finished { .. }) + } + + fn total_documents(&self) -> u64 { + match *self { + ImportIndexState::Ongoing { total_documents, .. 
} + | ImportIndexState::Finished { total_documents } => total_documents, + } + } +} + +pub mod headers { + pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote"; + pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid"; + pub const PROXY_ORIGIN_NETWORK_VERSION_HEADER: &str = "Meili-Proxy-Origin-Network-Version"; + pub const PROXY_IMPORT_REMOTE_HEADER: &str = "Meili-Proxy-Import-Remote"; + pub const PROXY_IMPORT_INDEX_COUNT_HEADER: &str = "Meili-Proxy-Import-Index-Count"; + pub const PROXY_IMPORT_INDEX_HEADER: &str = "Meili-Proxy-Import-Index"; + pub const PROXY_IMPORT_TASK_KEY_HEADER: &str = "Meili-Proxy-Import-Task-Key"; + pub const PROXY_IMPORT_DOCS_HEADER: &str = "Meili-Proxy-Import-Docs"; + pub const PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER: &str = "Meili-Proxy-Import-Total-Index-Docs"; +} diff --git a/crates/meilisearch-types/src/tasks.rs b/crates/meilisearch-types/src/tasks/mod.rs similarity index 95% rename from crates/meilisearch-types/src/tasks.rs rename to crates/meilisearch-types/src/tasks/mod.rs index df8dd0f83..a497cf5f2 100644 --- a/crates/meilisearch-types/src/tasks.rs +++ b/crates/meilisearch-types/src/tasks/mod.rs @@ -23,6 +23,8 @@ use crate::{versioning, InstanceUid}; pub type TaskId = u32; +pub mod enterprise_edition; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Task { @@ -44,7 +46,7 @@ pub struct Task { pub kind: KindWithContent, #[serde(default, skip_serializing_if = "Option::is_none")] - pub network: Option, + pub network: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub custom_metadata: Option, @@ -61,6 +63,7 @@ impl Task { | TaskDeletion { .. } | Export { .. } | UpgradeDatabase { .. } + | NetworkTopologyChange { .. } | IndexSwap { .. } => None, DocumentAdditionOrUpdate { index_uid, .. } | DocumentEdition { index_uid, .. } @@ -99,6 +102,7 @@ impl Task { | KindWithContent::SnapshotCreation | KindWithContent::Export { .. } | KindWithContent::UpgradeDatabase { .. } + | KindWithContent::NetworkTopologyChange { .. } | KindWithContent::IndexCompaction { .. } => None, } } @@ -178,6 +182,7 @@ pub enum KindWithContent { IndexCompaction { index_uid: String, }, + NetworkTopologyChange(enterprise_edition::network::NetworkTopologyChange), } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] @@ -215,6 +220,7 @@ impl KindWithContent { KindWithContent::Export { .. } => Kind::Export, KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase, KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction, + KindWithContent::NetworkTopologyChange { .. } => Kind::NetworkTopologyChange, } } @@ -227,6 +233,7 @@ impl KindWithContent { | TaskCancelation { .. } | TaskDeletion { .. } | Export { .. } + | NetworkTopologyChange { .. } | UpgradeDatabase { .. } => vec![], DocumentAdditionOrUpdate { index_uid, .. } | DocumentEdition { index_uid, .. } @@ -340,6 +347,11 @@ impl KindWithContent { pre_compaction_size: None, post_compaction_size: None, }), + KindWithContent::NetworkTopologyChange { .. } => Some(Details::NetworkTopologyChange { + moved_documents: 0, + received_documents: 0, + message: "processing tasks for previous network versions".into(), + }), } } @@ -392,7 +404,7 @@ impl KindWithContent { }) } KindWithContent::IndexSwap { .. 
} => { - todo!() + unimplemented!("do not call `default_finished_details` for `IndexSwap` tasks") } KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation { matched_tasks: tasks.len(), @@ -427,6 +439,9 @@ impl KindWithContent { pre_compaction_size: None, post_compaction_size: None, }), + KindWithContent::NetworkTopologyChange(network_topology_change) => { + Some(network_topology_change.to_details()) + } } } } @@ -494,6 +509,9 @@ impl From<&KindWithContent> for Option
{ pre_compaction_size: None, post_compaction_size: None, }), + KindWithContent::NetworkTopologyChange(network_topology_change) => { + Some(network_topology_change.to_details()) + } } } } @@ -605,6 +623,7 @@ pub enum Kind { Export, UpgradeDatabase, IndexCompaction, + NetworkTopologyChange, } impl Kind { @@ -624,6 +643,7 @@ impl Kind { | Kind::DumpCreation | Kind::Export | Kind::UpgradeDatabase + | Kind::NetworkTopologyChange | Kind::SnapshotCreation => false, } } @@ -646,6 +666,7 @@ impl Display for Kind { Kind::Export => write!(f, "export"), Kind::UpgradeDatabase => write!(f, "upgradeDatabase"), Kind::IndexCompaction => write!(f, "indexCompaction"), + Kind::NetworkTopologyChange => write!(f, "networkTopologyChange"), } } } @@ -683,6 +704,8 @@ impl FromStr for Kind { Ok(Kind::UpgradeDatabase) } else if kind.eq_ignore_ascii_case("indexCompaction") { Ok(Kind::IndexCompaction) + } else if kind.eq_ignore_ascii_case("networkTopologyChange") { + Ok(Kind::NetworkTopologyChange) } else { Err(ParseTaskKindError(kind.to_owned())) } @@ -773,36 +796,11 @@ pub enum Details { pre_compaction_size: Option, post_compaction_size: Option, }, -} - -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] -#[serde(untagged, rename_all = "camelCase")] -pub enum TaskNetwork { - Origin { origin: Origin }, - Remotes { remote_tasks: BTreeMap }, -} -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct Origin { - pub remote_name: String, - pub task_uid: usize, -} - -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct RemoteTask { - #[serde(skip_serializing_if = "Option::is_none")] - task_uid: Option, - error: Option, -} - -impl From> for RemoteTask { - fn from(res: Result) -> RemoteTask { - match res { - Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None }, - Err(err) => RemoteTask { task_uid: None, error: Some(err) }, - } - } + NetworkTopologyChange { + moved_documents: u64, + received_documents: u64, + message: String, + }, } #[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] @@ -845,6 +843,9 @@ impl Details { | Self::Export { .. } | Self::UpgradeDatabase { .. } | Self::IndexSwap { .. } => (), + Self::NetworkTopologyChange { moved_documents: _, received_documents: _, message } => { + *message = format!("Failed. 
Previous status: {}", message); + } } details diff --git a/crates/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs index 371e5c67d..4de438b81 100644 --- a/crates/meilisearch/src/error.rs +++ b/crates/meilisearch/src/error.rs @@ -6,8 +6,13 @@ use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; use meilisearch_types::milli; use meilisearch_types::milli::OrderBy; +use meilisearch_types::tasks::enterprise_edition::network::headers::{ + PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER, + PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, +}; use serde_json::Value; use tokio::task::JoinError; +use uuid::Uuid; use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER}; @@ -93,8 +98,51 @@ pub enum MeilisearchHttpError { } else { PROXY_ORIGIN_TASK_UID_HEADER } )] InconsistentOriginHeaders { is_remote_missing: bool }, + #[error("Inconsistent `Import` headers: {remote}: {remote_status}, {index}: {index_status}, {docs}: {docs_status}.\n - Hint: either all three headers should be provided, or none of them", + remote = PROXY_IMPORT_REMOTE_HEADER, + remote_status = if *is_remote_missing { "missing" } else{ "provided" }, + index = PROXY_IMPORT_INDEX_HEADER, + index_status = if *is_index_missing { "missing" } else { "provided" }, + docs = PROXY_IMPORT_DOCS_HEADER, + docs_status = if *is_docs_missing { "missing" } else { "provided" } + )] + InconsistentImportHeaders { + is_remote_missing: bool, + is_index_missing: bool, + is_docs_missing: bool, + }, + #[error("Inconsistent `Import-Metadata` headers: {index_count}: {index_count_status}, {task_key}: {task_key_status}, {total_index_documents}: {total_index_documents_status}.\n - Hint: either all three headers should be provided, or none of them", + index_count = PROXY_IMPORT_INDEX_COUNT_HEADER, + index_count_status = if *is_index_count_missing { "missing" } else { "provided"}, + task_key = PROXY_IMPORT_TASK_KEY_HEADER, + task_key_status = if *is_task_key_missing { "missing" } else { "provided"}, + total_index_documents = PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, + total_index_documents_status = if *is_total_index_documents_missing { "missing" } else { "provided"}, + )] + InconsistentImportMetadataHeaders { + is_index_count_missing: bool, + is_task_key_missing: bool, + is_total_index_documents_missing: bool, + }, + + #[error( + "Inconsistent task network headers: origin headers: {origin_status}, import headers: {import_status}, import metadata: {import_metadata_status}", + origin_status = if *is_missing_origin { "missing"} else { "present" }, + import_status = if *is_missing_import { "missing"} else { "present" }, + import_metadata_status = if *is_missing_import_metadata { "missing"} else { "present" })] + InconsistentTaskNetworkHeaders { + is_missing_origin: bool, + is_missing_import: bool, + is_missing_import_metadata: bool, + }, #[error("Invalid value for header {header_name}: {msg}")] InvalidHeaderValue { header_name: &'static str, msg: String }, + #[error("This remote is not the leader of the network.\n - Note: only the leader `{leader}` can receive new tasks.")] + NotLeader { leader: String }, + #[error("Unexpected `previousRemotes` in network call.\n - Note: `previousRemote` is reserved for internal use.")] + UnexpectedNetworkPreviousRemotes, + #[error("The network version in request is too old.\n - Received: {received}\n - Expected at least: 
{expected_at_least}")] + NetworkVersionTooOld { received: Uuid, expected_at_least: Uuid }, } impl MeilisearchHttpError { @@ -142,10 +190,18 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PersonalizationInFederatedQuery(_) => { Code::InvalidMultiSearchQueryPersonalization } - MeilisearchHttpError::InconsistentOriginHeaders { .. } => { + MeilisearchHttpError::InconsistentOriginHeaders { .. } + | MeilisearchHttpError::InconsistentImportHeaders { .. } + | MeilisearchHttpError::InconsistentImportMetadataHeaders { .. } + | MeilisearchHttpError::InconsistentTaskNetworkHeaders { .. } => { Code::InconsistentDocumentChangeHeaders } MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue, + MeilisearchHttpError::NotLeader { .. } => Code::NotLeader, + MeilisearchHttpError::UnexpectedNetworkPreviousRemotes => { + Code::UnexpectedNetworkPreviousRemotes + } + MeilisearchHttpError::NetworkVersionTooOld { .. } => Code::NetworkVersionTooOld, } } } diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index 673df64cc..8bbf4e3a2 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -45,7 +45,9 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::indexes::enterprise_edition::proxy::{proxy, Body}; +use crate::routes::indexes::enterprise_edition::proxy::{ + proxy, task_network_and_check_leader_and_version, Body, +}; use crate::routes::indexes::search::fix_sort_query_parameters; use crate::routes::{ get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, @@ -342,6 +344,7 @@ pub async fn delete_document( let DocumentParam { index_uid, document_id } = path.into_inner(); let index_uid = IndexUid::try_from(index_uid)?; let network = index_scheduler.network(); + let task_network = task_network_and_check_leader_and_version(&req, &network)?; analytics.publish( DocumentsDeletionAggregator { @@ -359,16 +362,23 @@ pub async fn delete_document( }; let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; - let task = { + let mut task = { let index_scheduler = index_scheduler.clone(); tokio::task::spawn_blocking(move || { - index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run) + index_scheduler.register_with_custom_metadata( + task, + uid, + custom_metadata, + dry_run, + task_network, + ) }) .await?? 
}; - if network.sharding && !dry_run { - proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?; + if let Some(task_network) = task.network.take() { + proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task) + .await?; } let task: SummarizedTaskView = task.into(); @@ -967,6 +977,7 @@ async fn document_addition( ) -> Result { let mime_type = extract_mime_type(req)?; let network = index_scheduler.network(); + let task_network = task_network_and_check_leader_and_version(&req, &network)?; let format = match ( mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())), @@ -1085,9 +1096,16 @@ async fn document_addition( index_uid: index_uid.to_string(), }; + /// FIXME: not new to this PR, but _any_ error here will cause the payload to unduly persist let scheduler = index_scheduler.clone(); - let task = match tokio::task::spawn_blocking(move || { - scheduler.register_with_custom_metadata(task, task_id, custom_metadata, dry_run) + let mut task = match tokio::task::spawn_blocking(move || { + scheduler.register_with_custom_metadata( + task, + task_id, + custom_metadata, + dry_run, + task_network, + ) }) .await? { @@ -1098,12 +1116,13 @@ async fn document_addition( } }; - if network.sharding { + if let Some(task_network) = task.network.take() { if let Some(file) = file { proxy( &index_scheduler, - &index_uid, + Some(&index_uid), req, + task_network, network, Body::with_ndjson_payload(file), &task, @@ -1194,6 +1213,7 @@ pub async fn delete_documents_batch( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let network = index_scheduler.network(); + let task_network = task_network_and_check_leader_and_version(&req, &network)?; analytics.publish( DocumentsDeletionAggregator { @@ -1214,16 +1234,31 @@ pub async fn delete_documents_batch( KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; - let task = { + let mut task = { let index_scheduler = index_scheduler.clone(); tokio::task::spawn_blocking(move || { - index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run) + index_scheduler.register_with_custom_metadata( + task, + uid, + custom_metadata, + dry_run, + task_network, + ) }) .await?? }; - if network.sharding && !dry_run { - proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?; + if let Some(task_network) = task.network.take() { + proxy( + &index_scheduler, + Some(&index_uid), + &req, + task_network, + network, + Body::inline(body), + &task, + ) + .await?; } let task: SummarizedTaskView = task.into(); @@ -1286,6 +1321,7 @@ pub async fn delete_documents_by_filter( let index_uid = index_uid.into_inner(); let filter = body.into_inner(); let network = index_scheduler.network(); + let task_network = task_network_and_check_leader_and_version(&req, &network)?; analytics.publish( DocumentsDeletionAggregator { @@ -1312,16 +1348,31 @@ pub async fn delete_documents_by_filter( let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; - let task = { + let mut task = { let index_scheduler = index_scheduler.clone(); tokio::task::spawn_blocking(move || { - index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run) + index_scheduler.register_with_custom_metadata( + task, + uid, + custom_metadata, + dry_run, + task_network, + ) }) .await?? 
}; - if network.sharding && !dry_run { - proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?; + if let Some(task_network) = task.network.take() { + proxy( + &index_scheduler, + Some(&index_uid), + &req, + task_network, + network, + Body::inline(filter), + &task, + ) + .await?; } let task: SummarizedTaskView = task.into(); @@ -1421,6 +1472,7 @@ pub async fn edit_documents_by_function( .check_edit_documents_by_function("Using the documents edit route")?; let network = index_scheduler.network(); + let task_network = task_network_and_check_leader_and_version(&req, &network)?; let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); @@ -1467,16 +1519,31 @@ pub async fn edit_documents_by_function( let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; - let task = { + let mut task = { let index_scheduler = index_scheduler.clone(); tokio::task::spawn_blocking(move || { - index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run) + index_scheduler.register_with_custom_metadata( + task, + uid, + custom_metadata, + dry_run, + task_network, + ) }) .await?? }; - if network.sharding && !dry_run { - proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?; + if let Some(task_network) = task.network.take() { + proxy( + &index_scheduler, + Some(&index_uid), + &req, + task_network, + network, + Body::inline(body), + &task, + ) + .await?; } let task: SummarizedTaskView = task.into(); @@ -1525,6 +1592,7 @@ pub async fn clear_all_documents( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let network = index_scheduler.network(); let CustomMetadataQuery { custom_metadata } = params.into_inner(); + let task_network = task_network_and_check_leader_and_version(&req, &network)?; analytics.publish( DocumentsDeletionAggregator { @@ -1540,17 +1608,24 @@ pub async fn clear_all_documents( let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; - let task = { + let mut task = { let index_scheduler = index_scheduler.clone(); tokio::task::spawn_blocking(move || { - index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run) + index_scheduler.register_with_custom_metadata( + task, + uid, + custom_metadata, + dry_run, + task_network, + ) }) .await?? 
}; - if network.sharding && !dry_run { - proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?; + if let Some(task_network) = task.network.take() { + proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task) + .await?; } let task: SummarizedTaskView = task.into(); diff --git a/crates/meilisearch/src/routes/indexes/enterprise_edition/proxy.rs b/crates/meilisearch/src/routes/indexes/enterprise_edition/proxy.rs index 7cb3eb8cc..937f08e79 100644 --- a/crates/meilisearch/src/routes/indexes/enterprise_edition/proxy.rs +++ b/crates/meilisearch/src/routes/indexes/enterprise_edition/proxy.rs @@ -3,6 +3,7 @@ // Use of this source code is governed by the Business Source License 1.1, // as found in the LICENSE-EE file or at +use std::borrow::Cow; use std::collections::BTreeMap; use std::fs::File; @@ -10,11 +11,22 @@ use actix_web::http::header::CONTENT_TYPE; use actix_web::HttpRequest; use bytes::Bytes; use index_scheduler::IndexScheduler; +use meilisearch_types::enterprise_edition::network::Remote; use meilisearch_types::error::ResponseError; -use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork}; +use meilisearch_types::milli::DocumentId; +use meilisearch_types::tasks::enterprise_edition::network::headers::{ + PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER, + PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, + PROXY_ORIGIN_NETWORK_VERSION_HEADER, PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER, +}; +use meilisearch_types::tasks::enterprise_edition::network::{ + DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork, +}; +use meilisearch_types::tasks::Task; use reqwest::StatusCode; use serde::de::DeserializeOwned; use serde_json::Value; +use uuid::Uuid; use crate::error::MeilisearchHttpError; use crate::routes::indexes::enterprise_edition::proxy::error::{ @@ -22,13 +34,18 @@ use crate::routes::indexes::enterprise_edition::proxy::error::{ }; use crate::routes::SummarizedTaskView; -pub enum Body { +pub enum Body +where + T: serde::Serialize, + F: FnMut(&str, &Remote, &mut T), +{ NdJsonPayload(File), Inline(T), + Generated(T, F), None, } -impl Body<()> { +impl Body<(), fn(&str, &Remote, &mut ())> { pub fn with_ndjson_payload(file: File) -> Self { Self::NdJsonPayload(file) } @@ -38,7 +55,125 @@ impl Body<()> { } } -/// If necessary, proxies the passed request to the network and update the task description. +impl Body +where + T: serde::Serialize, +{ + pub fn inline(payload: T) -> Self { + Self::Inline(payload) + } +} + +impl Body +where + T: serde::Serialize, + F: FnMut(&str, &Remote, &mut T), +{ + pub fn generated(initial: T, f: F) -> Self { + Self::Generated(initial, f) + } +} + +impl Body +where + T: serde::Serialize, + F: FnMut(&str, &Remote, &mut T), +{ + pub fn into_bytes_iter( + self, + remotes: impl IntoIterator, + ) -> Result< + impl Iterator, (String, Remote))>, + meilisearch_types::milli::Error, + > { + let bytes = match self { + Body::NdJsonPayload(file) => { + Some(Bytes::from_owner(unsafe { memmap2::Mmap::map(&file)? 
})) + } + + Body::Inline(payload) => { + Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap())) + } + + Body::None => None, + + Body::Generated(mut initial, mut f) => { + return Ok(either::Right(remotes.into_iter().map(move |(name, remote)| { + f(&name, &remote, &mut initial); + let bytes = + Some(Bytes::copy_from_slice(&serde_json::to_vec(&initial).unwrap())); + (bytes, (name, remote)) + }))); + } + }; + Ok(either::Left(std::iter::repeat(bytes).zip(remotes))) + } +} + +/// Parses the header to determine if this task is a duplicate and originates with a remote. +/// +/// If not, checks whether this remote is the leader and return `MeilisearchHttpError::NotLeader` if not. +/// +/// If there is no leader, returns `Ok(None)` +/// +/// # Errors +/// +/// - `MeiliearchHttpError::NotLeader`: if the following are true simultaneously: +/// 1. The task originates with the current node +/// 2. There's a declared `leader` +/// 3. The declared leader is **not** the current node +/// - `MeilisearchHttpError::InvalidHeaderValue`: if headers cannot be parsed as a task network. +/// - `MeilisearchHttpError::InconsistentTaskNetwork`: if only some of the headers are present. +pub fn task_network_and_check_leader_and_version( + req: &HttpRequest, + network: &meilisearch_types::enterprise_edition::network::Network, +) -> Result, MeilisearchHttpError> { + let task_network = + match (origin_from_req(req)?, import_data_from_req(req)?, import_metadata_from_req(req)?) { + (Some(network_change), Some(import_from), Some(metadata)) => { + TaskNetwork::Import { import_from, network_change, metadata } + } + (Some(origin), None, None) => TaskNetwork::Origin { origin }, + (None, None, None) => { + match (network.leader.as_deref(), network.local.as_deref()) { + // 1. Always allowed if there is no leader + (None, _) => return Ok(None), + // 2. Allowed if the leader is self + (Some(leader), Some(this)) if leader == this => (), + // 3. Any other change is disallowed + (Some(leader), _) => { + return Err( + MeilisearchHttpError::NotLeader { leader: leader.to_string() }.into() + ) + } + } + + TaskNetwork::Remotes { + remote_tasks: Default::default(), + network_version: network.version, + } + } + // all good cases were matched, so this is always an error + (origin, import_from, metadata) => { + return Err(MeilisearchHttpError::InconsistentTaskNetworkHeaders { + is_missing_origin: origin.is_none(), + is_missing_import: import_from.is_none(), + is_missing_import_metadata: metadata.is_none(), + }) + } + }; + + if task_network.network_version() < network.version { + return Err(MeilisearchHttpError::NetworkVersionTooOld { + received: task_network.network_version(), + expected_at_least: network.version, + }); + } + + Ok(Some(task_network)) +} + +/// Updates the task description and, if necessary, proxies the passed request to the network and update the task description. /// /// This function reads the custom headers from the request to determine if must proxy the request or if the request /// has already been proxied. @@ -48,152 +183,143 @@ impl Body<()> { /// with the task ids from the task queues of the remotes. /// - when the request has already been proxied, the custom headers contains information about the remote that created the initial task. /// This information is copied to the passed task. -pub async fn proxy( +/// +/// # Returns +/// +/// The updated task. The task is read back from the database to avoid erasing concurrent changes. 
+pub async fn proxy( index_scheduler: &IndexScheduler, - index_uid: &str, + index_uid: Option<&str>, req: &HttpRequest, + mut task_network: DbTaskNetwork, network: meilisearch_types::enterprise_edition::network::Network, - body: Body, + body: Body, task: &meilisearch_types::tasks::Task, -) -> Result<(), MeilisearchHttpError> { - match origin_from_req(req)? { - Some(origin) => { - index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })? +) -> Result +where + T: serde::Serialize, + F: FnMut(&str, &Remote, &mut T), +{ + if let DbTaskNetwork::Remotes { remote_tasks, network_version } = &mut task_network { + let network_version = *network_version; + let this = network + .local + .as_deref() + .expect("inconsistent `network.sharding` and `network.self`") + .to_owned(); + + let content_type = match &body { + // for file bodies, force x-ndjson + Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()), + // otherwise get content type from request + _ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()), + }; + + let mut in_flight_remote_queries = BTreeMap::new(); + let client = reqwest::ClientBuilder::new() + .connect_timeout(std::time::Duration::from_secs(3)) + .build() + .unwrap(); + + let method = from_old_http_method(req.method()); + + // send payload to all remotes + for (body, (node_name, node)) in body + .into_bytes_iter(network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)) + .map_err(|err| { + MeilisearchHttpError::from_milli(err, index_uid.map(ToOwned::to_owned)) + })? + { + tracing::trace!(node_name, "proxying task to remote"); + + let client = client.clone(); + let api_key = node.write_api_key; + let this = this.clone(); + let method = method.clone(); + let path_and_query = req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/"); + + in_flight_remote_queries.insert( + node_name, + tokio::spawn({ + let url = format!("{}{}", node.url, path_and_query); + + let url_encoded_this = urlencoding::encode(&this).into_owned(); + let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize + + let content_type = content_type.map(|b| b.to_owned()); + + let backoff = backoff::ExponentialBackoffBuilder::new() + .with_max_elapsed_time(Some(std::time::Duration::from_secs(25))) + .build(); + + backoff::future::retry(backoff, move || { + let url = url.clone(); + let client = client.clone(); + let url_encoded_this = url_encoded_this.clone(); + let url_encoded_task_uid = url_encoded_task_uid.clone(); + let content_type = content_type.clone(); + + let body = body.clone(); + let api_key = api_key.clone(); + let method = method.clone(); + + async move { + try_proxy( + method, + &url, + content_type.as_deref(), + network_version, + api_key.as_deref(), + &client, + &url_encoded_this, + &url_encoded_task_uid, + body, + ) + .await + } + }) + }), + ); } - None => { - let this = network - .local - .as_deref() - .expect("inconsistent `network.sharding` and `network.self`") - .to_owned(); - let content_type = match &body { - // for file bodies, force x-ndjson - Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()), - // otherwise get content type from request - _ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()), - }; + // wait for all in-flight queries to finish and collect their results + for (node_name, handle) in in_flight_remote_queries { + match handle.await { + Ok(Ok(res)) => { + let task_uid = res.task_uid; - let body = match body { - Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe { - 
memmap2::Mmap::map(&file).map_err(|err| { - MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned())) - })? - })), - - Body::Inline(payload) => { - Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap())) + remote_tasks.insert(node_name, Ok(task_uid).into()); } - - Body::None => None, - }; - - let mut in_flight_remote_queries = BTreeMap::new(); - let client = reqwest::ClientBuilder::new() - .connect_timeout(std::time::Duration::from_secs(3)) - .build() - .unwrap(); - - let method = from_old_http_method(req.method()); - - // send payload to all remotes - for (node_name, node) in - network.remotes.into_iter().filter(|(name, _)| name.as_str() != this) - { - let body = body.clone(); - let client = client.clone(); - let api_key = node.write_api_key; - let this = this.clone(); - let method = method.clone(); - let path_and_query = - req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/"); - - in_flight_remote_queries.insert( - node_name, - tokio::spawn({ - let url = format!("{}{}", node.url, path_and_query); - - let url_encoded_this = urlencoding::encode(&this).into_owned(); - let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize - - let content_type = content_type.map(|b| b.to_owned()); - - let backoff = backoff::ExponentialBackoffBuilder::new() - .with_max_elapsed_time(Some(std::time::Duration::from_secs(25))) - .build(); - - backoff::future::retry(backoff, move || { - let url = url.clone(); - let client = client.clone(); - let url_encoded_this = url_encoded_this.clone(); - let url_encoded_task_uid = url_encoded_task_uid.clone(); - let content_type = content_type.clone(); - - let body = body.clone(); - let api_key = api_key.clone(); - let method = method.clone(); - - async move { - try_proxy( - method, - &url, - content_type.as_deref(), - api_key.as_deref(), - &client, - &url_encoded_this, - &url_encoded_task_uid, - body, - ) - .await - } - }) - }), - ); - } - - // wait for all in-flight queries to finish and collect their results - let mut remote_tasks: BTreeMap = BTreeMap::new(); - for (node_name, handle) in in_flight_remote_queries { - match handle.await { - Ok(Ok(res)) => { - let task_uid = res.task_uid; - - remote_tasks.insert(node_name, Ok(task_uid).into()); - } - Ok(Err(error)) => { - remote_tasks.insert(node_name, Err(error.as_response_error()).into()); - } - Err(panic) => match panic.try_into_panic() { - Ok(panic) => { - let msg = match panic.downcast_ref::<&'static str>() { - Some(s) => *s, - None => match panic.downcast_ref::() { - Some(s) => &s[..], - None => "Box", - }, - }; - remote_tasks.insert( - node_name, - Err(ResponseError::from_msg( - msg.to_string(), - meilisearch_types::error::Code::Internal, - )) - .into(), - ); - } - Err(_) => { - tracing::error!("proxy task was unexpectedly cancelled") - } - }, + Ok(Err(error)) => { + remote_tasks.insert(node_name, Err(error.as_response_error()).into()); } + Err(panic) => match panic.try_into_panic() { + Ok(panic) => { + let msg = match panic.downcast_ref::<&'static str>() { + Some(s) => *s, + None => match panic.downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + remote_tasks.insert( + node_name, + Err(ResponseError::from_msg( + msg.to_string(), + meilisearch_types::error::Code::Internal, + )) + .into(), + ); + } + Err(_) => { + tracing::error!("proxy task was unexpectedly cancelled") + } + }, } - - // edit details to contain the return values from the remotes - index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?; } } - Ok(()) 
+ Ok(index_scheduler.set_task_network(task.uid, task_network)?) } fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method { @@ -216,6 +342,7 @@ async fn try_proxy( method: reqwest::Method, url: &str, content_type: Option<&[u8]>, + network_version: Uuid, api_key: Option<&str>, client: &reqwest::Client, url_encoded_this: &str, @@ -226,6 +353,7 @@ async fn try_proxy( let request = if let Some(body) = body { request.body(body) } else { request }; let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request }; let request = request.header(PROXY_ORIGIN_TASK_UID_HEADER, url_encoded_task_uid); + let request = request.header(PROXY_ORIGIN_NETWORK_VERSION_HEADER, &network_version.to_string()); let request = request.header(PROXY_ORIGIN_REMOTE_HEADER, url_encoded_this); let request = if let Some(content_type) = content_type { request.header(CONTENT_TYPE.as_str(), content_type) @@ -375,25 +503,23 @@ mod error { } } -pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote"; -pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid"; - pub fn origin_from_req(req: &HttpRequest) -> Result, MeilisearchHttpError> { - let (remote_name, task_uid) = match ( + let (remote_name, task_uid, network_version) = match ( req.headers().get(PROXY_ORIGIN_REMOTE_HEADER), req.headers().get(PROXY_ORIGIN_TASK_UID_HEADER), + req.headers().get(PROXY_ORIGIN_NETWORK_VERSION_HEADER), ) { - (None, None) => return Ok(None), - (None, Some(_)) => { + (None, None, _) => return Ok(None), + (None, Some(_), _) => { return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true }) } - (Some(_), None) => { + (Some(_), None, _) => { return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: false, }) } - (Some(remote_name), Some(task_uid)) => ( - urlencoding::decode(remote_name.to_str().map_err(|err| { + (Some(remote_name), Some(task_uid), network_version) => { + let remote_name = urlencoding::decode(remote_name.to_str().map_err(|err| { MeilisearchHttpError::InvalidHeaderValue { header_name: PROXY_ORIGIN_REMOTE_HEADER, msg: format!("while parsing remote name as UTF-8: {err}"), @@ -402,8 +528,8 @@ pub fn origin_from_req(req: &HttpRequest) -> Result, MeilisearchH .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { header_name: PROXY_ORIGIN_REMOTE_HEADER, msg: format!("while URL-decoding remote name: {err}"), - })?, - urlencoding::decode(task_uid.to_str().map_err(|err| { + })?; + let task_uid = urlencoding::decode(task_uid.to_str().map_err(|err| { MeilisearchHttpError::InvalidHeaderValue { header_name: PROXY_ORIGIN_TASK_UID_HEADER, msg: format!("while parsing task UID as UTF-8: {err}"), @@ -412,15 +538,181 @@ pub fn origin_from_req(req: &HttpRequest) -> Result, MeilisearchH .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { header_name: PROXY_ORIGIN_TASK_UID_HEADER, msg: format!("while URL-decoding task UID: {err}"), - })?, - ), + })?; + let network_version = match network_version { + Some(network_version) => { + urlencoding::decode(network_version.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_ORIGIN_NETWORK_VERSION_HEADER, + msg: format!("while parsing network version as UTF-8: {err}"), + } + })?) + .map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_ORIGIN_NETWORK_VERSION_HEADER, + msg: format!("while URL-decoding network version: {err}"), + } + })? 
+ } + None => Cow::Borrowed("0"), + }; + (remote_name, task_uid, network_version) + } }; - let task_uid: usize = + let task_uid: u32 = task_uid.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue { header_name: PROXY_ORIGIN_TASK_UID_HEADER, msg: format!("while parsing the task UID as an integer: {err}"), })?; - Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid })) + let network_version: Uuid = Uuid::parse_str(&network_version).map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_ORIGIN_NETWORK_VERSION_HEADER, + msg: format!("while parsing the network version as an UUID: {err}"), + } + })?; + + Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid, network_version })) +} + +pub fn import_data_from_req(req: &HttpRequest) -> Result, MeilisearchHttpError> { + let (remote_name, index_name, documents) = match ( + req.headers().get(PROXY_IMPORT_REMOTE_HEADER), + req.headers().get(PROXY_IMPORT_INDEX_HEADER), + req.headers().get(PROXY_IMPORT_DOCS_HEADER), + ) { + (None, None, None) => return Ok(None), + (Some(remote_name), Some(index_name), Some(documents)) => { + let remote_name = urlencoding::decode(remote_name.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_REMOTE_HEADER, + msg: format!("while parsing import remote name as UTF-8: {err}"), + } + })?) + .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_REMOTE_HEADER, + msg: format!("while URL-decoding import remote name: {err}"), + })?; + + let index_name = urlencoding::decode(index_name.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_INDEX_HEADER, + msg: format!("while parsing import index name as UTF-8: {err}"), + } + })?) + .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_INDEX_HEADER, + msg: format!("while URL-decoding import index name: {err}"), + })?; + + let documents = urlencoding::decode(documents.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_DOCS_HEADER, + msg: format!("while parsing documents as UTF-8: {err}"), + } + })?) 
+ .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_DOCS_HEADER, + msg: format!("while URL-decoding documents: {err}"), + })?; + (remote_name, index_name, documents) + } + // catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, Some, Some) + (remote_name, index_name, documents) => { + return Err(MeilisearchHttpError::InconsistentImportHeaders { + is_remote_missing: remote_name.is_none(), + is_index_missing: index_name.is_none(), + is_docs_missing: documents.is_none(), + }) + } + }; + + let document_count: u64 = + documents.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_DOCS_HEADER, + msg: format!("while parsing the documents as an integer: {err}"), + })?; + + Ok(Some(ImportData { + remote_name: remote_name.to_string(), + index_name: index_name.to_string(), + document_count, + })) +} + +pub fn import_metadata_from_req( + req: &HttpRequest, +) -> Result, MeilisearchHttpError> { + let (index_count, task_key, total_index_documents) = match ( + req.headers().get(PROXY_IMPORT_INDEX_COUNT_HEADER), + req.headers().get(PROXY_IMPORT_TASK_KEY_HEADER), + req.headers().get(PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER), + ) { + (None, None, None) => return Ok(None), + (Some(index_count), Some(task_key), Some(total_index_documents)) => { + let index_count = urlencoding::decode(index_count.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_REMOTE_HEADER, + msg: format!("while parsing import index count as UTF-8: {err}"), + } + })?) + .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_INDEX_COUNT_HEADER, + msg: format!("while URL-decoding import index count: {err}"), + })?; + + let task_key = urlencoding::decode(task_key.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_TASK_KEY_HEADER, + msg: format!("while parsing import task key as UTF-8: {err}"), + } + })?) + .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_TASK_KEY_HEADER, + msg: format!("while URL-decoding import task key: {err}"), + })?; + + let total_index_documents = + urlencoding::decode(total_index_documents.to_str().map_err(|err| { + MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, + msg: format!("while parsing total index documents as UTF-8: {err}"), + } + })?) 
+ .map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, + msg: format!("while URL-decoding total index documents: {err}"), + })?; + (index_count, task_key, total_index_documents) + } + // catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, Some, Some) + (index_count, task_key, total_index_documents) => { + return Err(MeilisearchHttpError::InconsistentImportMetadataHeaders { + is_index_count_missing: index_count.is_none(), + is_task_key_missing: task_key.is_none(), + is_total_index_documents_missing: total_index_documents.is_none(), + }) + } + }; + + let index_count: u64 = + index_count.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_INDEX_COUNT_HEADER, + msg: format!("while parsing the index count as an integer: {err}"), + })?; + + let task_key: DocumentId = + task_key.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_TASK_KEY_HEADER, + msg: format!("while parsing import task key as an integer: {err}"), + })?; + + let total_index_documents: u64 = + total_index_documents.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue { + header_name: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, + msg: format!("while parsing the total index documents as an integer: {err}"), + })?; + + Ok(Some(ImportMetadata { index_count, task_key, total_index_documents })) } diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index d3c399dec..c7667b802 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -30,7 +30,7 @@ use crate::Opt; pub mod compact; pub mod documents; -mod enterprise_edition; +pub mod enterprise_edition; pub mod facet_search; pub mod search; mod search_analytics; @@ -41,7 +41,9 @@ mod settings_analytics; pub mod similar; mod similar_analytics; -pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER}; +pub use meilisearch_types::tasks::enterprise_edition::network::headers::{ + PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER, +}; #[derive(OpenApi)] #[openapi( diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index cc825f893..f67664b9e 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -636,6 +636,7 @@ pub async fn update_all( }; let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; + /// TODO: make sure to proxy all settings tasks let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? 
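A note on the new `Body::Generated` variant in proxy.rs before the network route below: `into_bytes_iter` either repeats one serialized payload for every remote (the `NdJsonPayload`, `Inline`, and `None` cases) or, for `Generated(initial, f)`, calls the closure once per `(name, Remote)` pair and re-serializes after each mutation, so each remote can receive a slightly different body; the network route relies on this to rewrite the `self` field per target. A simplified, self-contained analogue of that branch (the types here are illustrative stand-ins, not the `meilisearch-types` ones; it assumes `serde` with the derive feature plus `serde_json`):

use std::collections::BTreeMap;

#[derive(serde::Serialize)]
struct ProxiedNetwork {
    // serialized as `self` in the real payload; each remote must see its own name here
    local: Option<String>,
    leader: Option<String>,
}

// Mirrors the `Body::Generated(initial, f)` arm of `into_bytes_iter`: mutate the
// template once per remote, then serialize the result for that remote.
fn generated_bodies<T, F>(
    mut initial: T,
    mut f: F,
    remotes: BTreeMap<String, String>, // name -> url, a stand-in for the real `Remote`
) -> Vec<(String, Vec<u8>)>
where
    T: serde::Serialize,
    F: FnMut(&str, &str, &mut T),
{
    remotes
        .into_iter()
        .map(|(name, url)| {
            f(&name, &url, &mut initial);
            let bytes = serde_json::to_vec(&initial).expect("payload serializes");
            (name, bytes)
        })
        .collect()
}

fn main() {
    let remotes = BTreeMap::from([
        ("ms-00".to_string(), "http://localhost:7700".to_string()),
        ("ms-01".to_string(), "http://localhost:7701".to_string()),
    ]);
    let template = ProxiedNetwork { local: None, leader: Some("ms-00".into()) };
    // Each remote receives the payload with `local` set to its own name, which is
    // the same trick the network route plays with `Body::generated`.
    for (name, body) in
        generated_bodies(template, |name, _url, p| p.local = Some(name.to_string()), remotes)
    {
        println!("{name}: {}", String::from_utf8_lossy(&body));
    }
}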
diff --git a/crates/meilisearch/src/routes/network.rs b/crates/meilisearch/src/routes/network.rs index b7fa2980c..08fe979e8 100644 --- a/crates/meilisearch/src/routes/network.rs +++ b/crates/meilisearch/src/routes/network.rs @@ -9,20 +9,27 @@ use itertools::{EitherOrBoth, Itertools}; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::enterprise_edition::network::{Network as DbNetwork, Remote as DbRemote}; use meilisearch_types::error::deserr_codes::{ - InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding, + InvalidNetworkLeader, InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl, InvalidNetworkWriteApiKey, }; -use meilisearch_types::error::ResponseError; +use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::keys::actions; use meilisearch_types::milli::update::Setting; +use meilisearch_types::tasks::enterprise_edition::network::{ + headers, NetworkTopologyChange, Origin, TaskNetwork, +}; +use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use tracing::debug; use utoipa::{OpenApi, ToSchema}; use crate::analytics::{Aggregate, Analytics}; +use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::indexes::enterprise_edition::proxy::{self, proxy, Body}; +use crate::routes::SummarizedTaskView; #[derive(OpenApi)] #[openapi( @@ -83,7 +90,7 @@ async fn get_network( Ok(HttpResponse::Ok().json(network)) } -#[derive(Debug, Deserr, ToSchema, Serialize)] +#[derive(Clone, Debug, Deserr, ToSchema, Serialize)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] #[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] @@ -106,12 +113,19 @@ pub struct Remote { pub write_api_key: Setting, } -#[derive(Debug, Deserr, ToSchema, Serialize)] +#[derive(Clone, Debug, Deserr, ToSchema, Serialize)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] #[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] pub struct Network { - #[schema(value_type = Option>, example = json!("http://localhost:7700"))] + #[schema(value_type = Option>, example = json!({ + "ms-00": { + "url": "http://localhost:7700" + }, + "ms-01": { + "url": "http://localhost:7701" + } + }))] #[deserr(default, error = DeserrJsonError)] #[serde(default)] pub remotes: Setting>>, @@ -119,10 +133,21 @@ pub struct Network { #[serde(default, rename = "self")] #[deserr(default, rename = "self", error = DeserrJsonError)] pub local: Setting, - #[schema(value_type = Option, example = json!(true))] + #[schema(value_type = Option, example = json!("ms-00"))] #[serde(default)] - #[deserr(default, error = DeserrJsonError)] - pub sharding: Setting, + #[deserr(default, error = DeserrJsonError)] + pub leader: Setting, + #[schema(value_type = Option>, example = json!({ + "ms-00": { + "url": "http://localhost:7700" + }, + "ms-01": { + "url": "http://localhost:7701" + } + }))] + #[deserr(default, error = DeserrJsonError)] + #[serde(default)] + pub previous_remotes: Setting>>, } impl Remote { @@ -207,29 +232,254 @@ async fn patch_network( ) -> Result { index_scheduler.features().check_network("Using the /network route")?; + match ( + proxy::origin_from_req(&req)?, + proxy::import_data_from_req(&req)?, + proxy::import_metadata_from_req(&req)?, + ) { + (Some(origin), None, None) => { + 
patch_network_with_origin(index_scheduler, new_network, req, origin, analytics).await + } + (None, None, None) => { + patch_network_without_origin(index_scheduler, new_network, req, analytics).await + } + (Some(origin), Some(import_data), Some(metadata)) => { + if metadata.index_count == 0 { + tokio::task::spawn_blocking(move || { + index_scheduler.network_no_index_for_remote(import_data.remote_name, origin) + }) + .await + .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??; + Ok(HttpResponse::Accepted().finish()) + } else { + Err(MeilisearchHttpError::InvalidHeaderValue { + header_name: headers::PROXY_IMPORT_INDEX_COUNT_HEADER, + msg: format!("Expected 0 indexes, got `{}`", metadata.index_count), + } + .into()) + } + } + (origin, import_data, metadata) => { + return Err(MeilisearchHttpError::InconsistentTaskNetworkHeaders { + is_missing_origin: origin.is_none(), + is_missing_import: import_data.is_none(), + is_missing_import_metadata: metadata.is_none(), + } + .into()) + } + } +} + +async fn patch_network_without_origin( + index_scheduler: GuardedData, Data>, + new_network: AwebJson, + req: HttpRequest, + analytics: Data, +) -> Result { + /// FIXME: check network tasks from all nodes to detect already enqueued/processing network tasks let new_network = new_network.0; let old_network = index_scheduler.network(); debug!(parameters = ?new_network, "Patch network"); + if !matches!(new_network.previous_remotes, Setting::NotSet) { + return Err(MeilisearchHttpError::UnexpectedNetworkPreviousRemotes.into()); + } + + let merged_network = merge_networks(old_network.clone(), new_network)?; + index_scheduler.put_network(merged_network.clone())?; + + analytics.publish( + PatchNetworkAnalytics { + network_size: merged_network.remotes.len(), + network_has_self: merged_network.local.is_some(), + }, + &req, + ); + + if merged_network.leader.is_some() { + let network_topology_change = + NetworkTopologyChange::new(old_network.clone(), merged_network.clone()); + let task = KindWithContent::NetworkTopologyChange(network_topology_change); + let mut task = { + let index_scheduler = index_scheduler.clone(); + tokio::task::spawn_blocking(move || { + index_scheduler.register_with_custom_metadata( + task, + None, + None, + false, + Some(TaskNetwork::Remotes { + remote_tasks: Default::default(), + network_version: merged_network.version, + }), + ) + }) + .await?? + }; + + let mut proxied_network = Network { + remotes: Setting::Set(to_settings_remotes(&merged_network.remotes)), + local: Setting::NotSet, + leader: Setting::some_or_not_set(merged_network.leader.clone()), + previous_remotes: Setting::Set(to_settings_remotes(&old_network.remotes)), + }; + let mut deleted_network = old_network; + + let deleted_remotes = &mut deleted_network.remotes; + deleted_remotes.retain(|node, _| !merged_network.remotes.contains_key(node)); + + // proxy network change to the remaining remotes. 
+ let updated_task = proxy( + &index_scheduler, + None, + &req, + task.network.take().unwrap(), // set in register + merged_network, + Body::generated(proxied_network.clone(), |name, _remote, network| { + network.local = Setting::Set(name.to_string()); + }), + &task, + ) + .await?; + // unwrap: network was set by `proxy` + let task_network = updated_task.network.unwrap(); + + proxied_network.previous_remotes = Setting::NotSet; + + if deleted_network.leader.is_some() { + // proxy network change to the deleted remotes + proxy( + &index_scheduler, + None, + &req, + task_network, + deleted_network, + Body::generated(proxied_network.clone(), |_name, _remote, network| { + network.local = Setting::Reset; + }), + &task, + ) + .await?; + } + + let task: SummarizedTaskView = task.into(); + debug!("returns: {:?}", task); + Ok(HttpResponse::Accepted().json(task)) + } else { + Ok(HttpResponse::Accepted().json(merged_network)) + } +} + +async fn patch_network_with_origin( + index_scheduler: GuardedData, Data>, + merged_network: AwebJson, + req: HttpRequest, + origin: Origin, + analytics: Data, +) -> Result { + let merged_network = merged_network.into_inner(); + debug!(parameters = ?merged_network, ?origin, "Patch network"); + let mut remotes = BTreeMap::new(); + let mut old_network = index_scheduler.network(); + + for (name, remote) in merged_network.remotes.set().into_iter().flat_map(|x| x.into_iter()) { + let Some(remote) = remote else { continue }; + let remote = remote.try_into_db_node(&name)?; + remotes.insert(name, remote); + } + let mut previous_remotes = BTreeMap::new(); + for (name, remote) in + merged_network.previous_remotes.set().into_iter().flat_map(|x| x.into_iter()) + { + let Some(remote) = remote else { + continue; + }; + let remote = remote.try_into_db_node(&name)?; + previous_remotes.insert(name, remote); + } + + old_network.remotes = previous_remotes; + + let new_network = DbNetwork { + local: merged_network.local.set(), + remotes, + leader: merged_network.leader.set(), + version: origin.network_version, + }; + index_scheduler.put_network(new_network.clone())?; + + analytics.publish( + PatchNetworkAnalytics { + network_size: new_network.remotes.len(), + network_has_self: new_network.local.is_some(), + }, + &req, + ); + + let network_topology_change = NetworkTopologyChange::new(old_network, new_network); + let task = KindWithContent::NetworkTopologyChange(network_topology_change); + let task = { + let index_scheduler = index_scheduler.clone(); + tokio::task::spawn_blocking(move || { + index_scheduler.register_with_custom_metadata( + task, + None, + None, + false, + Some(TaskNetwork::Origin { origin }), + ) + }) + .await?? 
+ }; + + let task: SummarizedTaskView = task.into(); + debug!("returns: {:?}", task); + Ok(HttpResponse::Accepted().json(task)) +} + +fn to_settings_remotes( + db_remotes: &BTreeMap, +) -> BTreeMap> { + db_remotes + .iter() + .map(|(name, remote)| { + ( + name.clone(), + Some(Remote { + url: Setting::Set(remote.url.clone()), + search_api_key: Setting::some_or_not_set(remote.search_api_key.clone()), + write_api_key: Setting::some_or_not_set(remote.write_api_key.clone()), + }), + ) + }) + .collect() +} + +fn merge_networks( + old_network: DbNetwork, + new_network: Network, +) -> Result { let merged_self = match new_network.local { Setting::Set(new_self) => Some(new_self), Setting::Reset => None, Setting::NotSet => old_network.local, }; - - let merged_sharding = match new_network.sharding { - Setting::Set(new_sharding) => new_sharding, - Setting::Reset => false, - Setting::NotSet => old_network.sharding, + let merged_leader = match new_network.leader { + Setting::Set(new_leader) => Some(new_leader), + Setting::Reset => None, + Setting::NotSet => old_network.leader, }; - - if merged_sharding && merged_self.is_none() { - return Err(ResponseError::from_msg( - "`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(), - meilisearch_types::error::Code::InvalidNetworkSharding, - )); + match (merged_leader.as_deref(), merged_self.as_deref()) { + // 1. Always allowed if there is no leader + (None, _) => (), + // 2. Allowed if the leader is self + (Some(leader), Some(this)) if leader == this => (), + // 3. Any other change is disallowed + (Some(leader), _) => { + return Err(MeilisearchHttpError::NotLeader { leader: leader.to_string() }.into()) + } } - + let new_version = uuid::Uuid::now_v7(); let merged_remotes = match new_network.remotes { Setting::Set(new_remotes) => { let mut merged_remotes = BTreeMap::new(); @@ -301,18 +551,11 @@ async fn patch_network( Setting::Reset => BTreeMap::new(), Setting::NotSet => old_network.remotes, }; - - analytics.publish( - PatchNetworkAnalytics { - network_size: merged_remotes.len(), - network_has_self: merged_self.is_some(), - }, - &req, - ); - - let merged_network = - DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding }; - index_scheduler.put_network(merged_network.clone())?; - debug!(returns = ?merged_network, "Patch network"); - Ok(HttpResponse::Ok().json(merged_network)) + let merged_network = DbNetwork { + local: merged_self, + remotes: merged_remotes, + leader: merged_leader, + version: new_version, + }; + Ok(merged_network) } diff --git a/crates/milli/src/update/new/indexer/enterprise_edition/sharding.rs b/crates/milli/src/update/new/indexer/enterprise_edition/sharding.rs index c0322f868..b73cb0d22 100644 --- a/crates/milli/src/update/new/indexer/enterprise_edition/sharding.rs +++ b/crates/milli/src/update/new/indexer/enterprise_edition/sharding.rs @@ -5,18 +5,38 @@ use std::hash::{BuildHasher as _, BuildHasherDefault}; -pub struct Shards { - pub own: Vec, - pub others: Vec, +#[derive(Debug, Clone)] +pub struct Shards(pub Vec); + +#[derive(Debug, Clone)] +pub struct Shard { + pub is_own: bool, + pub name: String, } impl Shards { + pub fn from_remotes_local<'a>( + remotes: impl IntoIterator, + local: Option<&str>, + ) -> Self { + Shards( + remotes + .into_iter() + .map(|name| Shard { is_own: Some(name) == local, name: name.to_owned() }) + .collect(), + ) + } + pub fn must_process(&self, docid: &str) -> bool { + self.processing_shard(docid).map(|shard| 
shard.is_own).unwrap_or_default() + } + + pub fn processing_shard<'a>(&'a self, docid: &str) -> Option<&'a Shard> { let hasher = BuildHasherDefault::::new(); - let to_hash = |shard: &String| hasher.hash_one((shard, docid)); + let to_hash = |shard: &'a Shard| (shard, hasher.hash_one((&shard.name, docid))); - let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default(); - - self.own.iter().map(to_hash).any(|hash| hash > max_hash) + let shard = + self.0.iter().map(to_hash).max_by_key(|(_, hash)| *hash).map(|(shard, _)| shard); + shard } }
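The reworked `Shards` type above drops the own/others split in favor of a single list of named shards, one of which is flagged `is_own`; `processing_shard` picks, for a given document id, the shard whose hash of `(shard_name, docid)` is highest, and `must_process` simply checks whether that winner is the local shard. This is rendezvous (highest-random-weight) hashing, whose useful property is that adding a shard only moves documents onto the new shard and never shuffles them between existing ones. A standalone sketch of that property (it uses std's `DefaultHasher` purely for illustration; the actual hasher in `sharding.rs` is whatever `BuildHasherDefault` is instantiated with there):

// Standalone illustration of the "highest hash wins" rule behind
// `Shards::processing_shard`. Only the selection rule matters here,
// not the exact hash function.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn hash_pair(shard: &str, docid: &str) -> u64 {
    let mut h = DefaultHasher::new();
    (shard, docid).hash(&mut h);
    h.finish()
}

// Pick the shard with the highest hash for this document id.
fn processing_shard<'a>(shards: &'a [&'a str], docid: &str) -> Option<&'a str> {
    shards.iter().copied().max_by_key(|&shard| hash_pair(shard, docid))
}

fn main() {
    let before = ["ms-00", "ms-01"];
    let after = ["ms-00", "ms-01", "ms-02"];

    for docid in ["doc-1", "doc-2", "doc-3", "doc-4"] {
        let old = processing_shard(&before, docid).unwrap();
        let new = processing_shard(&after, docid).unwrap();
        // With rendezvous hashing a document either stays where it was or moves
        // to the newly added shard; it never hops between pre-existing shards.
        assert!(new == old || new == "ms-02");
        println!("{docid}: {old} -> {new}");
    }
}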