Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-12-16 17:36:58 +00:00

Compare commits (2 commits) between delta-enco... and proper-def...

Commits: 0be7db9b42, 051c084aba

.github/workflows/sdks-tests.yml (vendored, 12 changed lines)
@@ -25,18 +25,14 @@ jobs:
      - uses: actions/checkout@v5
      - name: Define the Docker image we need to use
        id: define-image
        env:
          EVENT_NAME: ${{ github.event_name }}
          DOCKER_IMAGE_INPUT: ${{ github.event.inputs.docker_image }}
        run: |
          event=${{ github.event_name }}
          echo "docker-image=nightly" >> $GITHUB_OUTPUT
          if [[ "$EVENT_NAME" == 'workflow_dispatch' ]]; then
            echo "docker-image=$DOCKER_IMAGE_INPUT" >> $GITHUB_OUTPUT
          if [[ $event == 'workflow_dispatch' ]]; then
            echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
          fi
      - name: Docker image is ${{ steps.define-image.outputs.docker-image }}
        env:
          DOCKER_IMAGE: ${{ steps.define-image.outputs.docker-image }}
        run: echo "Docker image is $DOCKER_IMAGE"
        run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"

  ##########
  ## SDKs ##
Cargo.lock (generated, 606 changed lines)
File diff suppressed because it is too large.
@@ -23,7 +23,7 @@ members = [
]

[workspace.package]
version = "1.30.0"
version = "1.29.0"
authors = [
    "Quentin de Quelen <quentin@dequelen.me>",
    "Clément Renault <clement@meilisearch.com>",
@@ -9,9 +9,8 @@ use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::network::{DbTaskNetwork, NetworkTopologyChange};
use meilisearch_types::tasks::{
    Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
    Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;

@@ -96,7 +95,7 @@ pub struct TaskDump {
    )]
    pub finished_at: Option<OffsetDateTime>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub network: Option<DbTaskNetwork>,
    pub network: Option<TaskNetwork>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub custom_metadata: Option<String>,
}

@@ -164,7 +163,6 @@ pub enum KindDump {
    IndexCompaction {
        index_uid: String,
    },
    NetworkTopologyChange(NetworkTopologyChange),
}

impl From<Task> for TaskDump {

@@ -251,9 +249,6 @@ impl From<KindWithContent> for KindDump {
            KindWithContent::IndexCompaction { index_uid } => {
                KindDump::IndexCompaction { index_uid }
            }
            KindWithContent::NetworkTopologyChange(network_topology_change) => {
                KindDump::NetworkTopologyChange(network_topology_change)
            }
        }
    }
}

@@ -565,8 +560,7 @@ pub(crate) mod test {
        Network {
            local: Some("myself".to_string()),
            remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
            leader: None,
            version: Default::default(),
            sharding: false,
        }
    }

@@ -620,10 +614,7 @@ pub(crate) mod test {
        assert_eq!(dump.features().unwrap().unwrap(), expected);

        // ==== checking the network
        let mut expected = create_test_network();
        // from v1.29, we drop `leader` and `local` on import
        expected.leader = None;
        expected.local = None;
        let expected = create_test_network();
        assert_eq!(&expected, dump.network().unwrap().unwrap());
    }
}

@@ -434,11 +434,7 @@ pub(crate) mod test {
        // network

        let network = dump.network().unwrap().unwrap();

        // since v1.29 we are dropping `local` and `leader` on import
        insta::assert_snapshot!(network.local.is_none(), @"true");
        insta::assert_snapshot!(network.leader.is_none(), @"true");

        insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
        insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
        insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
        insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
@@ -95,26 +95,17 @@ impl V6Reader {
            Err(e) => return Err(e.into()),
        };

        let mut network: Option<meilisearch_types::network::Network> =
            match fs::read(dump.path().join("network.json")) {
                Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
                Err(error) => match error.kind() {
                    // Allows the file to be missing, this will only result in all experimental features disabled.
                    ErrorKind::NotFound => {
                        debug!("`network.json` not found in dump");
                        None
                    }
                    _ => return Err(error.into()),
                },
            };

        if let Some(network) = &mut network {
            // as dumps are typically imported in a different machine as the emitter (otherwise dumpless upgrade would be used),
            // we decide to remove the self to avoid alias issues
            network.local = None;
            // for the same reason we disable automatic sharding
            network.leader = None;
        }
        let network = match fs::read(dump.path().join("network.json")) {
            Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
            Err(error) => match error.kind() {
                // Allows the file to be missing, this will only result in all experimental features disabled.
                ErrorKind::NotFound => {
                    debug!("`network.json` not found in dump");
                    None
                }
                _ => return Err(error.into()),
            },
        };

        let webhooks = match fs::read(dump.path().join("webhooks.json")) {
            Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?),
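One side of this hunk normalizes the imported network configuration by dropping `local` and `leader`, as the code comments explain. A minimal, hedged sketch of that normalization as a standalone helper, using a simplified stand-in for the real `meilisearch_types::network::Network` type:

```rust
use std::collections::BTreeMap;

// Hedged sketch: this `Network` is a simplified stand-in, not the real type.
#[derive(Debug, Default)]
struct Network {
    local: Option<String>,                 // name this instance uses for itself
    leader: Option<String>,                // remote driving automatic sharding
    remotes: BTreeMap<String, String>,     // remote name -> URL
}

/// Dumps are usually restored on a different machine than the one that produced
/// them, so the importer forgets who "self" was and which remote led sharding
/// instead of trusting stale values.
fn normalize_imported_network(mut network: Network) -> Network {
    network.local = None;  // avoid alias issues on the new machine
    network.leader = None; // disable automatic sharding until reconfigured
    network
}

fn main() {
    let imported = Network { local: Some("myself".into()), ..Default::default() };
    assert!(normalize_imported_network(imported).local.is_none());
}
```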
@@ -24,7 +24,6 @@ dump = { path = "../dump" }
enum-iterator = "2.3.0"
file-store = { path = "../file-store" }
flate2 = "1.1.5"
hashbrown = "0.15.5"
indexmap = "2.12.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }

@@ -48,13 +47,9 @@ tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.18.1", features = ["serde", "v4"] }
backoff = "0.4.0"
reqwest = { version = "0.12.24", features = [
    "rustls-tls",
    "http2",
], default-features = false }
reqwest = { version = "0.12.24", features = ["rustls-tls", "http2"], default-features = false }
rusty-s3 = "0.8.1"
tokio = { version = "1.48.0", features = ["full"] }
urlencoding = "2.1.3"

[dev-dependencies]
big_s = "1.0.2"

@@ -63,6 +58,3 @@ crossbeam-channel = "0.5.15"
insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

[features]
enterprise = ["meilisearch-types/enterprise"]
@@ -238,9 +238,6 @@ impl<'a> Dump<'a> {
                KindDump::IndexCompaction { index_uid } => {
                    KindWithContent::IndexCompaction { index_uid }
                }
                KindDump::NetworkTopologyChange(network_topology_change) => {
                    KindWithContent::NetworkTopologyChange(network_topology_change)
                }
            },
        };
@@ -3,13 +3,10 @@ use std::fmt::Display;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::milli::index::RollbackOutcome;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::tasks::network::ReceiveTaskError;
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use reqwest::StatusCode;
use thiserror::Error;
use uuid::Uuid;

use crate::TaskId;

@@ -194,17 +191,6 @@ pub enum Error {
    #[error(transparent)]
    HeedTransaction(heed::Error),

    #[error("No network topology change task is currently enqueued or processing")]
    ImportTaskWithoutNetworkTask,
    #[error("The network task version (`{network_task}`) does not match the import task version (`{import_task}`)")]
    NetworkVersionMismatch { network_task: Uuid, import_task: Uuid },
    #[error("The import task emanates from an unknown remote `{0}`")]
    ImportTaskUnknownRemote(String),
    #[error("The import task with key `{0}` was already received")]
    ImportTaskAlreadyReceived(DocumentId),
    #[error("{action} requires the Enterprise Edition")]
    RequiresEnterpriseEdition { action: &'static str },

    #[cfg(test)]
    #[error("Planned failure for tests.")]
    PlannedFailure,

@@ -262,11 +248,6 @@ impl Error {
            | Error::Persist(_)
            | Error::FeatureNotEnabled(_)
            | Error::Export(_)
            | Error::ImportTaskWithoutNetworkTask
            | Error::NetworkVersionMismatch { .. }
            | Error::ImportTaskAlreadyReceived(_)
            | Error::ImportTaskUnknownRemote(_)
            | Error::RequiresEnterpriseEdition { .. }
            | Error::Anyhow(_) => true,
            Error::CreateBatch(_)
            | Error::CorruptedTaskQueue

@@ -326,11 +307,6 @@ impl ErrorCode for Error {
            Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
            Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
            Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
            Error::ImportTaskWithoutNetworkTask => Code::ImportTaskWithoutNetworkTask,
            Error::NetworkVersionMismatch { .. } => Code::NetworkVersionMismatch,
            Error::ImportTaskAlreadyReceived(_) => Code::ImportTaskAlreadyReceived,
            Error::ImportTaskUnknownRemote(_) => Code::ImportTaskUnknownRemote,
            Error::RequiresEnterpriseEdition { .. } => Code::RequiresEnterpriseEdition,
            Error::S3Error { status, .. } if status.is_client_error() => {
                Code::InvalidS3SnapshotRequest
            }

@@ -369,12 +345,3 @@ impl ErrorCode for Error {
        }
    }
}

impl From<ReceiveTaskError> for Error {
    fn from(value: ReceiveTaskError) -> Self {
        match value {
            ReceiveTaskError::UnknownRemote(unknown) => Error::ImportTaskUnknownRemote(unknown),
            ReceiveTaskError::DuplicateTask(dup) => Error::ImportTaskAlreadyReceived(dup),
        }
    }
}

@@ -38,10 +38,6 @@ impl RoFeatures {
        Self { runtime }
    }

    pub fn from_runtime_features(features: RuntimeTogglableFeatures) -> Self {
        Self { runtime: features }
    }

    pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
        self.runtime
    }

@@ -361,12 +361,6 @@ impl IndexMapper {
        Ok(())
    }

    /// The number of indexes in the database
    #[cfg(feature = "enterprise")] // only used in enterprise edition for now
    pub fn index_count(&self, rtxn: &RoTxn) -> Result<u64> {
        Ok(self.index_mapping.len(rtxn)?)
    }

    /// Return an index, may open it if it wasn't already opened.
    pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
        if let Some((current_name, current_index)) =

@@ -4,7 +4,7 @@ use std::fmt::Write;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use roaring::RoaringBitmap;

@@ -27,7 +27,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        queue,
        scheduler,
        persisted,
        export_default_payload_size_bytes: _,

        index_mapper,
        features: _,

@@ -188,7 +187,7 @@ pub fn snapshot_all_batches(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Batch>>)

pub fn snapshot_batches_to_tasks_mappings(
    rtxn: &RoTxn,
    db: Database<BEU32, DeCboRoaringBitmapCodec>,
    db: Database<BEU32, CboRoaringBitmapCodec>,
) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();

@@ -199,7 +198,7 @@ pub fn snapshot_batches_to_tasks_mappings(
    snap
}

pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, DeCboRoaringBitmapCodec>) -> String {
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {

@@ -330,9 +329,6 @@ fn snapshot_details(d: &Details) -> String {
        Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
            format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
        }
        Details::NetworkTopologyChange { moved_documents, message } => {
            format!("{{ moved_documents: {moved_documents:?}, message: {message:?}")
        }
    }
}
@@ -48,7 +48,6 @@ use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::time::Duration;

use byte_unit::Byte;
use dump::Dump;
pub use error::Error;
pub use features::RoFeatures;

@@ -69,12 +68,10 @@ use meilisearch_types::milli::vector::{
use meilisearch_types::milli::{self, Index};
use meilisearch_types::network::Network;
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::network::{
    DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
};
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
use queue::Queue;
use roaring::RoaringBitmap;

@@ -85,7 +82,6 @@ use uuid::Uuid;
use versioning::Versioning;

use crate::index_mapper::IndexMapper;
use crate::processing::ProcessingTasks;
use crate::utils::clamp_to_page_size;

pub(crate) type BEI128 = I128<BE>;

@@ -148,11 +144,9 @@ pub struct IndexSchedulerOptions {
    /// If the autobatcher is allowed to automatically batch tasks
    /// it will only batch this defined maximum size (in bytes) of tasks at once.
    pub batched_tasks_size_limit: u64,
    /// The maximum size of the default payload for exporting documents, in bytes
    pub export_default_payload_size_bytes: Byte,
    /// The experimental features enabled for this instance.
    pub instance_features: InstanceTogglableFeatures,
    /// Whether the index scheduler is able to auto upgrade or not.
    /// The experimental features enabled for this instance.
    pub auto_upgrade: bool,
    /// The maximal number of entries in the search query cache of an embedder.
    ///

@@ -205,9 +199,6 @@ pub struct IndexScheduler {
    /// to the same embeddings for the same input text.
    embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,

    /// The maximum size of the default payload for exporting documents, in bytes
    pub export_default_payload_size_bytes: Byte,

    // ================= test
    // The next entry is dedicated to the tests.
    /// Provide a way to set a breakpoint in multiple part of the scheduler.

@@ -243,7 +234,6 @@ impl IndexScheduler {
            cleanup_enabled: self.cleanup_enabled,
            experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
            persisted: self.persisted,
            export_default_payload_size_bytes: self.export_default_payload_size_bytes,

            webhooks: self.webhooks.clone(),
            embedders: self.embedders.clone(),

@@ -355,7 +345,6 @@ impl IndexScheduler {
            persisted,
            webhooks: Arc::new(webhooks),
            embedders: Default::default(),
            export_default_payload_size_bytes: options.export_default_payload_size_bytes,

            #[cfg(test)] // Will be replaced in `new_tests` in test environments
            test_breakpoint_sdr: crossbeam_channel::bounded(0).0,

@@ -711,14 +700,14 @@ impl IndexScheduler {
        self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
    }

    pub fn set_task_network(&self, task_id: TaskId, network: DbTaskNetwork) -> Result<Task> {
    pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
        let mut wtxn = self.env.write_txn()?;
        let mut task =
            self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
        task.network = Some(network);
        self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
        wtxn.commit()?;
        Ok(task)
        Ok(())
    }

    /// Return the batches matching the query from the user's point of view along

@@ -768,30 +757,18 @@ impl IndexScheduler {
        task_id: Option<TaskId>,
        dry_run: bool,
    ) -> Result<Task> {
        self.register_with_custom_metadata(kind, task_id, None, dry_run, None)
        self.register_with_custom_metadata(kind, task_id, None, dry_run)
    }

    /// Register a new task in the scheduler, with metadata.
    ///
    /// If it fails and data was associated with the task, it tries to delete the associated data.
    ///
    /// # Parameters
    ///
    /// - task_network: network of the task to check.
    ///
    /// If the task is an import task, only accept it if:
    ///
    /// 1. There is an ongoing network topology change task
    /// 2. The task to register matches the network version of the network topology change task
    ///
    /// Always accept the task if it is not an import task.
    pub fn register_with_custom_metadata(
        &self,
        kind: KindWithContent,
        task_id: Option<TaskId>,
        custom_metadata: Option<String>,
        dry_run: bool,
        task_network: Option<TaskNetwork>,
    ) -> Result<Task> {
        // if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
        if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
@@ -802,19 +779,7 @@ impl IndexScheduler {
        }

        let mut wtxn = self.env.write_txn()?;

        if let Some(TaskNetwork::Import { import_from, network_change, metadata }) = &task_network {
            self.update_network_task(&mut wtxn, import_from, network_change, metadata)?;
        }

        let task = self.queue.register(
            &mut wtxn,
            &kind,
            task_id,
            custom_metadata,
            dry_run,
            task_network.map(DbTaskNetwork::from),
        )?;
        let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?;

        // If the registered task is a task cancelation
        // we inform the processing tasks to stop (if necessary).

@@ -836,91 +801,6 @@ impl IndexScheduler {
        Ok(task)
    }

    pub fn network_no_index_for_remote(
        &self,
        remote_name: String,
        origin: Origin,
    ) -> Result<(), Error> {
        let mut wtxn = self.env.write_txn()?;

        self.update_network_task(
            &mut wtxn,
            &ImportData { remote_name, index_name: None, document_count: 0 },
            &origin,
            &ImportMetadata { index_count: 0, task_key: None, total_index_documents: 0 },
        )?;

        wtxn.commit()?;

        // wake up the scheduler as the task state has changed
        self.scheduler.wake_up.signal();

        Ok(())
    }

    fn update_network_task(
        &self,
        wtxn: &mut heed::RwTxn<'_>,
        import_from: &ImportData,
        network_change: &Origin,
        metadata: &ImportMetadata,
    ) -> Result<(), Error> {
        let mut network_tasks = self
            .queue
            .tasks
            .get_kind(&*wtxn, meilisearch_types::tasks::Kind::NetworkTopologyChange)?;
        if network_tasks.is_empty() {
            return Err(Error::ImportTaskWithoutNetworkTask);
        }
        let network_task = {
            let processing = self.processing_tasks.read().unwrap().processing.clone();
            if processing.is_disjoint(&network_tasks) {
                let enqueued = self
                    .queue
                    .tasks
                    .get_status(&*wtxn, meilisearch_types::tasks::Status::Enqueued)?;

                network_tasks &= enqueued;
                if let Some(network_task) = network_tasks.into_iter().next() {
                    network_task
                } else {
                    return Err(Error::ImportTaskWithoutNetworkTask);
                }
            } else {
                network_tasks &= &*processing;
                network_tasks.into_iter().next().unwrap()
            }
        };
        let mut network_task = self.queue.tasks.get_task(&*wtxn, network_task)?.unwrap();
        let network_task_version = network_task
            .network
            .as_ref()
            .map(|network| network.network_version())
            .unwrap_or_default();
        if network_task_version != network_change.network_version {
            return Err(Error::NetworkVersionMismatch {
                network_task: network_task_version,
                import_task: network_change.network_version,
            });
        }
        let KindWithContent::NetworkTopologyChange(network_topology_change) =
            &mut network_task.kind
        else {
            tracing::error!("unexpected network kind for network task while registering task");
            return Err(Error::CorruptedTaskQueue);
        };
        network_topology_change.receive_remote_task(
            &import_from.remote_name,
            import_from.index_name.as_deref(),
            metadata.task_key,
            import_from.document_count,
            metadata.index_count,
            metadata.total_index_documents,
        )?;
        self.queue.tasks.update_task(wtxn, &mut network_task)?;
        Ok(())
    }

    /// Register a new task coming from a dump in the scheduler.
    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
    pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {

@@ -42,10 +42,12 @@ impl ProcessingTasks {

    /// Set the processing tasks to an empty list
    pub fn stop_processing(&mut self) -> Self {
        self.progress = None;

        Self {
            batch: std::mem::take(&mut self.batch),
            processing: std::mem::take(&mut self.processing),
            progress: std::mem::take(&mut self.progress),
            progress: None,
        }
    }
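The two variants of `stop_processing` above differ only in how the old `progress` is cleared: one side sets `self.progress = None` up front and returns `progress: None`, the other moves the value out with `std::mem::take`. A small, hedged illustration of what `std::mem::take` does (standard library behavior, not Meilisearch-specific code):

```rust
fn main() {
    let mut progress: Option<&str> = Some("indexing documents");

    // `std::mem::take` moves the current value out and leaves the type's
    // default behind (for `Option`, the default is `None`).
    let taken = std::mem::take(&mut progress);

    assert_eq!(taken, Some("indexing documents")); // caller gets the old value
    assert_eq!(progress, None);                    // the field is now cleared
}
```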
@@ -4,7 +4,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;

@@ -42,11 +42,11 @@ pub struct BatchQueue {
    /// Store the batches associated to an index.
    pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
    /// Store the batches containing tasks which were enqueued at a specific date
    pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
    /// Store the batches containing finished tasks started at a specific date
    pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
    /// Store the batches containing tasks finished at a specific date
    pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
}

impl BatchQueue {
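These hunks swap the heed codec for the date-indexed id sets between `CboRoaringBitmapCodec` and `DeCboRoaringBitmapCodec` (the branch name `delta-enco...` suggests a delta-encoded variant, but the codec's implementation is not part of this diff). For background, here is a hedged sketch of the general idea behind a "conditional" roaring bitmap codec: store tiny sets as plain little-endian `u32`s and fall back to roaring serialization for larger ones. The threshold and layout are illustrative assumptions, not the actual milli implementation.

```rust
use roaring::RoaringBitmap;

// Illustrative threshold: below this, raw u32s are smaller than a roaring blob.
const SMALL_SET_THRESHOLD: u64 = 4;

fn encode(bitmap: &RoaringBitmap) -> std::io::Result<Vec<u8>> {
    let mut out = Vec::new();
    if bitmap.len() <= SMALL_SET_THRESHOLD {
        // Small set: concatenate the values as little-endian u32s.
        for value in bitmap {
            out.extend_from_slice(&value.to_le_bytes());
        }
    } else {
        // Large set: use roaring's own serialization format.
        bitmap.serialize_into(&mut out)?;
    }
    Ok(out)
}

fn decode(bytes: &[u8]) -> std::io::Result<RoaringBitmap> {
    // A short buffer made of whole u32s is assumed to be the "raw" encoding.
    if bytes.len() as u64 <= SMALL_SET_THRESHOLD * 4 && bytes.len() % 4 == 0 {
        Ok(bytes.chunks_exact(4).map(|c| u32::from_le_bytes(c.try_into().unwrap())).collect())
    } else {
        RoaringBitmap::deserialize_from(bytes)
    }
}

fn main() -> std::io::Result<()> {
    let small: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    assert_eq!(decode(&encode(&small)?)?, small);
    Ok(())
}
```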
@@ -502,13 +502,11 @@ impl Queue {
            *before_finished_at,
        )?;

        if let Some(limit) = limit {
            batches = if query.reverse.unwrap_or_default() {
                batches.into_iter().take(*limit as usize).collect()
            } else {
                batches.into_iter().rev().take(*limit as usize).collect()
            };
        }
        batches = if query.reverse.unwrap_or_default() {
            batches.into_iter().take(*limit).collect()
        } else {
            batches.into_iter().rev().take(*limit).collect()
        };

        Ok(batches)
    }

@@ -602,11 +600,8 @@ impl Queue {
            Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
        };

        let batches = self.batches.get_existing_batches(
            rtxn,
            batches.take(query.limit.unwrap_or(u32::MAX) as usize),
            processing,
        )?;
        let batches =
            self.batches.get_existing_batches(rtxn, batches.take(query.limit), processing)?;

        Ok((batches, total))
    }

@@ -28,21 +28,21 @@ fn query_batches_from_and_limit() {

    let proc = index_scheduler.processing_tasks.read().unwrap().clone();
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let query = Query { limit: Some(0), ..Default::default() };
    let query = Query { limit: 0, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[]");

    let query = Query { limit: Some(1), ..Default::default() };
    let query = Query { limit: 1, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[2,]");

    let query = Query { limit: Some(2), ..Default::default() };
    let query = Query { limit: 2, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)

@@ -63,14 +63,14 @@ fn query_batches_from_and_limit() {
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");

    let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
    let query = Query { from: Some(1), limit: 1, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[1,]");

    let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
    let query = Query { from: Some(1), limit: 2, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)

@@ -14,8 +14,7 @@ use std::time::Duration;
use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use time::format_description::well_known::Rfc3339;

@@ -32,6 +31,9 @@ use crate::{Error, IndexSchedulerOptions, Result, TaskId};

/// The number of database used by queue itself
const NUMBER_OF_DATABASES: u32 = 1;
/// The default limit for pagination
const DEFAULT_LIMIT: usize = 20;

/// Database const names for the `IndexScheduler`.
mod db_name {
    pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";

@@ -41,11 +43,11 @@ mod db_name {
///
/// An empty/default query (where each field is set to `None`) matches all tasks.
/// Each non-null field restricts the set of tasks further.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Query {
    /// The maximum number of tasks to be matched
    pub limit: Option<u32>,
    /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
    /// The maximum number of tasks to be matched. Defaults to 20.
    pub limit: usize,
    /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched. Defaults to 0.
    pub from: Option<u32>,
    /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
    pub reverse: Option<bool>,

@@ -84,32 +86,29 @@ pub struct Query {
    pub after_finished_at: Option<OffsetDateTime>,
}

impl Query {
    /// Return `true` if every field of the query is set to `None`, such that the query
    /// matches all tasks.
    pub fn is_empty(&self) -> bool {
        matches!(
            self,
            Query {
                limit: None,
                from: None,
                reverse: None,
                uids: None,
                batch_uids: None,
                statuses: None,
                types: None,
                index_uids: None,
                canceled_by: None,
                before_enqueued_at: None,
                after_enqueued_at: None,
                before_started_at: None,
                after_started_at: None,
                before_finished_at: None,
                after_finished_at: None,
            }
        )
impl Default for Query {
    fn default() -> Self {
        Self {
            limit: DEFAULT_LIMIT,
            from: Default::default(),
            reverse: Default::default(),
            uids: Default::default(),
            batch_uids: Default::default(),
            statuses: Default::default(),
            types: Default::default(),
            index_uids: Default::default(),
            canceled_by: Default::default(),
            before_enqueued_at: Default::default(),
            after_enqueued_at: Default::default(),
            before_started_at: Default::default(),
            after_started_at: Default::default(),
            before_finished_at: Default::default(),
            after_finished_at: Default::default(),
        }
    }
}

impl Query {
    /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
    pub fn with_index(self, index_uid: String) -> Self {
        let mut index_vec = self.index_uids.unwrap_or_default();

@@ -120,7 +119,7 @@ impl Query {
    // Removes the `from` and `limit` restrictions from the query.
    // Useful to get the total number of tasks matching a filter.
    pub fn without_limits(self) -> Self {
        Query { limit: None, from: None, ..self }
        Query { limit: usize::MAX, from: None, ..self }
    }
}
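One side of these hunks replaces `limit: Option<u32>` (with a derived `Default`) by a non-optional `limit: usize` whose hand-written `Default` falls back to `DEFAULT_LIMIT` (20), and `without_limits` then uses `usize::MAX` instead of `None`. A hedged sketch of how the two shapes read at a call site, using a stripped-down `Query` with only the fields needed for the illustration:

```rust
const DEFAULT_LIMIT: usize = 20;

// Stripped-down stand-in for the real `Query`, illustration only.
#[derive(Debug, Clone)]
struct Query {
    limit: usize,
    from: Option<u32>,
}

impl Default for Query {
    fn default() -> Self {
        // "No limit given" now means "use the pagination default" rather than `None`.
        Self { limit: DEFAULT_LIMIT, from: None }
    }
}

impl Query {
    // "Unlimited" is expressed as the largest possible limit instead of `Option::None`.
    fn without_limits(self) -> Self {
        Query { limit: usize::MAX, ..self }
    }
}

fn main() {
    let q = Query { from: Some(1), ..Default::default() };
    assert_eq!(q.limit, 20);                       // callers no longer unwrap an Option
    assert_eq!(q.without_limits().limit, usize::MAX);
}
```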
@@ -131,7 +130,7 @@ pub struct Queue {
    pub(crate) batches: batches::BatchQueue,

    /// Matches a batch id with the associated task ids.
    pub(crate) batch_to_tasks_mapping: Database<BEU32, DeCboRoaringBitmapCodec>,
    pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,

    /// The list of files referenced by the tasks.
    pub(crate) file_store: FileStore,

@@ -260,7 +259,6 @@ impl Queue {
        task_id: Option<TaskId>,
        custom_metadata: Option<String>,
        dry_run: bool,
        network: Option<DbTaskNetwork>,
    ) -> Result<Task> {
        let next_task_id = self.tasks.next_task_id(wtxn)?;

@@ -282,7 +280,7 @@ impl Queue {
            details: kind.default_details(),
            status: Status::Enqueued,
            kind: kind.clone(),
            network,
            network: None,
            custom_metadata,
        };
        // For deletion and cancelation tasks, we want to make extra sure that they

@@ -350,7 +348,6 @@ impl Queue {
            None,
            None,
            false,
            None,
        )?;

        Ok(())

@@ -2,9 +2,8 @@ use std::ops::{Bound, RangeBounds};

use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;

@@ -44,11 +43,11 @@ pub struct TaskQueue {
    /// Store the tasks that were canceled by a task uid
    pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
    /// Store the task ids of tasks which were enqueued at a specific date
    pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
    /// Store the task ids of finished tasks which started being processed at a specific date
    pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
    /// Store the task ids of tasks which finished at a specific date
    pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
}

impl TaskQueue {

@@ -115,15 +114,14 @@ impl TaskQueue {
    /// - CorruptedTaskQueue: The task doesn't exist in the database
    pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
        let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
        // network topology tasks may be processed multiple times.
        let maybe_reprocessing = old_task.status != Status::Enqueued
            || task.kind.as_kind() == Kind::NetworkTopologyChange;
        let reprocessing = old_task.status != Status::Enqueued;

        debug_assert!(old_task != *task);
        debug_assert_eq!(old_task.uid, task.uid);

        // If we're processing a task that failed it may already contains a batch_uid
        debug_assert!(
            maybe_reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
            reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
            "\n==> old: {old_task:?}\n==> new: {task:?}"
        );

@@ -145,24 +143,13 @@ impl TaskQueue {
            })?;
        }

        // Avoids rewriting part of the network topology change because of TOCTOU errors
        if let (
            KindWithContent::NetworkTopologyChange(old_state),
            KindWithContent::NetworkTopologyChange(new_state),
        ) = (old_task.kind, &mut task.kind)
        {
            new_state.merge(old_state);
            // the state possibly just changed, rewrite the details
            task.details = Some(new_state.to_details());
        }

        assert_eq!(
            old_task.enqueued_at, task.enqueued_at,
            "Cannot update a task's enqueued_at time"
        );
        if old_task.started_at != task.started_at {
            assert!(
                maybe_reprocessing || old_task.started_at.is_none(),
                reprocessing || old_task.started_at.is_none(),
                "Cannot update a task's started_at time"
            );
            if let Some(started_at) = old_task.started_at {

@@ -174,7 +161,7 @@ impl TaskQueue {
        }
        if old_task.finished_at != task.finished_at {
            assert!(
                maybe_reprocessing || old_task.finished_at.is_none(),
                reprocessing || old_task.finished_at.is_none(),
                "Cannot update a task's finished_at time"
            );
            if let Some(finished_at) = old_task.finished_at {

@@ -188,16 +175,7 @@ impl TaskQueue {
        task.network = match (old_task.network, task.network.take()) {
            (None, None) => None,
            (None, Some(network)) | (Some(network), None) => Some(network),
            (Some(left), Some(right)) => Some(match (left, right) {
                (
                    DbTaskNetwork::Remotes { remote_tasks: mut left, network_version: _ },
                    DbTaskNetwork::Remotes { remote_tasks: mut right, network_version },
                ) => {
                    left.append(&mut right);
                    DbTaskNetwork::Remotes { remote_tasks: left, network_version }
                }
                (_, right) => right,
            }),
            (Some(_), Some(network)) => Some(network),
        };

        self.all_tasks.put(wtxn, &task.uid, task)?;
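In the `task.network` hunk above, one side keeps only the newer value when both the stored task and the update carry network metadata, while the other merges the two `Remotes` maps so that remote task ids reported by different calls accumulate instead of overwriting each other. A hedged sketch of that merge on a simplified enum (the real `DbTaskNetwork` lives in `meilisearch_types::tasks::network` and has more variants):

```rust
use std::collections::BTreeMap;

// Simplified stand-in for the real `DbTaskNetwork`, illustration only.
#[derive(Debug, PartialEq)]
enum NetworkInfo {
    Remotes { remote_tasks: BTreeMap<String, u32>, network_version: u32 },
    Other,
}

fn merge(left: NetworkInfo, right: NetworkInfo) -> NetworkInfo {
    match (left, right) {
        (
            NetworkInfo::Remotes { remote_tasks: mut left, network_version: _ },
            NetworkInfo::Remotes { remote_tasks: mut right, network_version },
        ) => {
            // Keep every remote task seen so far; `append` moves `right`'s
            // entries into `left`, and `right` wins on duplicate keys.
            left.append(&mut right);
            NetworkInfo::Remotes { remote_tasks: left, network_version }
        }
        // Any other combination: the newer value simply replaces the older one.
        (_, right) => right,
    }
}

fn main() {
    let a = NetworkInfo::Remotes { remote_tasks: BTreeMap::from([("ms-0".into(), 1)]), network_version: 1 };
    let b = NetworkInfo::Remotes { remote_tasks: BTreeMap::from([("ms-1".into(), 2)]), network_version: 1 };
    let merged = merge(a, b);
    assert!(matches!(merged, NetworkInfo::Remotes { ref remote_tasks, .. } if remote_tasks.len() == 2));
}
```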
@@ -487,13 +465,11 @@ impl Queue {
            *before_finished_at,
        )?;

        if let Some(limit) = limit {
            tasks = if query.reverse.unwrap_or_default() {
                tasks.into_iter().take(*limit as usize).collect()
            } else {
                tasks.into_iter().rev().take(*limit as usize).collect()
            };
        }
        tasks = if query.reverse.unwrap_or_default() {
            tasks.into_iter().take(*limit).collect()
        } else {
            tasks.into_iter().rev().take(*limit).collect()
        };

        Ok(tasks)
    }

@@ -551,9 +527,7 @@ impl Queue {
        } else {
            Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
        };
        let tasks = self
            .tasks
            .get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
        let tasks = self.tasks.get_existing_tasks(rtxn, tasks.take(query.limit))?;

        let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;

@@ -28,21 +28,21 @@ fn query_tasks_from_and_limit() {

    let rtxn = index_scheduler.env.read_txn().unwrap();
    let processing = index_scheduler.processing_tasks.read().unwrap();
    let query = Query { limit: Some(0), ..Default::default() };
    let query = Query { limit: 0, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[]");

    let query = Query { limit: Some(1), ..Default::default() };
    let query = Query { limit: 1, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[2,]");

    let query = Query { limit: Some(2), ..Default::default() };
    let query = Query { limit: 2, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)

@@ -63,14 +63,14 @@ fn query_tasks_from_and_limit() {
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");

    let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
    let query = Query { from: Some(1), limit: 1, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[1,]");

    let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
    let query = Query { from: Some(1), limit: 2, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)

@@ -203,30 +203,26 @@ fn test_disable_auto_deletion_of_tasks() {
    )
    .unwrap();

    {
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
    }
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
    drop(rtxn);
    drop(proc);

    // now we're above the max number of tasks
    // and if we try to advance in the tick function no new task deletion should be enqueued
    handle.advance_till([Start, BatchCreated]);
    {
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
    }
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
    drop(rtxn);
    drop(proc);
}

#[test]

@@ -271,69 +267,59 @@ fn test_auto_deletion_of_tasks() {
    )
    .unwrap();

    {
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
    }
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
    drop(rtxn);
    drop(proc);

    {
        // now we're above the max number of tasks
        // and if we try to advance in the tick function a new task deletion should be enqueued
        handle.advance_till([Start, BatchCreated]);
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
    }
    // now we're above the max number of tasks
    // and if we try to advance in the tick function a new task deletion should be enqueued
    handle.advance_till([Start, BatchCreated]);
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
    drop(rtxn);
    drop(proc);

    {
        handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
    }
    handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
    drop(rtxn);
    drop(proc);

    handle.advance_one_failed_batch();
    // a new task deletion has been enqueued
    handle.advance_one_successful_batch();
    {
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
    }
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
    drop(rtxn);
    drop(proc);

    handle.advance_one_failed_batch();
    handle.advance_one_successful_batch();
    {
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let proc = index_scheduler.processing_tasks.read().unwrap();
        let tasks = index_scheduler
            .queue
            .get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
            .unwrap();
        let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
    }
    let rtxn = index_scheduler.env.read_txn().unwrap();
    let proc = index_scheduler.processing_tasks.read().unwrap();
    let tasks =
        index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
    let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
    snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
    drop(rtxn);
    drop(proc);
}

#[test]

@@ -74,7 +74,6 @@ impl From<KindWithContent> for AutobatchKind {
            | KindWithContent::DumpCreation { .. }
            | KindWithContent::Export { .. }
            | KindWithContent::UpgradeDatabase { .. }
            | KindWithContent::NetworkTopologyChange(_)
            | KindWithContent::SnapshotCreation => {
                panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
            }

@@ -1,27 +0,0 @@
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::tasks::Task;

use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};

impl IndexScheduler {
    pub(super) fn process_network_index_batch(
        &self,
        _network_task: Task,
        _inner_batch: Box<Batch>,
        _current_batch: &mut ProcessingBatch,
        _progress: Progress,
    ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
        Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
    }

    pub(super) fn process_network_ready(
        &self,
        _task: Task,
        _progress: Progress,
    ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
        Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
    }
}
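The file above is the community-edition stub for network task processing: every entry point immediately returns `Error::RequiresEnterpriseEdition`, and other hunks in this diff gate code with `#[cfg(feature = "enterprise")]`. A hedged sketch of this compile-time gating pattern in general; the module name and feature flag here are illustrative, not the crate's actual layout:

```rust
// Cargo.toml (illustrative):
// [features]
// enterprise = []

#[cfg(feature = "enterprise")]
mod network_processing {
    pub fn process_network_task() -> Result<(), String> {
        // Real implementation is only compiled when the feature is enabled.
        Ok(())
    }
}

#[cfg(not(feature = "enterprise"))]
mod network_processing {
    pub fn process_network_task() -> Result<(), String> {
        // Community build: same signature, but the call is rejected.
        Err("processing a network task requires the Enterprise Edition".to_string())
    }
}

fn main() {
    match network_processing::process_network_task() {
        Ok(()) => println!("network task processed"),
        Err(message) => eprintln!("{message}"),
    }
}
```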
@@ -4,7 +4,6 @@ use std::io::ErrorKind;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::network::NetworkTopologyState;
use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use uuid::Uuid;

@@ -60,14 +59,6 @@ pub(crate) enum Batch {
        index_uid: String,
        task: Task,
    },
    #[allow(clippy::enum_variant_names)] // warranted because we are executing an inner index batch
    NetworkIndexBatch {
        network_task: Task,
        inner_batch: Box<Batch>,
    },
    NetworkReady {
        task: Task,
    },
}

#[derive(Debug)]

@@ -149,14 +140,9 @@ impl Batch {
                ..
            } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
            },
            Batch::IndexSwap { task } | Batch::NetworkReady { task } => {
            Batch::IndexSwap { task } => {
                RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
            }
            Batch::NetworkIndexBatch { network_task, inner_batch } => {
                let mut tasks = inner_batch.ids();
                tasks.insert(network_task.uid);
                tasks
            }
        }
    }

@@ -170,14 +156,12 @@ impl Batch {
            | Dump(_)
            | Export { .. }
            | UpgradeDatabase { .. }
            | NetworkReady { .. }
            | IndexSwap { .. } => None,
            IndexOperation { op, .. } => Some(op.index_uid()),
            IndexCreation { index_uid, .. }
            | IndexUpdate { index_uid, .. }
            | IndexDeletion { index_uid, .. }
            | IndexCompaction { index_uid, .. } => Some(index_uid),
            NetworkIndexBatch { network_task: _, inner_batch } => inner_batch.index_uid(),
        }
    }
}

@@ -200,8 +184,6 @@ impl fmt::Display for Batch {
            Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
            Batch::Export { .. } => f.write_str("Export")?,
            Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
            Batch::NetworkIndexBatch { .. } => f.write_str("NetworkTopologyChange")?,
            Batch::NetworkReady { .. } => f.write_str("NetworkTopologyChange")?,
        };
        match index_uid {
            Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),

@@ -470,7 +452,6 @@ impl IndexScheduler {
    pub(crate) fn create_next_batch(
        &self,
        rtxn: &RoTxn,
        processing_network_tasks: &RoaringBitmap,
    ) -> Result<Option<(Batch, ProcessingBatch)>> {
        #[cfg(test)]
        self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;

@@ -479,6 +460,7 @@ impl IndexScheduler {
        let mut current_batch = ProcessingBatch::new(batch_id);

        let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
        let count_total_enqueued = enqueued.len();
        let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;

        // 0. we get the last task to cancel.

@@ -527,15 +509,7 @@ impl IndexScheduler {
            )));
        }

        // 2. Check for enqueued network topology changes
        let network_changes = self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)?
            & (enqueued | processing_network_tasks);
        if let Some(task_id) = network_changes.iter().next() {
            let task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
            return self.start_processing_network(rtxn, task, enqueued, current_batch);
        }

        // 3. we get the next task to delete
        // 2. we get the next task to delete
        let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
        if !to_delete.is_empty() {
            let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;

@@ -545,7 +519,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
        }

        // 4. we get the next task to compact
        // 3. we get the next task to compact
        let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
        if let Some(task_id) = to_compact.min() {
            let mut task =

@@ -560,7 +534,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
        }

        // 5. we batch the export.
        // 4. we batch the export.
        let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
        if !to_export.is_empty() {
            let task_id = to_export.iter().next().expect("There must be at least one export task");

@@ -571,7 +545,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::Export { task }, current_batch)));
        }

        // 6. we batch the snapshot.
        // 5. we batch the snapshot.
        let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
        if !to_snapshot.is_empty() {
            let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;

@@ -581,7 +555,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
        }

        // 7. we batch the dumps.
        // 6. we batch the dumps.
        let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
        if let Some(to_dump) = to_dump.min() {
            let mut task =
@@ -594,66 +568,25 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
let network = self.network();
|
||||
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// 8. We make a batch from the unprioritised tasks.
|
||||
let (batch, current_batch) =
|
||||
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
|
||||
// We want to execute all tasks, except those that have a version strictly higher than the network version
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
// and use the autobatcher to batch the enqueued tasks associated with it
|
||||
|
||||
let Some(task_version) =
|
||||
task.network.as_ref().map(|tastk_network| tastk_network.network_version())
|
||||
else {
|
||||
// do not skip tasks that have no network version, otherwise we will never execute them
|
||||
return false;
|
||||
};
|
||||
|
||||
// skip tasks with a version strictly higher than the network version
|
||||
task_version > network.version
|
||||
})?;
|
||||
Ok(batch.map(|batch| (batch, current_batch)))
|
||||
}
|
||||
|
||||
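The new tail of `create_next_batch` gates unprioritised tasks on their network version: a task stamped with a version strictly newer than the version the node currently knows is deferred, while unversioned tasks always remain eligible. A self-contained restatement of that predicate, using toy types rather than the scheduler's `Task`:

```rust
// Minimal sketch of the version gate above; `ToyTask` and `Version` are
// illustrative stand-ins, not Meilisearch types.
type Version = u64;

struct ToyTask {
    uid: u32,
    network_version: Option<Version>,
}

/// Returns true when the task must be skipped: it carries a network version
/// strictly newer than the version this node currently runs.
fn skip_for_now(task: &ToyTask, current: Version) -> bool {
    match task.network_version {
        // Tasks without a network version are always eligible,
        // otherwise they would never run.
        None => false,
        Some(v) => v > current,
    }
}

fn main() {
    let current = 3;
    let tasks = [
        ToyTask { uid: 1, network_version: None },
        ToyTask { uid: 2, network_version: Some(3) },
        ToyTask { uid: 3, network_version: Some(4) }, // deferred until the node catches up
    ];
    let runnable: Vec<u32> =
        tasks.iter().filter(|t| !skip_for_now(t, current)).map(|t| t.uid).collect();
    assert_eq!(runnable, vec![1, 2]);
}
```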
fn create_next_batch_unprioritized<F>(
&self,
rtxn: &RoTxn,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
mut skip_if: F,
) -> Result<(Option<Batch>, ProcessingBatch)>
where
F: FnMut(&Task) -> bool,
{
let count_total_enqueued = enqueued.len();

let mut enqueued_it = enqueued.iter();
let mut task;
let index_name = loop {
let Some(task_id) = enqueued_it.next() else {
return Ok((None, current_batch));
};
task = self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

if skip_if(&task) {
continue;
}
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
// and use the autobatcher to batch the enqueued tasks associated with it

if let Some(&index_name) = task.indexes().first() {
break index_name;
} else {
assert!(
matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())
);
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexSwap,
id: task.uid,
});
return Ok((Some(Batch::IndexSwap { task }), current_batch));
};
let index_name = if let Some(&index_name) = task.indexes().first() {
index_name
} else {
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexSwap,
id: task.uid,
});
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
};

let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
@@ -688,10 +621,6 @@ impl IndexScheduler {
.get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;

if skip_if(&task) {
continue;
}

if let Some(uuid) = task.content_uuid() {
let content_size = match self.queue.file_store.compute_size(uuid) {
Ok(content_size) => content_size,
@@ -722,127 +651,19 @@ impl IndexScheduler {
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
let batch = self.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
&mut current_batch,
create_index,
)?;
return Ok((batch, current_batch));
return Ok(self
.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
&mut current_batch,
create_index,
)?
.map(|batch| (batch, current_batch)));
}

// If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do.
Ok((None, current_batch))
}
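`create_next_batch_unprioritized` is parameterized by a `skip_if: FnMut(&Task) -> bool` callback, so the same batching loop can serve normal scheduling and the restricted network modes with different veto policies. A reduced sketch of that shape, with plain slices instead of the task queue:

```rust
// Reduced sketch of the `skip_if` parameterization above: a selection routine
// that walks a queue and hands back the first candidate the caller does not
// veto. Types here are illustrative, not the scheduler's.
fn first_selectable<T, F>(queue: &[T], mut skip_if: F) -> Option<&T>
where
    F: FnMut(&T) -> bool,
{
    queue.iter().find(|&item| !skip_if(item))
}

fn main() {
    let queue = [10, 25, 7, 42];
    // Normal mode: nothing is skipped.
    assert_eq!(first_selectable(&queue, |_| false), Some(&10));
    // Restricted mode: the caller injects its own policy.
    assert_eq!(first_selectable(&queue, |&n| n < 20), Some(&25));
}
```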

fn start_processing_network(
&self,
rtxn: &RoTxn,
mut task: Task,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
) -> Result<Option<(Batch, ProcessingBatch)>> {
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::NetworkTask { id: task.uid });

let change_version =
task.network.as_ref().map(|network| network.network_version()).unwrap_or_default();
let KindWithContent::NetworkTopologyChange(network_topology_change) = &task.kind else {
panic!("inconsistent kind with content")
};

match network_topology_change.state() {
NetworkTopologyState::WaitingForOlderTasks => {
let res =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// in this limited mode of execution, we only want to run tasks:
// 0. with an index
// 1. with a version
// 2. that version strictly lower than the network task version

// 0. skip indexless tasks that are not index swap
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
return true;
}

// 1. skip tasks without version
let Some(task_version) =
task.network.as_ref().map(|network| network.network_version())
else {
return true;
};

// 2. skip tasks with a version equal or higher to the network task version
task_version >= change_version
});

let (batch, mut current_batch) = res?;

let batch = match batch {
Some(batch) => {
let inner_batch = Box::new(batch);
let inner_reason = current_batch.reason.to_string();
current_batch.reason(BatchStopReason::NetworkTaskOlderTasks {
id: task.uid,
inner_reason,
});

Batch::NetworkIndexBatch { network_task: task, inner_batch }
}
None => Batch::NetworkReady { task },
};

Ok(Some((batch, current_batch)))
}
NetworkTopologyState::ImportingDocuments => {
// if the import is done we need to go to the next state
if network_topology_change.is_import_finished() {
return Ok(Some((Batch::NetworkReady { task }, current_batch)));
}

let res =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// in this limited mode of execution, we only want to run tasks:
// 0. with an index
// 1. with a version
// 2. that version equal to the network task version

// 0. skip indexless tasks
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
return true;
}

// 1. skip tasks without version
let Some(task_version) =
task.network.as_ref().map(|network| network.network_version())
else {
return true;
};

// 2. skip tasks with a version different from the network task version
task_version != change_version
});

let (batch, mut current_batch) = res?;

let batch = batch.map(|batch| {
let inner_batch = Box::new(batch);
let inner_reason = current_batch.reason.to_string();
current_batch.reason(BatchStopReason::NetworkTaskImportTasks {
id: task.uid,
inner_reason,
});

(Batch::NetworkIndexBatch { network_task: task, inner_batch }, current_batch)
});

Ok(batch)
}
NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => {
Ok(Some((Batch::NetworkReady { task }, current_batch)))
}
}
Ok(None)
}
}

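The two closures passed to `create_next_batch_unprioritized` above encode different eligibility rules per topology state: while waiting for older tasks, only versioned tasks strictly older than the change may run; while importing, only tasks stamped with exactly the change's version may run; in the remaining states the scheduler emits a `NetworkReady` batch instead of batching regular tasks. A condensed, self-contained restatement of just those comparison rules (the enum and function are illustrative stand-ins):

```rust
// Condensed restatement of the per-state task filters visible above.
enum ToyTopologyState {
    WaitingForOlderTasks,
    ImportingDocuments,
    ExportingDocuments,
    Finished,
}

/// Decides whether a task carrying `task_version` may run while a topology
/// change at `change_version` is in the given state. In the last two states
/// the scheduler above stops batching regular tasks, so nothing is eligible.
fn eligible(state: ToyTopologyState, task_version: Option<u64>, change_version: u64) -> bool {
    use ToyTopologyState::*;
    match (state, task_version) {
        (WaitingForOlderTasks, Some(v)) => v < change_version,
        (ImportingDocuments, Some(v)) => v == change_version,
        _ => false,
    }
}

fn main() {
    use ToyTopologyState::*;
    let change = 7;
    assert!(eligible(WaitingForOlderTasks, Some(6), change));
    assert!(!eligible(WaitingForOlderTasks, Some(7), change));
    assert!(eligible(ImportingDocuments, Some(7), change));
    assert!(!eligible(ExportingDocuments, Some(7), change));
    assert!(!eligible(Finished, None, change));
}
```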
@@ -1,308 +0,0 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>

use std::time::Duration;

use bumpalo::Bump;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::new::indexer::current_edition::sharding::Shards;
use meilisearch_types::milli::{self};
use meilisearch_types::network::Remote;
use meilisearch_types::tasks::network::{NetworkTopologyState, Origin};
use meilisearch_types::tasks::{KindWithContent, Status, Task};
use roaring::RoaringBitmap;

use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance};
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};

impl IndexScheduler {
pub(super) fn process_network_index_batch(
&self,
mut network_task: Task,
inner_batch: Box<Batch>,
current_batch: &mut ProcessingBatch,
progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut network_task.kind
else {
tracing::error!("unexpected network kind for network task while processing batch");
return Err(Error::CorruptedTaskQueue);
};

let network = network_topology_change.network_for_state();

let (mut tasks, info) =
self.process_batch(*inner_batch, current_batch, progress, network)?;

for task in &tasks {
let Some(network) = task.network.as_ref() else {
continue;
};
let Some(import) = network.import_data() else {
continue;
};
if let Some(index_name) = import.index_name.as_deref() {
network_topology_change.process_remote_tasks(
&import.remote_name,
index_name,
import.document_count,
);
}
}
network_task.details = Some(network_topology_change.to_details());

tasks.push(network_task);
Ok((tasks, info))
}

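In the deleted `process_network_index_batch` above, the `ImportData` attached to each finished task is folded back into the topology-change task via `process_remote_tasks`, so the change can tell how many documents every remote has delivered per index. A toy sketch of that bookkeeping (the map, the completion rule, and all names are illustrative, not the real `NetworkTopologyChange` API):

```rust
use std::collections::HashMap;

// Toy bookkeeping in the spirit of `process_remote_tasks`: tally how many
// documents each (remote, index) pair has delivered so far.
#[derive(Default)]
struct ImportProgress {
    received: HashMap<(String, String), u64>,
}

impl ImportProgress {
    fn record(&mut self, remote: &str, index: &str, document_count: u64) {
        *self.received.entry((remote.to_string(), index.to_string())).or_insert(0) +=
            document_count;
    }

    /// Example completion rule: the import is done once every expected
    /// (remote, index) pair reached its announced total.
    fn is_finished(&self, expected: &HashMap<(String, String), u64>) -> bool {
        expected
            .iter()
            .all(|(key, total)| self.received.get(key).copied().unwrap_or(0) >= *total)
    }
}

fn main() {
    let mut progress = ImportProgress::default();
    let mut expected = HashMap::new();
    expected.insert(("remote-a".to_string(), "movies".to_string()), 100);

    progress.record("remote-a", "movies", 60);
    assert!(!progress.is_finished(&expected));
    progress.record("remote-a", "movies", 40);
    assert!(progress.is_finished(&expected));
}
```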
pub(super) fn process_network_ready(
|
||||
&self,
|
||||
mut task: Task,
|
||||
progress: Progress,
|
||||
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
|
||||
let KindWithContent::NetworkTopologyChange(network_topology_change) = &mut task.kind else {
|
||||
tracing::error!("network topology change task has the wrong kind with content");
|
||||
return Err(Error::CorruptedTaskQueue);
|
||||
};
|
||||
|
||||
let Some(task_network) = &task.network else {
|
||||
tracing::error!("network topology change task has no network");
|
||||
return Err(Error::CorruptedTaskQueue);
|
||||
};
|
||||
|
||||
let origin;
|
||||
let origin = match task_network.origin() {
|
||||
Some(origin) => origin,
|
||||
None => {
|
||||
let myself = network_topology_change.in_name().expect("origin is not the leader");
|
||||
origin = Origin {
|
||||
remote_name: myself.to_string(),
|
||||
task_uid: task.uid,
|
||||
network_version: task_network.network_version(),
|
||||
};
|
||||
&origin
|
||||
}
|
||||
};
|
||||
|
||||
let mut moved_documents = None;
|
||||
if let (Some((remotes, out_name)), Some(new_shards)) =
|
||||
(network_topology_change.export_to_process(), network_topology_change.new_shards())
|
||||
{
|
||||
moved_documents = Some(self.balance_documents(
|
||||
remotes,
|
||||
out_name,
|
||||
new_shards,
|
||||
origin,
|
||||
&progress,
|
||||
&self.scheduler.must_stop_processing,
|
||||
)?);
|
||||
}
|
||||
if let Some(moved_documents) = moved_documents {
|
||||
// we need the mut moved documents to avoid a lifetime error in the previous if let.
|
||||
network_topology_change.set_moved(moved_documents);
|
||||
}
|
||||
network_topology_change.update_state();
|
||||
if network_topology_change.state() == NetworkTopologyState::Finished {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
task.details = Some(network_topology_change.to_details());
|
||||
Ok((vec![task], Default::default()))
|
||||
}
|
||||
|
||||
fn balance_documents<'a, I: Iterator<Item = (&'a str, &'a Remote)> + Clone>(
|
||||
&self,
|
||||
remotes: I,
|
||||
out_name: &str,
|
||||
new_shards: Shards,
|
||||
network_change_origin: &Origin,
|
||||
progress: &Progress,
|
||||
must_stop_processing: &crate::scheduler::MustStopProcessing,
|
||||
) -> crate::Result<u64> {
|
||||
// TECHDEBT: this spawns a `ureq` agent additionally to `reqwest`. We probably want to harmonize all of this.
|
||||
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
|
||||
|
||||
let mut indexer_alloc = Bump::new();
|
||||
|
||||
let scheduler_rtxn = self.env.read_txn()?;
|
||||
|
||||
let index_count = self.index_mapper.index_count(&scheduler_rtxn)?;
|
||||
|
||||
// when the instance is empty, we still need to tell that to remotes, as they cannot know of that fact and will be waiting for
|
||||
// data
|
||||
if index_count == 0 {
|
||||
for (remote_name, remote) in remotes {
|
||||
let target = TargetInstance {
|
||||
remote_name: Some(remote_name),
|
||||
base_url: &remote.url,
|
||||
api_key: remote.write_api_key.as_deref(),
|
||||
};
|
||||
|
||||
let res = self.export_no_index(
|
||||
target,
|
||||
out_name,
|
||||
network_change_origin,
|
||||
&agent,
|
||||
must_stop_processing,
|
||||
);
|
||||
|
||||
if let Err(err) = res {
|
||||
tracing::warn!("Could not signal not to wait documents to `{remote_name}` due to error: {err}");
|
||||
}
|
||||
}
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut total_moved_documents = 0;
|
||||
|
||||
self.index_mapper.try_for_each_index::<(), ()>(
|
||||
&scheduler_rtxn,
|
||||
|index_uid, index| -> crate::Result<()> {
|
||||
indexer_alloc.reset();
|
||||
let err = |err| Error::from_milli(err, Some(index_uid.to_string()));
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let all_docids = index.external_documents_ids();
|
||||
let mut documents_to_move_to =
|
||||
hashbrown::HashMap::<String, RoaringBitmap>::new();
|
||||
let mut documents_to_delete = RoaringBitmap::new();
|
||||
|
||||
for res in all_docids.iter(&index_rtxn)? {
|
||||
let (external_docid, docid) = res?;
|
||||
match new_shards.processing_shard(external_docid) {
|
||||
Some(shard) if shard.is_own => continue,
|
||||
Some(shard) => {
|
||||
documents_to_move_to.entry_ref(&shard.name).or_default().insert(docid);
|
||||
}
|
||||
None => {
|
||||
documents_to_delete.insert(docid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
|
||||
|
||||
for (remote_name, remote) in remotes.clone() {
|
||||
let documents_to_move =
|
||||
documents_to_move_to.remove(remote_name).unwrap_or_default();
|
||||
|
||||
let target = TargetInstance {
|
||||
remote_name: Some(remote_name),
|
||||
base_url: &remote.url,
|
||||
api_key: remote.write_api_key.as_deref(),
|
||||
};
|
||||
let options = ExportOptions {
|
||||
index_uid,
|
||||
payload_size: None,
|
||||
override_settings: false,
|
||||
export_mode: super::process_export::ExportMode::NetworkBalancing {
|
||||
index_count,
|
||||
export_old_remote_name: out_name,
|
||||
network_change_origin,
|
||||
},
|
||||
};
|
||||
let ctx = ExportContext {
|
||||
index,
|
||||
index_rtxn: &index_rtxn,
|
||||
universe: &documents_to_move,
|
||||
progress,
|
||||
agent: &agent,
|
||||
must_stop_processing,
|
||||
};
|
||||
|
||||
let res = self.export_one_index(target, options, ctx);
|
||||
|
||||
match res {
|
||||
Ok(_) =>{ documents_to_delete |= documents_to_move;}
|
||||
Err(err) => {
|
||||
tracing::warn!("Could not export documents to `{remote_name}` due to error: {err}\n - Note: Documents will be kept");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if documents_to_delete.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
total_moved_documents += documents_to_delete.len();
|
||||
|
||||
self.delete_documents_from_index(progress, must_stop_processing, &indexer_alloc, index_uid, index, &err, index_rtxn, documents_to_delete, fields_ids_map)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(total_moved_documents)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn delete_documents_from_index(
|
||||
&self,
|
||||
progress: &Progress,
|
||||
must_stop_processing: &super::MustStopProcessing,
|
||||
indexer_alloc: &Bump,
|
||||
index_uid: &str,
|
||||
index: &milli::Index,
|
||||
err: &impl Fn(milli::Error) -> Error,
|
||||
index_rtxn: milli::heed::RoTxn<'_, milli::heed::WithoutTls>,
|
||||
documents_to_delete: RoaringBitmap,
|
||||
fields_ids_map: milli::FieldsIdsMap,
|
||||
) -> std::result::Result<(), Error> {
|
||||
let mut new_fields_ids_map = fields_ids_map.clone();
|
||||
|
||||
// candidates not empty => index not empty => a primary key is set
|
||||
let primary_key = index.primary_key(&index_rtxn)?.unwrap();
|
||||
|
||||
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(milli::Error::from)
|
||||
.map_err(err)?;
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
indexer.delete_documents_by_docids(documents_to_delete);
|
||||
let document_changes = indexer.into_changes(indexer_alloc, primary_key);
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(&index_wtxn)
|
||||
.map_err(milli::Error::from)
|
||||
.map_err(err)?;
|
||||
let embedders = self.embedders(index_uid.to_string(), embedders)?;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
indexer::index(
|
||||
&mut index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
None, // document deletion never changes primary key
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&EmbedderStats::default(),
|
||||
)
|
||||
.map_err(err)?;
|
||||
|
||||
// update stats
|
||||
let mut mapper_wtxn = self.env.write_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(index, &index_wtxn).map_err(err)?;
|
||||
self.index_mapper.store_stats_of(&mut mapper_wtxn, index_uid, &stats)?;
|
||||
|
||||
index_wtxn.commit()?;
|
||||
// update stats after committing changes to index
|
||||
mapper_wtxn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,7 @@
mod autobatcher;
#[cfg(test)]
mod autobatcher_test;
#[cfg(not(feature = "enterprise"))]
mod community_edition;
mod create_batch;
#[cfg(feature = "enterprise")]
mod enterprise_edition;

mod process_batch;
mod process_dump_creation;
mod process_export;
@@ -26,6 +21,7 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc;

use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
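The module hunk above selects between a `community_edition` and an `enterprise_edition` scheduler module with a cargo feature, so exactly one implementation is compiled into any given build. A minimal sketch of the pattern with dummy modules (the feature name is taken from the diff, everything else is placeholder):

```rust
// Minimal sketch of a feature-gated module pair. Build with or without
// `--features enterprise` (assuming such a feature is declared in Cargo.toml)
// to switch which implementation is compiled in.
#[cfg(not(feature = "enterprise"))]
mod edition {
    pub fn balance_documents() -> &'static str {
        "community edition: network balancing is unavailable"
    }
}

#[cfg(feature = "enterprise")]
mod edition {
    pub fn balance_documents() -> &'static str {
        "enterprise edition: documents are redistributed across shards"
    }
}

fn main() {
    // Exactly one `edition` module exists at compile time, so the call site
    // stays identical in both builds.
    println!("{}", edition::balance_documents());
}
```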
@@ -137,7 +133,6 @@ impl Scheduler {
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit,
|
||||
export_default_payload_size_bytes: _,
|
||||
instance_features: _,
|
||||
auto_upgrade: _,
|
||||
embedding_cache_cap,
|
||||
@@ -183,8 +178,6 @@ impl IndexScheduler {
|
||||
self.breakpoint(crate::test_utils::Breakpoint::Start);
|
||||
}
|
||||
|
||||
let previous_processing_batch = self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
if self.cleanup_enabled {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.queue.cleanup_task_queue(&mut wtxn)?;
|
||||
@@ -192,16 +185,11 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
let (batch, mut processing_batch) = match self
|
||||
.create_next_batch(&rtxn, &previous_processing_batch.processing)
|
||||
.map_err(|e| Error::CreateBatch(Box::new(e)))?
|
||||
{
|
||||
Some(batch) => batch,
|
||||
None => {
|
||||
*self.processing_tasks.write().unwrap() = previous_processing_batch;
|
||||
return Ok(TickOutcome::WaitForSignal);
|
||||
}
|
||||
};
|
||||
let (batch, mut processing_batch) =
|
||||
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
|
||||
Some(batch) => batch,
|
||||
None => return Ok(TickOutcome::WaitForSignal),
|
||||
};
|
||||
let index_uid = batch.index_uid().map(ToOwned::to_owned);
|
||||
drop(rtxn);
|
||||
|
||||
@@ -231,12 +219,7 @@ impl IndexScheduler {
|
||||
let handle = std::thread::Builder::new()
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn_scoped(s, move || {
|
||||
cloned_index_scheduler.process_batch(
|
||||
batch,
|
||||
processing_batch,
|
||||
p,
|
||||
&self.network(),
|
||||
)
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, p)
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@@ -277,14 +260,7 @@ impl IndexScheduler {
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
progress.update_progress(BatchProgress::WritingTasksToDisk);
|
||||
|
||||
processing_batch.finished();
|
||||
// whether the batch made progress.
|
||||
// a batch make progress if it failed or if it contains at least one fully processed (or cancelled) task.
|
||||
//
|
||||
// if a batch did not make progress, it means that all of its tasks are waiting on the scheduler to make progress,
|
||||
// and so we must wait for new tasks. Such a batch is not persisted to DB, and is resumed on the next tick.
|
||||
let mut batch_made_progress = false;
|
||||
let mut stop_scheduler_forever = false;
|
||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
let mut canceled = RoaringBitmap::new();
|
||||
@@ -305,11 +281,7 @@ impl IndexScheduler {
|
||||
#[allow(unused_variables)]
|
||||
for (i, mut task) in tasks.into_iter().enumerate() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
processing_batch.update_from_task(&task);
|
||||
if !matches!(task.status, Status::Processing | Status::Enqueued) {
|
||||
batch_made_progress = true;
|
||||
processing_batch.finish_task(&mut task);
|
||||
}
|
||||
processing_batch.update(&mut task);
|
||||
if task.status == Status::Canceled {
|
||||
canceled.insert(task.uid);
|
||||
canceled_by = task.canceled_by;
|
||||
@@ -376,9 +348,6 @@ impl IndexScheduler {
|
||||
}
|
||||
// In case of a failure we must get back and patch all the tasks with the error.
|
||||
Err(err) => {
|
||||
// always persist failed batches
|
||||
batch_made_progress = true;
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||
@@ -402,10 +371,7 @@ impl IndexScheduler {
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(error.clone());
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
processing_batch.update_from_task(&task);
|
||||
if !matches!(task.status, Status::Processing | Status::Enqueued) {
|
||||
processing_batch.finish_task(&mut task);
|
||||
}
|
||||
processing_batch.update(&mut task);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
@@ -428,12 +394,44 @@ impl IndexScheduler {
|
||||
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
|
||||
process_batch_info;
|
||||
|
||||
processing_batch.write_stats(
|
||||
&progress,
|
||||
congestion,
|
||||
pre_commit_dabases_sizes,
|
||||
post_commit_dabases_sizes,
|
||||
);
|
||||
processing_batch.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
let mut congestion_info = serde_json::Map::new();
|
||||
congestion_info.insert("attempts".into(), congestion.attempts.into());
|
||||
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
|
||||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
use byte_unit::Byte;
|
||||
use byte_unit::UnitType::Binary;
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((
|
||||
dbname.to_case(Case::Camel),
|
||||
format!("{post:#.2} ({sign}{diff:#.2})").into(),
|
||||
))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(congestion) = congestion {
|
||||
tracing::debug!(
|
||||
@@ -446,49 +444,46 @@ impl IndexScheduler {
|
||||
|
||||
tracing::debug!("call trace: {:?}", progress.accumulated_durations());
|
||||
|
||||
if batch_made_progress {
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
}
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
if batch_made_progress {
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
"Failure to delete the content files associated with task {}. Error: {e}",
|
||||
task.uid
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
self.notify_webhooks(ids);
|
||||
}
|
||||
self.notify_webhooks(ids);
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
|
||||
|
||||
if stop_scheduler_forever {
|
||||
Ok(TickOutcome::StopProcessingForever)
|
||||
} else {
|
||||
|
||||
@@ -10,7 +10,6 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::network::Network;
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use milli::update::Settings as MilliSettings;
@@ -56,7 +55,6 @@ impl IndexScheduler {
batch: Batch,
current_batch: &mut ProcessingBatch,
progress: Progress,
network: &Network,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
#[cfg(test)]
{
@@ -178,7 +176,6 @@ impl IndexScheduler {
op,
&progress,
current_batch.embedder_stats.clone(),
network,
)?;

{
@@ -238,7 +235,6 @@ impl IndexScheduler {
Batch::IndexUpdate { index_uid, primary_key, new_index_uid: None, task },
current_batch,
progress,
network,
)
}
Batch::IndexUpdate { index_uid, primary_key, new_index_uid, mut task } => {
@@ -543,10 +539,6 @@ impl IndexScheduler {

Ok((tasks, ProcessBatchInfo::default()))
}
Batch::NetworkIndexBatch { network_task, inner_batch } => {
self.process_network_index_batch(network_task, inner_batch, current_batch, progress)
}
Batch::NetworkReady { task } => self.process_network_ready(task, progress),
}
}

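The `NetworkIndexBatch` arm above hands a `Box<Batch>` back to the processing path, which is the usual way to let an enum variant wrap a value of its own type and then recurse when handling it. A toy illustration of that shape, unrelated to the real scheduler types:

```rust
// Toy illustration of the boxed-inner-batch shape: a recursive enum needs the
// Box indirection, and processing simply recurses into the wrapped value.
enum Job {
    Dump,
    IndexOperation { index_uid: String },
    // Wrapping variant: without the Box the enum would have infinite size.
    Network { inner: Box<Job> },
}

fn process(job: &Job) -> String {
    match job {
        Job::Dump => "processed dump".to_string(),
        Job::IndexOperation { index_uid } => format!("processed index `{index_uid}`"),
        Job::Network { inner } => {
            // Delegate to the ordinary path, then add the network bookkeeping.
            let report = process(inner);
            format!("{report} (wrapped in a network job)")
        }
    }
}

fn main() {
    let job = Job::Network {
        inner: Box::new(Job::IndexOperation { index_uid: "movies".into() }),
    };
    println!("{}", process(&job));
}
```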
@@ -1,6 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{self, Write as _};
|
||||
use std::ops::ControlFlow;
|
||||
use std::sync::atomic;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -8,7 +7,6 @@ use backoff::ExponentialBackoff;
|
||||
use byte_unit::Byte;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
@@ -17,10 +15,7 @@ use meilisearch_types::milli::update::{request_threads, Setting};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
|
||||
use meilisearch_types::settings::{self, SecretPolicy};
|
||||
use meilisearch_types::tasks::network::headers::SetHeader as _;
|
||||
use meilisearch_types::tasks::network::{headers, ImportData, ImportMetadata, Origin};
|
||||
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Deserialize;
|
||||
use ureq::{json, Response};
|
||||
|
||||
@@ -55,7 +50,6 @@ impl IndexScheduler {
|
||||
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
|
||||
let err = |err| Error::from_milli(err, Some(uid.to_string()));
|
||||
if must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
@@ -67,473 +61,260 @@ impl IndexScheduler {
|
||||
));
|
||||
|
||||
let ExportIndexSettings { filter, override_settings } = export_settings;
|
||||
|
||||
let index = self.index(uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten();
|
||||
let filter_universe =
|
||||
filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?;
|
||||
let whole_universe =
|
||||
index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?;
|
||||
let universe = filter_universe.unwrap_or(whole_universe);
|
||||
let target = TargetInstance { remote_name: None, base_url, api_key };
|
||||
let ctx = ExportContext {
|
||||
index: &index,
|
||||
index_rtxn: &index_rtxn,
|
||||
universe: &universe,
|
||||
progress: &progress,
|
||||
agent: &agent,
|
||||
must_stop_processing: &must_stop_processing,
|
||||
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
|
||||
// First, check if the index already exists
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
let response = retry(&must_stop_processing, || {
|
||||
let mut request = agent.get(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
|
||||
request.send_bytes(Default::default()).map_err(into_backoff_error)
|
||||
});
|
||||
let index_exists = match response {
|
||||
Ok(response) => response.status() == 200,
|
||||
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
|
||||
false
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
let options = ExportOptions {
|
||||
index_uid: uid,
|
||||
payload_size,
|
||||
override_settings: *override_settings,
|
||||
export_mode: ExportMode::ExportRoute,
|
||||
};
|
||||
let total_documents = self.export_one_index(target, options, ctx)?;
|
||||
|
||||
output.insert(
|
||||
IndexUidPattern::new_unchecked(uid.clone()),
|
||||
DetailsExportIndexSettings {
|
||||
settings: (*export_settings).clone(),
|
||||
matched_documents: Some(total_documents),
|
||||
},
|
||||
);
|
||||
}
|
||||
let primary_key = index
|
||||
.primary_key(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub(super) fn export_one_index(
|
||||
&self,
|
||||
target: TargetInstance<'_>,
|
||||
options: ExportOptions<'_>,
|
||||
ctx: ExportContext<'_>,
|
||||
) -> Result<u64, Error> {
|
||||
let err = |err| Error::from_milli(err, Some(options.index_uid.to_string()));
|
||||
let total_index_documents = ctx.universe.len();
|
||||
let task_network = options.task_network(total_index_documents);
|
||||
|
||||
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
let url = format!(
|
||||
"{base_url}/indexes/{index_uid}",
|
||||
base_url = target.base_url,
|
||||
index_uid = options.index_uid
|
||||
);
|
||||
let response = retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.get(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
|
||||
request.send_bytes(Default::default()).map_err(into_backoff_error)
|
||||
});
|
||||
let index_exists = match response {
|
||||
Ok(response) => response.status() == 200,
|
||||
Err(Error::FromRemoteWhenExporting { code, .. })
|
||||
if code == Code::IndexNotFound.name() =>
|
||||
{
|
||||
false
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
let primary_key =
|
||||
ctx.index.primary_key(ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?;
|
||||
if !index_exists {
|
||||
let url = format!("{base_url}/indexes", base_url = target.base_url);
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.post(&url);
|
||||
|
||||
if let Some((import_data, origin, metadata)) = &task_network {
|
||||
request = set_network_ureq_headers(request, import_data, origin, metadata);
|
||||
}
|
||||
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
// Create the index
|
||||
if !index_exists {
|
||||
let url = format!("{base_url}/indexes");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param =
|
||||
json!({ "uid": options.index_uid, "primaryKey": primary_key });
|
||||
|
||||
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
if index_exists && options.override_settings {
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.patch(&url);
|
||||
if let Some((import_data, origin, metadata)) = &task_network {
|
||||
request = set_network_ureq_headers(request, import_data, origin, metadata);
|
||||
}
|
||||
})?;
|
||||
}
|
||||
|
||||
// Patch the index primary key
|
||||
if index_exists && *override_settings {
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param = json!({ "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
if !index_exists || options.override_settings {
|
||||
let mut settings =
|
||||
settings::settings(ctx.index, ctx.index_rtxn, SecretPolicy::RevealSecrets)
|
||||
.map_err(err)?;
|
||||
// Remove the experimental chat setting if not enabled
|
||||
if self.features().check_chat_completions("exporting chat settings").is_err() {
|
||||
settings.chat = Setting::NotSet;
|
||||
})?;
|
||||
}
|
||||
// Retry logic for sending settings
|
||||
let url = format!(
|
||||
"{base_url}/indexes/{index_uid}/settings",
|
||||
base_url = target.base_url,
|
||||
index_uid = options.index_uid
|
||||
);
|
||||
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.patch(&url);
|
||||
|
||||
if let Some((import_data, origin, metadata)) = &task_network {
|
||||
request = set_network_ureq_headers(request, import_data, origin, metadata);
|
||||
}
|
||||
|
||||
// Send the index settings
|
||||
if !index_exists || *override_settings {
|
||||
let mut settings =
|
||||
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// Remove the experimental chat setting if not enabled
|
||||
if self.features().check_chat_completions("exporting chat settings").is_err() {
|
||||
settings.chat = Setting::NotSet;
|
||||
}
|
||||
// Retry logic for sending settings
|
||||
let url = format!("{base_url}/indexes/{uid}/settings");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_json(settings.clone()).map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
|
||||
let fields_ids_map = ctx.index.fields_ids_map(ctx.index_rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let total_documents = ctx.universe.len() as u32;
|
||||
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||
ctx.progress.update_progress(progress_step);
|
||||
|
||||
let limit = options
|
||||
.payload_size
|
||||
.map(|ps| ps.as_u64() as usize)
|
||||
.unwrap_or(self.export_default_payload_size_bytes.as_u64() as usize);
|
||||
let documents_url = format!(
|
||||
"{base_url}/indexes/{index_uid}/documents",
|
||||
base_url = target.base_url,
|
||||
index_uid = options.index_uid
|
||||
);
|
||||
|
||||
// no document to send, but we must still send a task when performing network balancing
|
||||
if ctx.universe.is_empty() {
|
||||
if let Some((import_data, network_change_origin, metadata)) = task_network {
|
||||
let mut compressed_buffer = Vec::new();
|
||||
// ignore control flow, we're returning anyway
|
||||
let _ = send_buffer(
|
||||
b" ", // needs something otherwise meili complains about missing payload
|
||||
&mut compressed_buffer,
|
||||
ctx.must_stop_processing,
|
||||
ctx.agent,
|
||||
&documents_url,
|
||||
target.remote_name,
|
||||
bearer.as_deref(),
|
||||
Some(&(import_data, network_change_origin.clone(), metadata)),
|
||||
&err,
|
||||
)?;
|
||||
})?;
|
||||
}
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let results = request_threads()
|
||||
.broadcast(|broadcast| {
|
||||
let mut task_network = options.task_network(total_index_documents);
|
||||
let filter = filter
|
||||
.as_ref()
|
||||
.map(Filter::from_json)
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
.flatten();
|
||||
|
||||
let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?;
|
||||
let filter_universe = filter
|
||||
.map(|f| f.evaluate(&index_rtxn, &index))
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
let whole_universe = index
|
||||
.documents_ids(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
let universe = filter_universe.unwrap_or(whole_universe);
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let mut compressed_buffer = Vec::new();
|
||||
for (i, docid) in ctx.universe.iter().enumerate() {
|
||||
if i % broadcast.num_threads() != broadcast.index() {
|
||||
continue;
|
||||
}
|
||||
if let Some((import_data, _, metadata)) = &mut task_network {
|
||||
import_data.document_count += 1;
|
||||
metadata.task_key = Some(docid);
|
||||
}
|
||||
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let document = ctx.index.document(&index_rtxn, docid).map_err(err)?;
|
||||
// We don't need to keep this one alive as we will
|
||||
// spawn many threads to process the documents
|
||||
drop(index_rtxn);
|
||||
|
||||
let mut document =
|
||||
obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?;
|
||||
let total_documents = universe.len() as u32;
|
||||
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||
progress.update_progress(progress_step);
|
||||
|
||||
// TODO definitely factorize this code
|
||||
'inject_vectors: {
|
||||
let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?;
|
||||
output.insert(
|
||||
IndexUidPattern::new_unchecked(uid.clone()),
|
||||
DetailsExportIndexSettings {
|
||||
settings: (*export_settings).clone(),
|
||||
matched_documents: Some(total_documents as u64),
|
||||
},
|
||||
);
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
|
||||
let documents_url = format!("{base_url}/indexes/{uid}/documents");
|
||||
|
||||
let results = request_threads()
|
||||
.broadcast(|ctx| {
|
||||
let index_rtxn = index
|
||||
.read_txn()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let mut compressed_buffer = Vec::new();
|
||||
for (i, docid) in universe.iter().enumerate() {
|
||||
if i % ctx.num_threads() != ctx.index() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
let document = index
|
||||
.document(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(err(milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = ctx
|
||||
.index
|
||||
.external_id_of(&index_rtxn, std::iter::once(docid))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={docid}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
)));
|
||||
};
|
||||
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle well dumps with fragments, because as the fragments
|
||||
// are marked as user-provided,
|
||||
// all embeddings would be regenerated on any settings change or document update.
|
||||
// To prevent this, we mark embeddings has non regenerate in this case.
|
||||
!has_fragments,
|
||||
// TODO definitely factorize this code
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(Error::from_milli(
|
||||
milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(
|
||||
&index_rtxn,
|
||||
std::iter::once(docid),
|
||||
)
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={docid}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
),
|
||||
Some(uid.to_string()),
|
||||
));
|
||||
};
|
||||
vectors
|
||||
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle well dumps with fragments, because as the fragments
|
||||
// are marked as user-provided,
|
||||
// all embeddings would be regenerated on any settings change or document update.
|
||||
// To prevent this, we mark embeddings has non regenerate in this case.
|
||||
!has_fragments,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tmp_buffer.clear();
|
||||
serde_json::to_writer(&mut tmp_buffer, &document)
|
||||
.map_err(milli::InternalError::from)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Make sure we put at least one document in the buffer even
|
||||
// though we might go above the buffer limit before sending
|
||||
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
|
||||
// We compress the documents before sending them
|
||||
let mut encoder =
|
||||
GzEncoder::new(&mut compressed_buffer, Compression::default());
|
||||
encoder
|
||||
.write_all(&buffer)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
encoder
|
||||
.finish()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
request = request.set("Content-Encoding", "gzip");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
buffer.clear();
|
||||
compressed_buffer.clear();
|
||||
}
|
||||
buffer.extend_from_slice(&tmp_buffer);
|
||||
|
||||
if i > 0 && i % 100 == 0 {
|
||||
step.fetch_add(100, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
tmp_buffer.clear();
|
||||
serde_json::to_writer(&mut tmp_buffer, &document)
|
||||
.map_err(milli::InternalError::from)
|
||||
.map_err(milli::Error::from)
|
||||
.map_err(err)?;
|
||||
|
||||
// Make sure we put at least one document in the buffer even
|
||||
// though we might go above the buffer limit before sending
|
||||
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
|
||||
let control_flow = send_buffer(
|
||||
&buffer,
|
||||
&mut compressed_buffer,
|
||||
ctx.must_stop_processing,
|
||||
ctx.agent,
|
||||
&documents_url,
|
||||
target.remote_name,
|
||||
bearer.as_deref(),
|
||||
task_network.as_ref(),
|
||||
&err,
|
||||
)?;
|
||||
buffer.clear();
|
||||
compressed_buffer.clear();
|
||||
if let Some((import_data, _, metadata)) = &mut task_network {
|
||||
import_data.document_count = 0;
|
||||
metadata.task_key = None;
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
if control_flow.is_break() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
buffer.extend_from_slice(&tmp_buffer);
|
||||
request.send_bytes(&buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
|
||||
if i > 0 && i % 100 == 0 {
|
||||
step.fetch_add(100, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| {
|
||||
Error::from_milli(
|
||||
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
|
||||
Some(uid.to_string()),
|
||||
)
|
||||
})?;
|
||||
for result in results {
|
||||
result?;
|
||||
}
|
||||
|
||||
// send the last buffered documents if any
|
||||
if !buffer.is_empty() {
|
||||
// ignore control flow here
|
||||
let _ = send_buffer(
|
||||
&buffer,
|
||||
&mut compressed_buffer,
|
||||
ctx.must_stop_processing,
|
||||
ctx.agent,
|
||||
&documents_url,
|
||||
target.remote_name,
|
||||
bearer.as_deref(),
|
||||
task_network.as_ref(),
|
||||
&err,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?;
|
||||
for result in results {
|
||||
result?;
|
||||
}
|
||||
step.store(total_documents, atomic::Ordering::Relaxed);
|
||||
Ok(total_documents as u64)
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
|
||||
pub(super) fn export_no_index(
|
||||
&self,
|
||||
target: TargetInstance<'_>,
|
||||
export_old_remote_name: &str,
|
||||
network_change_origin: &Origin,
|
||||
agent: &ureq::Agent,
|
||||
must_stop_processing: &MustStopProcessing,
|
||||
) -> Result<(), Error> {
|
||||
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
let url = format!("{base_url}/network", base_url = target.base_url,);
|
||||
|
||||
{
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(must_stop_processing, || {
|
||||
let request = agent.patch(&url);
|
||||
let mut request = set_network_ureq_headers(
|
||||
request,
|
||||
&ImportData {
|
||||
remote_name: export_old_remote_name.to_string(),
|
||||
index_name: None,
|
||||
document_count: 0,
|
||||
},
|
||||
network_change_origin,
|
||||
&ImportMetadata {
|
||||
index_count: 0,
|
||||
task_key: None,
|
||||
total_index_documents: 0,
|
||||
},
|
||||
);
|
||||
request = request.set("Content-Type", "application/json");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request
|
||||
.send_json(
|
||||
// empty payload that will be disregarded
|
||||
serde_json::Value::Object(Default::default()),
|
||||
)
|
||||
.map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
step.store(total_documents, atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn set_network_ureq_headers(
|
||||
request: ureq::Request,
|
||||
import_data: &ImportData,
|
||||
origin: &Origin,
|
||||
metadata: &ImportMetadata,
|
||||
) -> ureq::Request {
|
||||
let request = RequestWrapper(request);
|
||||
|
||||
let ImportMetadata { index_count, task_key, total_index_documents } = metadata;
|
||||
let Origin { remote_name: origin_remote, task_uid, network_version } = origin;
|
||||
let ImportData { remote_name: import_remote, index_name, document_count } = import_data;
|
||||
|
||||
let request = request
|
||||
.set_origin_remote(origin_remote)
|
||||
.set_origin_task_uid(*task_uid)
|
||||
.set_origin_network_version(*network_version)
|
||||
.set_import_remote(import_remote)
|
||||
.set_import_docs(*document_count)
|
||||
.set_import_index_count(*index_count)
|
||||
.set_import_index_docs(*total_index_documents);
|
||||
|
||||
let request = if let Some(index_name) = index_name.as_deref() {
|
||||
request.set_import_index(index_name)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
let RequestWrapper(request) = if let Some(task_key) = task_key {
|
||||
request.set_import_task_key(*task_key)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
request
|
||||
}
|
||||
|
||||
struct RequestWrapper(ureq::Request);
|
||||
impl headers::SetHeader for RequestWrapper {
|
||||
fn set_header(self, name: &str, value: &str) -> Self {
|
||||
Self(self.0.set(name, value))
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
fn send_buffer<'a>(
    buffer: &'a [u8],
    mut compressed_buffer: &'a mut Vec<u8>,
    must_stop_processing: &MustStopProcessing,
    agent: &ureq::Agent,
    documents_url: &'a str,
    remote_name: Option<&str>,
    bearer: Option<&'a str>,
    task_network: Option<&(ImportData, Origin, ImportMetadata)>,
    err: &'a impl Fn(milli::Error) -> crate::Error,
) -> Result<ControlFlow<(), ()>> {
    // We compress the documents before sending them
    let mut encoder: GzEncoder<&mut &mut Vec<u8>> =
        GzEncoder::new(&mut compressed_buffer, Compression::default());
    encoder.write_all(buffer).map_err(milli::Error::from).map_err(err)?;
    encoder.finish().map_err(milli::Error::from).map_err(err)?;

    let res = retry(must_stop_processing, || {
        let mut request = agent.post(documents_url);
        request = request.set("Content-Type", "application/x-ndjson");
        request = request.set("Content-Encoding", "gzip");
        if let Some(bearer) = bearer {
            request = request.set("Authorization", bearer);
        }
        if let Some((import_data, origin, metadata)) = task_network {
            request = set_network_ureq_headers(request, import_data, origin, metadata);
        }
        request.send_bytes(compressed_buffer).map_err(into_backoff_error)
    });

    handle_response(remote_name, res)
}
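The first half of `send_buffer` is just the gzip framing: the NDJSON chunk is compressed into a caller-provided buffer before being POSTed. A standalone sketch of that step, using flate2 as above; the function name and error handling here are illustrative:

use std::io::Write;

use flate2::write::GzEncoder;
use flate2::Compression;

// Compress one NDJSON chunk into a reusable output buffer.
fn compress_chunk(chunk: &[u8], out: &mut Vec<u8>) -> std::io::Result<()> {
    out.clear(); // the caller reuses `out` across chunks
    let mut encoder = GzEncoder::new(out, Compression::default());
    encoder.write_all(chunk)?;
    // `finish` writes the gzip trailer; forgetting it produces a truncated stream.
    encoder.finish()?;
    Ok(())
}

fn main() -> std::io::Result<()> {
    let mut out = Vec::new();
    compress_chunk(br#"{"id":1,"title":"hello"}"#, &mut out)?;
    assert!(!out.is_empty());
    Ok(())
}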
|
||||
|
||||
fn handle_response(remote_name: Option<&str>, res: Result<Response>) -> Result<ControlFlow<()>> {
    let remote_name = remote_name.unwrap_or("unnamed");
    match res {
        Ok(_response) => Ok(ControlFlow::Continue(())),
        Err(Error::FromRemoteWhenExporting { code, .. })
            if code == Code::ImportTaskAlreadyReceived.name() =>
        {
            Ok(ControlFlow::Continue(()))
        }
        Err(Error::FromRemoteWhenExporting { code, message, .. })
            if code == Code::ImportTaskUnknownRemote.name() =>
        {
            tracing::warn!("remote `{remote_name}` answered with: {message}");
            Ok(ControlFlow::Break(()))
        }
        // note: there have already been many attempts to get this due to exponential backoff
        Err(Error::FromRemoteWhenExporting { code, message, .. })
            if code == Code::ImportTaskWithoutNetworkTask.name() =>
        {
            tracing::warn!("remote `{remote_name}` answered with: {message}");
            Ok(ControlFlow::Break(()))
        }
        Err(e) => {
            tracing::warn!("error while exporting: {e}");
            Err(e)
        }
|
||||
Ok(output)
|
||||
}
|
||||
}
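`handle_response` folds the "this remote cannot take the payload right now" cases into a `ControlFlow`: `Continue` means keep streaming buffers to that remote, `Break` means give up on that remote without failing the whole export, and a plain `Err` aborts. A hedged sketch of how a caller consumes such a value; the loop shown here is hypothetical, not the exact one from this file:

use std::ops::ControlFlow;

// Hypothetical sender loop: push every chunk, but respect the per-remote verdict.
fn stream_chunks(
    chunks: &[&[u8]],
    mut send: impl FnMut(&[u8]) -> Result<ControlFlow<()>, String>,
) -> Result<(), String> {
    for chunk in chunks.iter().copied() {
        match send(chunk)? {
            // The remote accepted (or had already received) the chunk: keep going.
            ControlFlow::Continue(()) => continue,
            // The remote declared itself unable to ever accept it: stop for this remote only.
            ControlFlow::Break(()) => break,
        }
    }
    Ok(())
}

fn main() {
    let chunks: [&[u8]; 3] = [b"a", b"b", b"c"];
    let verdicts = [ControlFlow::Continue(()), ControlFlow::Break(())];
    let mut i = 0;
    let sent = stream_chunks(&chunks, |_chunk| {
        let v = verdicts[i];
        i += 1;
        Ok(v)
    });
    assert!(sent.is_ok());
    assert_eq!(i, 2); // the third chunk was never attempted
}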
|
||||
|
||||
@@ -593,65 +374,4 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
|
||||
}
|
||||
}
|
||||
|
||||
// export_one_index arguments
|
||||
pub(super) struct TargetInstance<'a> {
|
||||
pub(super) remote_name: Option<&'a str>,
|
||||
pub(super) base_url: &'a str,
|
||||
pub(super) api_key: Option<&'a str>,
|
||||
}
|
||||
|
||||
pub(super) struct ExportOptions<'a> {
|
||||
pub(super) index_uid: &'a str,
|
||||
pub(super) payload_size: Option<&'a Byte>,
|
||||
pub(super) override_settings: bool,
|
||||
pub(super) export_mode: ExportMode<'a>,
|
||||
}
|
||||
|
||||
impl ExportOptions<'_> {
|
||||
fn task_network(
|
||||
&self,
|
||||
total_index_documents: u64,
|
||||
) -> Option<(ImportData, Origin, ImportMetadata)> {
|
||||
if let ExportMode::NetworkBalancing {
|
||||
index_count,
|
||||
export_old_remote_name,
|
||||
network_change_origin,
|
||||
} = self.export_mode
|
||||
{
|
||||
Some((
|
||||
ImportData {
|
||||
remote_name: export_old_remote_name.to_string(),
|
||||
index_name: Some(self.index_uid.to_string()),
|
||||
document_count: 0,
|
||||
},
|
||||
network_change_origin.clone(),
|
||||
ImportMetadata { index_count, task_key: None, total_index_documents },
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct ExportContext<'a> {
|
||||
pub(super) index: &'a meilisearch_types::milli::Index,
|
||||
pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>,
|
||||
pub(super) universe: &'a RoaringBitmap,
|
||||
pub(super) progress: &'a Progress,
|
||||
pub(super) agent: &'a ureq::Agent,
|
||||
pub(super) must_stop_processing: &'a MustStopProcessing,
|
||||
}
|
||||
|
||||
pub(super) enum ExportMode<'a> {
|
||||
ExportRoute,
|
||||
#[cfg_attr(not(feature = "enterprise"), allow(dead_code))]
|
||||
NetworkBalancing {
|
||||
index_count: u64,
|
||||
|
||||
export_old_remote_name: &'a str,
|
||||
network_change_origin: &'a Origin,
|
||||
},
|
||||
}
|
||||
|
||||
// progress related
|
||||
enum ExportIndex {}
|
||||
|
||||
@@ -8,7 +8,6 @@ use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||
use meilisearch_types::milli::update::DocumentAdditionResult;
|
||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use meilisearch_types::Index;
|
||||
@@ -37,7 +36,6 @@ impl IndexScheduler {
|
||||
operation: IndexOperation,
|
||||
progress: &Progress,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
network: &Network,
|
||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||
let indexer_alloc = Bump::new();
|
||||
let started_processing_at = std::time::Instant::now();
|
||||
@@ -69,6 +67,8 @@ impl IndexScheduler {
|
||||
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
|
||||
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
|
||||
|
||||
let network = self.network();
|
||||
|
||||
let shards = network.shards();
|
||||
|
||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||
@@ -504,7 +504,6 @@ impl IndexScheduler {
|
||||
},
|
||||
progress,
|
||||
embedder_stats.clone(),
|
||||
network,
|
||||
)?;
|
||||
|
||||
let (settings_tasks, _congestion) = self.apply_index_operation(
|
||||
@@ -513,7 +512,6 @@ impl IndexScheduler {
|
||||
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
||||
progress,
|
||||
embedder_stats,
|
||||
network,
|
||||
)?;
|
||||
|
||||
let mut tasks = settings_tasks;
|
||||
|
||||
@@ -747,7 +747,6 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
@@ -783,7 +782,6 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
@@ -826,7 +824,6 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
@@ -870,7 +867,6 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
|
||||
@@ -112,7 +112,6 @@ impl IndexScheduler {
|
||||
max_number_of_batched_tasks: usize::MAX,
|
||||
batched_tasks_size_limit: u64::MAX,
|
||||
instance_features: Default::default(),
|
||||
export_default_payload_size_bytes: byte_unit::Byte::parse_str("20MiB", false).unwrap(),
|
||||
auto_upgrade: true, // Don't cost much and will ensure the happy path works
|
||||
embedding_cache_cap: 10,
|
||||
experimental_no_snapshot_compaction: false,
|
||||
|
||||
@@ -8,8 +8,6 @@ use tracing::info;
|
||||
use crate::queue::TaskQueue;
|
||||
use crate::versioning::Versioning;
|
||||
|
||||
mod v1_29;
|
||||
mod v1_30;
|
||||
trait UpgradeIndexScheduler {
|
||||
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
|
||||
/// Whether the migration should be applied, depending on the initial version of the index scheduler before
|
||||
@@ -43,7 +41,6 @@ pub fn upgrade_index_scheduler(
|
||||
|
||||
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
|
||||
// List all upgrade functions to apply in order here.
|
||||
&v1_30::MigrateNetwork,
|
||||
];
|
||||
|
||||
let (initial_major, initial_minor, initial_patch) = initial_version;
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Env, RoTxn, WithoutTls};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::Result;
|
||||
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub sharding: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub write_api_key: Option<String>,
|
||||
}
|
||||
|
||||
pub fn get_network(env: &Env<WithoutTls>, rtxn: &RoTxn) -> Result<Option<Network>> {
|
||||
let Some(network_db) =
|
||||
env.open_database::<Str, SerdeJson<Network>>(rtxn, Some(db_name::EXPERIMENTAL_FEATURES))?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(network_db.get(rtxn, db_keys::NETWORK)?)
|
||||
}
|
||||
@@ -1,82 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub leader: Option<String>,
|
||||
#[serde(default)]
|
||||
pub version: Uuid,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub write_api_key: Option<String>,
|
||||
}
|
||||
|
||||
use super::v1_29;
|
||||
use crate::Result;
|
||||
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
|
||||
pub struct MigrateNetwork;
|
||||
|
||||
impl super::UpgradeIndexScheduler for MigrateNetwork {
|
||||
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()> {
|
||||
let Some(v1_29::Network { local, remotes, sharding }) = v1_29::get_network(env, wtxn)?
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let leader = if sharding { remotes.keys().next().cloned() } else { None };
|
||||
|
||||
let remotes = remotes
|
||||
.into_iter()
|
||||
.map(|(name, v1_29::Remote { url, search_api_key, write_api_key })| {
|
||||
(name, Remote { url, search_api_key, write_api_key })
|
||||
})
|
||||
.collect();
|
||||
|
||||
let network = Network { local, remotes, leader, version: Uuid::nil() };
|
||||
|
||||
set_network(env, wtxn, &network)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
|
||||
initial_version < (1, 30, 0)
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"updating the network struct"
|
||||
}
|
||||
}
|
||||
|
||||
fn set_network(env: &Env<WithoutTls>, wtxn: &mut RwTxn<'_>, network: &Network) -> Result<()> {
|
||||
let network_db =
|
||||
env.create_database::<Str, SerdeJson<Network>>(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
|
||||
network_db.put(wtxn, db_keys::NETWORK, network)?;
|
||||
Ok(())
|
||||
}
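Stripped of the heed plumbing, the data change performed by `MigrateNetwork` is a pure mapping: the old boolean `sharding` flag becomes an optional `leader` (the first remote in lexicographic order, since `remotes` is a `BTreeMap`), and the network version starts at the nil UUID. A hedged sketch of just that mapping, with simplified stand-in structs:

use std::collections::BTreeMap;

// Stand-ins for the v1.29 / v1.30 network shapes, reduced to the fields that change.
struct OldNetwork {
    remotes: BTreeMap<String, String>, // name -> url
    sharding: bool,
}

struct NewNetwork {
    remotes: BTreeMap<String, String>,
    leader: Option<String>,
    version: uuid::Uuid,
}

fn migrate(old: OldNetwork) -> NewNetwork {
    // A sharded v1.29 network gets an arbitrary-but-deterministic leader:
    // the first remote name in BTreeMap (i.e. lexicographic) order.
    let leader = if old.sharding { old.remotes.keys().next().cloned() } else { None };
    NewNetwork { remotes: old.remotes, leader, version: uuid::Uuid::nil() }
}

fn main() {
    let old = OldNetwork {
        remotes: BTreeMap::from([
            ("ms-1".to_string(), "http://ms-1.example".to_string()),
            ("ms-0".to_string(), "http://ms-0.example".to_string()),
        ]),
        sharding: true,
    };
    let new = migrate(old);
    assert_eq!(new.remotes.len(), 2);
    assert_eq!(new.leader.as_deref(), Some("ms-0")); // BTreeMap keys are ordered
    assert!(new.version.is_nil());
}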
|
||||
@@ -4,11 +4,9 @@ use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
use std::sync::Arc;
|
||||
|
||||
use convert_case::{Case, Casing as _};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::progress::Progress;
|
||||
use meilisearch_types::milli::{ChannelCongestion, DeCboRoaringBitmapCodec};
|
||||
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{
|
||||
BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status,
|
||||
@@ -121,8 +119,17 @@ impl ProcessingBatch {
|
||||
self.stats.total_nb_tasks = 0;
|
||||
}
|
||||
|
||||
/// Update batch task from a processed task
|
||||
pub fn update_from_task(&mut self, task: &Task) {
|
||||
/// Update the timestamp of the tasks and the inner structure of this structure.
|
||||
pub fn update(&mut self, task: &mut Task) {
|
||||
// We must re-set this value in case we're dealing with a task that has been added between
|
||||
// the `processing` and `finished` state
|
||||
// We must re-set this value in case we're dealing with a task that has been added between
|
||||
// the `processing` and `finished` state or that failed.
|
||||
task.batch_uid = Some(self.uid);
|
||||
// Same
|
||||
task.started_at = Some(self.started_at);
|
||||
task.finished_at = self.finished_at;
|
||||
|
||||
self.statuses.insert(task.status);
|
||||
|
||||
// Craft an aggregation of the details of all the tasks encountered in this batch.
|
||||
@@ -137,63 +144,6 @@ impl ProcessingBatch {
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the timestamp of the tasks after they're done
|
||||
pub fn finish_task(&self, task: &mut Task) {
|
||||
// We must re-set this value in case we're dealing with a task that has been added between
|
||||
// the `processing` and `finished` state or that failed.
|
||||
task.batch_uid = Some(self.uid);
|
||||
// Same
|
||||
task.started_at = Some(self.started_at);
|
||||
task.finished_at = self.finished_at;
|
||||
}
|
||||
|
||||
pub fn write_stats(
|
||||
&mut self,
|
||||
progress: &Progress,
|
||||
congestion: Option<ChannelCongestion>,
|
||||
pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
) {
|
||||
self.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
self.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
let mut congestion_info = serde_json::Map::new();
|
||||
congestion_info.insert("attempts".into(), congestion.attempts.into());
|
||||
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
|
||||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
self.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
use byte_unit::Byte;
|
||||
use byte_unit::UnitType::Binary;
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((
|
||||
dbname.to_case(Case::Camel),
|
||||
format!("{post:#.2} ({sign}{diff:#.2})").into(),
|
||||
))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
}
|
||||
|
||||
pub fn to_batch(&self) -> Batch {
|
||||
Batch {
|
||||
uid: self.uid,
|
||||
@@ -211,7 +161,7 @@ impl ProcessingBatch {
|
||||
|
||||
pub(crate) fn insert_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<BEI128, DeCboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
@@ -224,7 +174,7 @@ pub(crate) fn insert_task_datetime(
|
||||
|
||||
pub(crate) fn remove_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<BEI128, DeCboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
@@ -243,7 +193,7 @@ pub(crate) fn remove_task_datetime(
|
||||
|
||||
pub(crate) fn remove_n_tasks_datetime_earlier_than(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<BEI128, DeCboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
earlier_than: OffsetDateTime,
|
||||
mut count: usize,
|
||||
task_id: TaskId,
|
||||
@@ -271,7 +221,7 @@ pub(crate) fn remove_n_tasks_datetime_earlier_than(
|
||||
pub(crate) fn keep_ids_within_datetimes(
|
||||
rtxn: &RoTxn,
|
||||
ids: &mut RoaringBitmap,
|
||||
database: Database<BEI128, DeCboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
after: Option<OffsetDateTime>,
|
||||
before: Option<OffsetDateTime>,
|
||||
) -> Result<()> {
|
||||
@@ -336,7 +286,6 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
| K::DumpCreation { .. }
|
||||
| K::Export { .. }
|
||||
| K::UpgradeDatabase { .. }
|
||||
| K::NetworkTopologyChange(_)
|
||||
| K::SnapshotCreation => (),
|
||||
};
|
||||
if let Some(Details::IndexSwap { swaps }) = &mut task.details {
|
||||
@@ -678,9 +627,6 @@ impl crate::IndexScheduler {
|
||||
} => {
|
||||
assert_eq!(kind.as_kind(), Kind::IndexCompaction);
|
||||
}
|
||||
Details::NetworkTopologyChange { moved_documents: _, message: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::NetworkTopologyChange);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ license.workspace = true
|
||||
[dependencies]
|
||||
actix-web = { version = "4.12.0", default-features = false }
|
||||
anyhow = "1.0.100"
|
||||
base64 = "0.22.1"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
@@ -25,7 +24,6 @@ enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
itertools = "0.14.0"
|
||||
memmap2 = "0.9.9"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
@@ -43,7 +41,6 @@ time = { version = "0.3.44", features = [
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.48"
|
||||
urlencoding = "2.1.3"
|
||||
utoipa = { version = "5.4.0", features = ["macros"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
|
||||
@@ -9,17 +9,21 @@ use crate::network::Network;
|
||||
|
||||
impl Network {
|
||||
pub fn shards(&self) -> Option<Shards> {
|
||||
if self.sharding() {
|
||||
Some(Shards::from_remotes_local(
|
||||
self.remotes.keys().map(String::as_str),
|
||||
self.local.as_deref(),
|
||||
))
|
||||
if self.sharding {
|
||||
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
|
||||
let others = self
|
||||
.remotes
|
||||
.keys()
|
||||
.filter(|name| name.as_str() != this)
|
||||
.map(|name| name.to_owned())
|
||||
.collect();
|
||||
Some(Shards { own: vec![this.to_owned()], others })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sharding(&self) -> bool {
|
||||
self.leader.is_some()
|
||||
self.sharding
|
||||
}
|
||||
}
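Both versions of `shards()` ultimately split the remote names into the shard this instance owns and everyone else's; document routing then only needs a deterministic hash of the primary key to decide which shard a document belongs to. The routine below is a hypothetical illustration of that idea; the real `Shards` type and its hashing live in milli and may differ:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Hypothetical shard router: every instance computes the same assignment because
// the hash depends only on the document's primary key and the shared shard list.
struct ShardsSketch {
    own: String,
    all: Vec<String>, // sorted, identical on every instance
}

impl ShardsSketch {
    fn owner_of(&self, primary_key: &str) -> &str {
        let mut hasher = DefaultHasher::new();
        primary_key.hash(&mut hasher);
        let index = (hasher.finish() % self.all.len() as u64) as usize;
        &self.all[index]
    }

    fn must_process(&self, primary_key: &str) -> bool {
        self.owner_of(primary_key) == self.own
    }
}

fn main() {
    let shards = ShardsSketch {
        own: "ms-0".to_string(),
        all: vec!["ms-0".to_string(), "ms-1".to_string(), "ms-2".to_string()],
    };
    // Exactly one instance answers `true` for any given document.
    println!("ms-0 keeps doc `42`: {}", shards.must_process("42"));
}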
|
||||
|
||||
@@ -156,7 +156,7 @@ macro_rules! make_error_codes {
|
||||
}
|
||||
|
||||
/// return error name, used as error code
|
||||
pub fn name(&self) -> String {
|
||||
fn name(&self) -> String {
|
||||
match self {
|
||||
$(
|
||||
Code::$code_ident => stringify!($code_ident).to_case(convert_case::Case::Snake)
|
||||
@@ -214,9 +214,6 @@ ImmutableApiKeyUid , InvalidRequest , BAD_REQU
|
||||
ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST;
|
||||
ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST;
|
||||
ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST;
|
||||
ImportTaskAlreadyReceived , InvalidRequest , PRECONDITION_FAILED;
|
||||
ImportTaskUnknownRemote , InvalidRequest , PRECONDITION_FAILED;
|
||||
ImportTaskWithoutNetworkTask , InvalidRequest , SERVICE_UNAVAILABLE;
|
||||
IndexAlreadyExists , InvalidRequest , CONFLICT ;
|
||||
IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR;
|
||||
IndexNotFound , InvalidRequest , NOT_FOUND;
|
||||
@@ -273,9 +270,9 @@ InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQU
|
||||
InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkLeader , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -380,9 +377,7 @@ MissingPayload , InvalidRequest , BAD_REQU
|
||||
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
|
||||
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
|
||||
NetworkVersionMismatch , InvalidRequest , PRECONDITION_FAILED ;
|
||||
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
|
||||
NotLeader , InvalidRequest , BAD_REQUEST ;
|
||||
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
|
||||
RemoteBadResponse , System , BAD_GATEWAY ;
|
||||
RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -396,9 +391,6 @@ TaskFileNotFound , InvalidRequest , NOT_FOUN
|
||||
BatchNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
|
||||
TooManyVectors , InvalidRequest , BAD_REQUEST ;
|
||||
UnexpectedNetworkPreviousRemotes , InvalidRequest , BAD_REQUEST ;
|
||||
NetworkVersionTooOld , InvalidRequest , BAD_REQUEST ;
|
||||
UnprocessedNetworkTask , InvalidRequest , BAD_REQUEST ;
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -11,9 +10,7 @@ pub struct Network {
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub leader: Option<String>,
|
||||
#[serde(default)]
|
||||
pub version: Uuid,
|
||||
pub sharding: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
|
||||
@@ -9,12 +9,12 @@ use utoipa::ToSchema;
|
||||
use crate::batches::BatchId;
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::network::DbTaskNetwork;
|
||||
use crate::tasks::{
|
||||
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
|
||||
TaskNetwork,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
@@ -54,7 +54,7 @@ pub struct TaskView {
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<DbTaskNetwork>,
|
||||
pub network: Option<TaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
@@ -151,11 +151,6 @@ pub struct DetailsView {
|
||||
pub pre_compaction_size: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub post_compaction_size: Option<String>,
|
||||
// network topology change
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub moved_documents: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub message: Option<String>,
|
||||
}
|
||||
|
||||
impl DetailsView {
|
||||
@@ -166,17 +161,6 @@ impl DetailsView {
|
||||
(None, Some(doc)) | (Some(doc), None) => Some(doc),
|
||||
(Some(left), Some(right)) => Some(left + right),
|
||||
},
|
||||
moved_documents: match (self.moved_documents, other.moved_documents) {
|
||||
(None, None) => None,
|
||||
(None, Some(doc)) | (Some(doc), None) => Some(doc),
|
||||
(Some(left), Some(right)) => Some(left + right),
|
||||
},
|
||||
message: match (&mut self.message, &other.message) {
|
||||
(None, None) => None,
|
||||
(None, Some(message)) => Some(message.clone()),
|
||||
(Some(message), None) => Some(std::mem::take(message)),
|
||||
(Some(message), Some(_)) => Some(std::mem::take(message)),
|
||||
},
|
||||
indexed_documents: match (self.indexed_documents, other.indexed_documents) {
|
||||
(None, None) => None,
|
||||
(None, Some(None)) | (Some(None), None) | (Some(None), Some(None)) => Some(None),
|
||||
@@ -467,11 +451,6 @@ impl From<Details> for DetailsView {
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
Details::NetworkTopologyChange { moved_documents, message } => DetailsView {
|
||||
moved_documents: Some(moved_documents),
|
||||
message: Some(message),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,8 +23,6 @@ use crate::{versioning, InstanceUid};
|
||||
|
||||
pub type TaskId = u32;
|
||||
|
||||
pub mod network;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Task {
|
||||
@@ -46,7 +44,7 @@ pub struct Task {
|
||||
pub kind: KindWithContent,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<network::DbTaskNetwork>,
|
||||
pub network: Option<TaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
@@ -63,7 +61,6 @@ impl Task {
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| NetworkTopologyChange { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentEdition { index_uid, .. }
|
||||
@@ -102,7 +99,6 @@ impl Task {
|
||||
| KindWithContent::SnapshotCreation
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::NetworkTopologyChange { .. }
|
||||
| KindWithContent::IndexCompaction { .. } => None,
|
||||
}
|
||||
}
|
||||
@@ -182,7 +178,6 @@ pub enum KindWithContent {
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
NetworkTopologyChange(network::NetworkTopologyChange),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
@@ -220,7 +215,6 @@ impl KindWithContent {
|
||||
KindWithContent::Export { .. } => Kind::Export,
|
||||
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
|
||||
KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
|
||||
KindWithContent::NetworkTopologyChange { .. } => Kind::NetworkTopologyChange,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -233,7 +227,6 @@ impl KindWithContent {
|
||||
| TaskCancelation { .. }
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| NetworkTopologyChange { .. }
|
||||
| UpgradeDatabase { .. } => vec![],
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentEdition { index_uid, .. }
|
||||
@@ -347,10 +340,6 @@ impl KindWithContent {
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
KindWithContent::NetworkTopologyChange { .. } => Some(Details::NetworkTopologyChange {
|
||||
moved_documents: 0,
|
||||
message: "processing tasks for previous network versions".into(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -403,7 +392,7 @@ impl KindWithContent {
|
||||
})
|
||||
}
|
||||
KindWithContent::IndexSwap { .. } => {
|
||||
unimplemented!("do not call `default_finished_details` for `IndexSwap` tasks")
|
||||
todo!()
|
||||
}
|
||||
KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation {
|
||||
matched_tasks: tasks.len(),
|
||||
@@ -438,9 +427,6 @@ impl KindWithContent {
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
KindWithContent::NetworkTopologyChange(network_topology_change) => {
|
||||
Some(network_topology_change.to_details())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -508,9 +494,6 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
KindWithContent::NetworkTopologyChange(network_topology_change) => {
|
||||
Some(network_topology_change.to_details())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -622,7 +605,6 @@ pub enum Kind {
|
||||
Export,
|
||||
UpgradeDatabase,
|
||||
IndexCompaction,
|
||||
NetworkTopologyChange,
|
||||
}
|
||||
|
||||
impl Kind {
|
||||
@@ -642,7 +624,6 @@ impl Kind {
|
||||
| Kind::DumpCreation
|
||||
| Kind::Export
|
||||
| Kind::UpgradeDatabase
|
||||
| Kind::NetworkTopologyChange
|
||||
| Kind::SnapshotCreation => false,
|
||||
}
|
||||
}
|
||||
@@ -665,7 +646,6 @@ impl Display for Kind {
|
||||
Kind::Export => write!(f, "export"),
|
||||
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
|
||||
Kind::IndexCompaction => write!(f, "indexCompaction"),
|
||||
Kind::NetworkTopologyChange => write!(f, "networkTopologyChange"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -703,8 +683,6 @@ impl FromStr for Kind {
|
||||
Ok(Kind::UpgradeDatabase)
|
||||
} else if kind.eq_ignore_ascii_case("indexCompaction") {
|
||||
Ok(Kind::IndexCompaction)
|
||||
} else if kind.eq_ignore_ascii_case("networkTopologyChange") {
|
||||
Ok(Kind::NetworkTopologyChange)
|
||||
} else {
|
||||
Err(ParseTaskKindError(kind.to_owned()))
|
||||
}
|
||||
@@ -795,10 +773,36 @@ pub enum Details {
|
||||
pre_compaction_size: Option<Byte>,
|
||||
post_compaction_size: Option<Byte>,
|
||||
},
|
||||
NetworkTopologyChange {
|
||||
moved_documents: u64,
|
||||
message: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(untagged, rename_all = "camelCase")]
|
||||
pub enum TaskNetwork {
|
||||
Origin { origin: Origin },
|
||||
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
|
||||
}
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Origin {
|
||||
pub remote_name: String,
|
||||
pub task_uid: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct RemoteTask {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
task_uid: Option<TaskId>,
|
||||
error: Option<ResponseError>,
|
||||
}
|
||||
|
||||
impl From<Result<TaskId, ResponseError>> for RemoteTask {
|
||||
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
|
||||
match res {
|
||||
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
|
||||
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
@@ -841,9 +845,6 @@ impl Details {
|
||||
| Self::Export { .. }
|
||||
| Self::UpgradeDatabase { .. }
|
||||
| Self::IndexSwap { .. } => (),
|
||||
Self::NetworkTopologyChange { moved_documents: _, message } => {
|
||||
*message = format!("Failed. Previous status: {}", message);
|
||||
}
|
||||
}
|
||||
|
||||
details
|
||||
@@ -899,17 +900,6 @@ pub enum BatchStopReason {
|
||||
SettingsWithDocumentOperation {
|
||||
id: TaskId,
|
||||
},
|
||||
NetworkTask {
|
||||
id: TaskId,
|
||||
},
|
||||
NetworkTaskOlderTasks {
|
||||
id: TaskId,
|
||||
inner_reason: String,
|
||||
},
|
||||
NetworkTaskImportTasks {
|
||||
id: TaskId,
|
||||
inner_reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchStopReason {
|
||||
@@ -998,24 +988,6 @@ impl Display for BatchStopReason {
|
||||
"stopped before task with id {id} because it is a document operation which cannot be batched with settings changes"
|
||||
)
|
||||
}
|
||||
BatchStopReason::NetworkTask { id } => {
|
||||
write!(
|
||||
f,
|
||||
"stopped after task with id {id} because it is a network topology change task"
|
||||
)
|
||||
}
|
||||
BatchStopReason::NetworkTaskOlderTasks { id, inner_reason } => {
|
||||
write!(
|
||||
f,
|
||||
"stopped after batching network task with id {id} and a batch of older tasks: {inner_reason}"
|
||||
)
|
||||
}
|
||||
BatchStopReason::NetworkTaskImportTasks { id, inner_reason } => {
|
||||
write!(
|
||||
f,
|
||||
"stopped after batching network task with id {id} and a batch of import tasks: {inner_reason}"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,783 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use base64::Engine as _;
|
||||
use itertools::{EitherOrBoth, Itertools as _};
|
||||
use milli::{DeCboRoaringBitmapCodec, DocumentId};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::network::Network;
|
||||
use crate::tasks::{Details, TaskId};
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
mod community_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
mod enterprise_edition;
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(untagged, rename_all = "camelCase")]
|
||||
// This type is used in the database, care should be taken when modifying it.
|
||||
pub enum DbTaskNetwork {
|
||||
/// Tasks that were duplicated from `origin`
|
||||
Origin { origin: Origin },
|
||||
/// Tasks that were duplicated as `remote_tasks`
|
||||
Remotes {
|
||||
remote_tasks: BTreeMap<String, RemoteTask>,
|
||||
#[serde(default)]
|
||||
network_version: Uuid,
|
||||
},
|
||||
/// Document import tasks sent in the context of `network_change`
|
||||
Import { import_from: ImportData, network_change: Origin },
|
||||
}
|
||||
|
||||
impl DbTaskNetwork {
|
||||
pub fn network_version(&self) -> Uuid {
|
||||
match self {
|
||||
DbTaskNetwork::Origin { origin } => origin.network_version,
|
||||
DbTaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
|
||||
DbTaskNetwork::Import { import_from: _, network_change } => {
|
||||
network_change.network_version
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn import_data(&self) -> Option<&ImportData> {
|
||||
match self {
|
||||
DbTaskNetwork::Origin { .. } | DbTaskNetwork::Remotes { .. } => None,
|
||||
DbTaskNetwork::Import { import_from, .. } => Some(import_from),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn origin(&self) -> Option<&Origin> {
|
||||
match self {
|
||||
DbTaskNetwork::Origin { origin } => Some(origin),
|
||||
DbTaskNetwork::Remotes { .. } => None,
|
||||
DbTaskNetwork::Import { network_change, .. } => Some(network_change),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum TaskNetwork {
|
||||
/// Tasks that were duplicated from `origin`
|
||||
Origin { origin: Origin },
|
||||
/// Tasks that were duplicated as `remote_tasks`
|
||||
Remotes { remote_tasks: BTreeMap<String, RemoteTask>, network_version: Uuid },
|
||||
/// Document import tasks sent in the context of `network_change`
|
||||
Import { import_from: ImportData, network_change: Origin, metadata: ImportMetadata },
|
||||
}
|
||||
|
||||
impl TaskNetwork {
|
||||
pub fn network_version(&self) -> Uuid {
|
||||
match self {
|
||||
TaskNetwork::Origin { origin } => origin.network_version,
|
||||
TaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
|
||||
TaskNetwork::Import { import_from: _, network_change, metadata: _ } => {
|
||||
network_change.network_version
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TaskNetwork> for DbTaskNetwork {
|
||||
fn from(value: TaskNetwork) -> Self {
|
||||
match value {
|
||||
TaskNetwork::Origin { origin } => DbTaskNetwork::Origin { origin },
|
||||
TaskNetwork::Remotes { remote_tasks, network_version } => {
|
||||
DbTaskNetwork::Remotes { remote_tasks, network_version }
|
||||
}
|
||||
TaskNetwork::Import { import_from, network_change, metadata: _ } => {
|
||||
DbTaskNetwork::Import { import_from, network_change }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Origin {
|
||||
pub remote_name: String,
|
||||
pub task_uid: u32,
|
||||
#[serde(default)]
|
||||
pub network_version: Uuid,
|
||||
}
|
||||
|
||||
/// Import data stored in a task
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ImportData {
|
||||
/// Remote that this task is imported from
|
||||
pub remote_name: String,
|
||||
/// Index relevant to this task
|
||||
pub index_name: Option<String>,
|
||||
/// Number of documents in this task
|
||||
pub document_count: u64,
|
||||
}
|
||||
|
||||
/// Import metadata associated with a task but not stored in the task
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct ImportMetadata {
|
||||
/// Total number of indexes to import from this host
|
||||
pub index_count: u64,
|
||||
/// Key unique to this (network_change, index, host) combination.
|
||||
///
|
||||
/// In practice, an internal document id of one of the documents to import.
|
||||
pub task_key: Option<DocumentId>,
|
||||
/// Total number of documents to import for this index from this host.
|
||||
pub total_index_documents: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct RemoteTask {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
task_uid: Option<TaskId>,
|
||||
error: Option<ResponseError>,
|
||||
}
|
||||
|
||||
impl From<Result<TaskId, ResponseError>> for RemoteTask {
|
||||
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
|
||||
match res {
|
||||
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
|
||||
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Contains the full state of a network topology change.
///
/// A network topology change task is unique in that it can be processed in multiple different batches, as its resolution
/// depends on various document addition tasks being processed.
///
/// A network topology task has 4 states:
///
/// 1. Processing any task that was meant for an earlier version of the network. This is necessary to know that we have the right version of
///    documents.
/// 2. Sending all documents that must be moved to other remotes.
/// 3. Processing any task coming from the remotes.
/// 4. Finished.
///
/// Furthermore, it maintains some stats (currently the number of moved documents).
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NetworkTopologyChange {
|
||||
state: NetworkTopologyState,
|
||||
in_remotes: BTreeMap<String, InRemote>,
|
||||
old_network: Network,
|
||||
new_network: Network,
|
||||
stats: NetworkTopologyStats,
|
||||
}
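The `merge` method below relies on a small but important detail: `NetworkTopologyState` derives `Ord` with its variants declared in chronological order, so "keep the most advanced of two snapshots" is just `Ord::max`. A reduced sketch of that forward-progress merge:

// Variants are declared in the order the task goes through them, so the derived
// `Ord` makes `max` pick whichever snapshot is further along.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum State {
    WaitingForOlderTasks,
    ExportingDocuments,
    ImportingDocuments,
    Finished,
}

fn merge(left: State, right: State) -> State {
    Ord::max(left, right)
}

fn main() {
    assert_eq!(
        merge(State::ExportingDocuments, State::WaitingForOlderTasks),
        State::ExportingDocuments
    );
    assert_eq!(merge(State::Finished, State::ImportingDocuments), State::Finished);
}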
|
||||
|
||||
impl NetworkTopologyChange {
|
||||
pub fn new(old_network: Network, new_network: Network) -> Self {
|
||||
let in_name = new_network.local.as_deref();
|
||||
let out_name = old_network.local.as_deref().or(in_name);
|
||||
|
||||
let in_remotes = if in_name.is_some() {
|
||||
old_network
|
||||
.remotes
|
||||
.keys()
|
||||
.chain(new_network.remotes.keys())
|
||||
// don't await imports from ourselves
|
||||
.filter(|name| Some(name.as_str()) != out_name)
|
||||
.cloned()
|
||||
.map(|name| (name, InRemote::new()))
|
||||
.collect()
|
||||
} else {
|
||||
Default::default()
|
||||
};
|
||||
Self {
|
||||
state: NetworkTopologyState::WaitingForOlderTasks,
|
||||
in_remotes,
|
||||
stats: NetworkTopologyStats { moved_documents: 0 },
|
||||
new_network,
|
||||
old_network,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn in_name(&self) -> Option<&str> {
|
||||
self.new_network.local.as_deref()
|
||||
}
|
||||
|
||||
pub fn out_name(&self) -> Option<&str> {
|
||||
self.old_network.local.as_deref().or_else(|| self.in_name())
|
||||
}
|
||||
|
||||
pub fn state(&self) -> NetworkTopologyState {
|
||||
self.state
|
||||
}
|
||||
|
||||
pub fn to_details(&self) -> Details {
|
||||
let message = match self.state {
|
||||
NetworkTopologyState::WaitingForOlderTasks => {
|
||||
"Waiting for tasks enqueued before the network change to finish processing".into()
|
||||
}
|
||||
NetworkTopologyState::ExportingDocuments => "Exporting documents".into(),
|
||||
NetworkTopologyState::ImportingDocuments => {
|
||||
let mut finished_count = 0;
|
||||
let mut first_ongoing = None;
|
||||
let mut ongoing_total_indexes = 0;
|
||||
let mut ongoing_processed_documents = 0;
|
||||
let mut ongoing_missing_documents = 0;
|
||||
let mut ongoing_total_documents = 0;
|
||||
let mut other_ongoing_count = 0;
|
||||
let mut first_waiting = None;
|
||||
let mut other_waiting_count = 0;
|
||||
for (remote_name, in_remote) in &self.in_remotes {
|
||||
match &in_remote.import_state {
|
||||
ImportState::WaitingForInitialTask => {
|
||||
first_waiting = match first_waiting {
|
||||
None => Some(remote_name),
|
||||
first_waiting => {
|
||||
other_waiting_count += 1;
|
||||
first_waiting
|
||||
}
|
||||
};
|
||||
}
|
||||
ImportState::Ongoing { import_index_state, total_indexes } => {
|
||||
first_ongoing = match first_ongoing {
|
||||
None => {
|
||||
ongoing_total_indexes = *total_indexes;
|
||||
Some(remote_name)
|
||||
}
|
||||
first_ongoing => {
|
||||
other_ongoing_count += 1;
|
||||
first_ongoing
|
||||
}
|
||||
};
|
||||
for import_state in import_index_state.values() {
|
||||
match import_state {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
processed_documents,
|
||||
received_documents,
|
||||
task_keys: _,
|
||||
} => {
|
||||
ongoing_total_documents += total_documents;
|
||||
ongoing_processed_documents += processed_documents;
|
||||
ongoing_missing_documents +=
|
||||
total_documents.saturating_sub(*received_documents);
|
||||
}
|
||||
ImportIndexState::Finished { total_documents } => {
|
||||
ongoing_total_documents += total_documents;
|
||||
ongoing_processed_documents += total_documents;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ImportState::Finished { total_indexes, total_documents } => {
|
||||
finished_count += 1;
|
||||
ongoing_total_indexes = *total_indexes;
|
||||
ongoing_total_documents += *total_documents;
|
||||
ongoing_processed_documents += *total_documents;
|
||||
}
|
||||
}
|
||||
}
|
||||
format!(
|
||||
"Importing documents from {total} remotes{waiting}{ongoing}{finished}",
|
||||
total = self.in_remotes.len(),
|
||||
waiting = if let Some(first_waiting) = first_waiting {
|
||||
format!(
|
||||
", waiting on first task from `{}`{others}",
|
||||
first_waiting,
|
||||
others = if other_waiting_count > 0 {
|
||||
format!(" and {other_waiting_count} other remotes")
|
||||
} else {
|
||||
"".into()
|
||||
}
|
||||
)
|
||||
} else {
|
||||
"".into()
|
||||
},
|
||||
ongoing = if let Some(first_ongoing) = first_ongoing {
|
||||
format!(", awaiting {ongoing_missing_documents} and processed {ongoing_processed_documents} out of {ongoing_total_documents} documents in {ongoing_total_indexes} indexes from `{first_ongoing}`{others}",
|
||||
others=if other_ongoing_count > 0 {format!(" and {other_ongoing_count} other remotes")} else {"".into()})
|
||||
} else {
|
||||
"".into()
|
||||
},
|
||||
finished = if finished_count > 0 {
|
||||
format!(", {finished_count} remotes finished processing")
|
||||
} else {
|
||||
"".into()
|
||||
}
|
||||
)
|
||||
}
|
||||
NetworkTopologyState::Finished => "Finished".into(),
|
||||
};
|
||||
Details::NetworkTopologyChange { moved_documents: self.stats.moved_documents, message }
|
||||
}
|
||||
|
||||
pub fn merge(&mut self, other: NetworkTopologyChange) {
|
||||
// The topology change has a guarantee of forward progress, so for each field we're going to keep the "most advanced" values.
|
||||
let Self { state, new_network: _, old_network: _, in_remotes, stats } = self;
|
||||
|
||||
*state = Ord::max(*state, other.state);
|
||||
*stats = Ord::max(*stats, other.stats);
|
||||
|
||||
for (old_value, new_value) in other.in_remotes.into_values().zip(in_remotes.values_mut()) {
|
||||
new_value.import_state = match (old_value.import_state, std::mem::take(&mut new_value.import_state)) {
|
||||
// waiting for initial task is always older
|
||||
(ImportState::WaitingForInitialTask, newer)
|
||||
| (newer, ImportState::WaitingForInitialTask)
|
||||
|
||||
// finished is always newer
|
||||
| (_, newer @ ImportState::Finished { .. })
|
||||
| (newer @ ImportState::Finished { .. }, _) => newer,
|
||||
(
|
||||
ImportState::Ongoing { import_index_state: left_import, total_indexes: left_total_indexes },
|
||||
ImportState::Ongoing { import_index_state: right_import, total_indexes: right_total_indexes },
|
||||
) => {
|
||||
let import_index_state = left_import.into_iter().merge_join_by(right_import.into_iter(), |(k,_), (x, _)|k.cmp(x)).map(|eob|
|
||||
match eob {
|
||||
EitherOrBoth::Both((name, left), (_, right)) => {
|
||||
let newer = merge_import_index_state(left, right);
|
||||
(name, newer)
|
||||
},
|
||||
EitherOrBoth::Left(import) |
|
||||
EitherOrBoth::Right(import) => import,
|
||||
}
|
||||
).collect();
|
||||
|
||||
ImportState::Ongoing{ import_index_state, total_indexes : u64::max(left_total_indexes, right_total_indexes) }
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn network_for_state(&self) -> &Network {
|
||||
match self.state {
|
||||
NetworkTopologyState::WaitingForOlderTasks => &self.old_network,
|
||||
NetworkTopologyState::ExportingDocuments
|
||||
| NetworkTopologyState::ImportingDocuments
|
||||
| NetworkTopologyState::Finished => &self.new_network,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_import_index_state(left: ImportIndexState, right: ImportIndexState) -> ImportIndexState {
|
||||
match (left, right) {
|
||||
(_, newer @ ImportIndexState::Finished { .. }) => newer,
|
||||
(newer @ ImportIndexState::Finished { .. }, _) => newer,
|
||||
(
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents: left_total_documents,
|
||||
received_documents: left_received_documents,
|
||||
processed_documents: left_processed_documents,
|
||||
task_keys: mut left_task_keys,
|
||||
},
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents: right_total_documents,
|
||||
received_documents: right_received_documents,
|
||||
processed_documents: right_processed_documents,
|
||||
task_keys: right_task_keys,
|
||||
},
|
||||
) => {
|
||||
let total_documents = u64::max(left_total_documents, right_total_documents);
|
||||
let received_documents = u64::max(left_received_documents, right_received_documents);
|
||||
let processed_documents = u64::max(left_processed_documents, right_processed_documents);
|
||||
left_task_keys.0 |= &right_task_keys.0;
|
||||
let task_keys = left_task_keys;
|
||||
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents,
|
||||
processed_documents,
|
||||
task_keys,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
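`NetworkTopologyChange::merge` and `merge_import_index_state` both walk two ordered collections key by key, taking the "newer" value when a key exists on both sides. The itertools `merge_join_by` call doing that joining can be seen in isolation; toy maps and a `u64::max` merge stand in for the real import states:

use std::collections::BTreeMap;

use itertools::{EitherOrBoth, Itertools as _};

// Join two ordered maps key-wise; keep the larger value when a key is on both sides.
fn merge_progress(
    left: BTreeMap<String, u64>,
    right: BTreeMap<String, u64>,
) -> BTreeMap<String, u64> {
    left.into_iter()
        .merge_join_by(right, |(k, _), (x, _)| k.cmp(x))
        .map(|eob| match eob {
            EitherOrBoth::Both((name, l), (_, r)) => (name, u64::max(l, r)),
            EitherOrBoth::Left(entry) | EitherOrBoth::Right(entry) => entry,
        })
        .collect()
}

fn main() {
    let left = BTreeMap::from([("movies".to_string(), 10), ("books".to_string(), 3)]);
    let right = BTreeMap::from([("movies".to_string(), 25)]);
    let merged = merge_progress(left, right);
    assert_eq!(merged["movies"], 25);
    assert_eq!(merged["books"], 3);
}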
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum NetworkTopologyState {
|
||||
WaitingForOlderTasks,
|
||||
ExportingDocuments,
|
||||
ImportingDocuments,
|
||||
Finished,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NetworkTopologyStats {
|
||||
#[serde(default)]
|
||||
pub moved_documents: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct InRemote {
|
||||
import_state: ImportState,
|
||||
}
|
||||
|
||||
impl InRemote {
|
||||
pub fn new() -> Self {
|
||||
Self { import_state: ImportState::WaitingForInitialTask }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum ImportState {
|
||||
/// Initially Meilisearch doesn't know how many documents it should expect from a remote.
/// Every task coming from a remote carries the number of indexes that will be imported
/// and the number of documents to import for that task's index.
|
||||
#[default]
|
||||
WaitingForInitialTask,
|
||||
Ongoing {
|
||||
import_index_state: BTreeMap<String, ImportIndexState>,
|
||||
total_indexes: u64,
|
||||
},
|
||||
Finished {
|
||||
total_indexes: u64,
|
||||
total_documents: u64,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum ImportIndexState {
|
||||
Ongoing {
|
||||
total_documents: u64,
|
||||
received_documents: u64,
|
||||
processed_documents: u64,
|
||||
task_keys: TaskKeys,
|
||||
},
|
||||
Finished {
|
||||
total_documents: u64,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TaskKeys(pub RoaringBitmap);
|
||||
|
||||
impl Serialize for TaskKeys {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
let TaskKeys(task_keys) = self;
|
||||
let mut bytes = Vec::new();
|
||||
// TODO correctly handle this io::Error
|
||||
DeCboRoaringBitmapCodec::serialize_into(task_keys, &mut bytes).unwrap();
|
||||
let encoded = base64::prelude::BASE64_STANDARD.encode(&bytes);
|
||||
serializer.serialize_str(&encoded)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for TaskKeys {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_str(TaskKeysVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct TaskKeysVisitor;
|
||||
impl<'de> serde::de::Visitor<'de> for TaskKeysVisitor {
|
||||
type Value = TaskKeys;
|
||||
|
||||
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
formatter.write_str("a base64 encoded cbo roaring bitmap")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, encoded: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
let decoded = base64::prelude::BASE64_STANDARD.decode(encoded).map_err(|_err| {
|
||||
E::invalid_value(serde::de::Unexpected::Str(encoded), &"a base64 string")
|
||||
})?;
|
||||
self.visit_bytes(&decoded)
|
||||
}
|
||||
|
||||
fn visit_bytes<E>(self, decoded: &[u8]) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
let task_keys = DeCboRoaringBitmapCodec::deserialize_from(decoded).map_err(|_err| {
|
||||
E::invalid_value(serde::de::Unexpected::Bytes(decoded), &"a cbo roaring bitmap")
|
||||
})?;
|
||||
Ok(TaskKeys(task_keys))
|
||||
}
|
||||
}
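The custom serde impls above store the set of task keys as a base64 string wrapping a serialized bitmap, which keeps the value JSON-friendly while staying compact for large key sets. A round-trip sketch of the same idea using the roaring crate's portable serialization and base64 0.22; the real code goes through milli's `DeCboRoaringBitmapCodec` instead:

use base64::prelude::BASE64_STANDARD;
use base64::Engine as _;
use roaring::RoaringBitmap;

fn encode(keys: &RoaringBitmap) -> String {
    let mut bytes = Vec::new();
    // `serialize_into` writes the standard portable roaring format.
    keys.serialize_into(&mut bytes).expect("writing to a Vec cannot fail");
    BASE64_STANDARD.encode(&bytes)
}

fn decode(encoded: &str) -> Option<RoaringBitmap> {
    let bytes = BASE64_STANDARD.decode(encoded).ok()?;
    RoaringBitmap::deserialize_from(&bytes[..]).ok()
}

fn main() {
    let keys: RoaringBitmap = (0u32..1_000).collect();
    let encoded = encode(&keys);
    assert_eq!(decode(&encoded), Some(keys));
}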
|
||||
|
||||
pub enum ReceiveTaskError {
|
||||
UnknownRemote(String),
|
||||
DuplicateTask(DocumentId),
|
||||
}
|
||||
|
||||
pub mod headers {
|
||||
use std::borrow::Cow;
|
||||
use std::num::ParseIntError;
|
||||
use std::string::FromUtf8Error;
|
||||
|
||||
use milli::DocumentId;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::tasks::TaskId;
|
||||
|
||||
/// Implement on response types to extract header values
|
||||
pub trait GetHeader: Sized {
|
||||
type Error: std::fmt::Debug + std::fmt::Display;
|
||||
fn get_header(&self, name: &str) -> Result<Option<&str>, Self::Error>;
|
||||
|
||||
fn get_origin_remote(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_REMOTE_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
|
||||
inner,
|
||||
header: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
})?))
|
||||
}
|
||||
|
||||
fn get_origin_task_uid(&self) -> Result<Option<TaskId>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_TASK_UID_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_ORIGIN_TASK_UID_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_origin_network_version(&self) -> Result<Option<Uuid>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_NETWORK_VERSION_HEADER)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_ORIGIN_NETWORK_VERSION_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseUuid {
|
||||
inner,
|
||||
header: PROXY_ORIGIN_NETWORK_VERSION_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_remote(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_REMOTE_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
|
||||
inner,
|
||||
header: PROXY_IMPORT_REMOTE_HEADER,
|
||||
})?))
|
||||
}
|
||||
|
||||
fn get_import_index_count(&self) -> Result<Option<u64>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_INDEX_COUNT_HEADER)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_INDEX_COUNT_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_INDEX_COUNT_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_index(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_INDEX_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
|
||||
inner,
|
||||
header: PROXY_IMPORT_INDEX_HEADER,
|
||||
})?))
|
||||
}
|
||||
|
||||
fn get_import_task_key(&self) -> Result<Option<DocumentId>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_TASK_KEY_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_TASK_KEY_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_TASK_KEY_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_docs(&self) -> Result<Option<u64>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_DOCS_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_DOCS_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_DOCS_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_index_docs(&self) -> Result<Option<u64>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement on query types to set header values
|
||||
pub trait SetHeader: Sized {
|
||||
fn set_header(self, name: &str, value: &str) -> Self;
|
||||
|
||||
fn set_origin_remote(self, value: &str) -> Self {
|
||||
let encoded = urlencoding::encode(value);
|
||||
set_header_and_legacy(self, PROXY_ORIGIN_REMOTE_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_origin_task_uid(self, value: TaskId) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_ORIGIN_TASK_UID_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_origin_network_version(self, value: Uuid) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_ORIGIN_NETWORK_VERSION_HEADER, &encoded)
|
||||
}
|
||||
fn set_import_remote(self, value: &str) -> Self {
|
||||
let encoded = urlencoding::encode(value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_REMOTE_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_index_count(self, value: u64) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_INDEX_COUNT_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_index(self, value: &str) -> Self {
|
||||
let encoded = urlencoding::encode(value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_INDEX_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_task_key(self, value: DocumentId) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_TASK_KEY_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_docs(self, value: u64) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_DOCS_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_index_docs(self, value: u64) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, &encoded)
|
||||
}
|
||||
}
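// Typical call site for this trait (a sketch, mirroring the proxy code elsewhere in this
// changeset): wrap the outgoing request builder in a `SetHeader` implementor and chain the
// setters, e.g. `wrapper.set_origin_remote(name).set_origin_task_uid(uid).set_origin_network_version(version)`.
// Each setter url-encodes its value and writes it under both the current and the legacy
// header spelling.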
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum DecodeError<T: GetHeader> {
|
||||
#[error("while getting header: {inner}")]
|
||||
InResponse { inner: T::Error, header: &'static str },
|
||||
#[error("while url-decoding: {inner}")]
|
||||
UrlDecoding { inner: FromUtf8Error, header: &'static str },
|
||||
#[error("while parsing as an integer: {inner}")]
|
||||
ParseInt { inner: ParseIntError, header: &'static str },
|
||||
#[error("while parsing as a UUID: {inner}")]
|
||||
ParseUuid { inner: uuid::Error, header: &'static str },
|
||||
}
|
||||
|
||||
impl<T: GetHeader> DecodeError<T> {
|
||||
pub fn header(&self) -> &'static str {
|
||||
match self {
|
||||
DecodeError::InResponse { inner: _, header }
|
||||
| DecodeError::UrlDecoding { inner: _, header }
|
||||
| DecodeError::ParseInt { inner: _, header }
|
||||
| DecodeError::ParseUuid { inner: _, header } => header,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "X-Meili-Proxy-Origin-Remote";
|
||||
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "X-Meili-Proxy-Origin-TaskUid";
|
||||
pub const PROXY_ORIGIN_NETWORK_VERSION_HEADER: &str = "X-Meili-Proxy-Origin-Network-Version";
|
||||
pub const PROXY_IMPORT_REMOTE_HEADER: &str = "X-Meili-Proxy-Import-Remote";
|
||||
pub const PROXY_IMPORT_INDEX_COUNT_HEADER: &str = "X-Meili-Proxy-Import-Index-Count";
|
||||
pub const PROXY_IMPORT_INDEX_HEADER: &str = "X-Meili-Proxy-Import-Index";
|
||||
pub const PROXY_IMPORT_TASK_KEY_HEADER: &str = "X-Meili-Proxy-Import-Task-Key";
|
||||
pub const PROXY_IMPORT_DOCS_HEADER: &str = "X-Meili-Proxy-Import-Docs";
|
||||
pub const PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER: &str = "X-Meili-Proxy-Import-Total-Index-Docs";
|
||||
|
||||
fn get_header_and_legacy<'a, T: GetHeader>(
|
||||
t: &'a T,
|
||||
header: &'static str,
|
||||
) -> Result<Option<&'a str>, DecodeError<T>> {
|
||||
Ok(Some(
|
||||
if let Some(encoded) =
|
||||
t.get_header(header).map_err(|inner| DecodeError::InResponse { inner, header })?
|
||||
{
|
||||
encoded
|
||||
} else {
|
||||
let header = header.strip_prefix("X-").unwrap();
|
||||
let Some(encoded) = t
|
||||
.get_header(header)
|
||||
.map_err(|inner| DecodeError::InResponse { inner, header })?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
encoded
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
fn set_header_and_legacy<T: SetHeader>(t: T, name: &'static str, value: &str) -> T {
|
||||
let t = t.set_header(name, value);
|
||||
let name = name.strip_prefix("X-").unwrap();
|
||||
t.set_header(name, value)
|
||||
}
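// Naming convention used above, spelled out: `set_header_and_legacy` writes every value
// twice, once under the `X-Meili-Proxy-*` name and once with the `X-` prefix stripped
// (e.g. `Meili-Proxy-Origin-Remote`), while `get_header_and_legacy` reads the `X-` variant
// first and falls back to the stripped one, which keeps older remotes interoperable.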
|
||||
}
|
||||
@@ -1,52 +0,0 @@
use std::collections::BTreeMap;

use milli::DocumentId;

use crate::network::Remote;
use crate::tasks::network::{ImportState, InRemote, NetworkTopologyChange, ReceiveTaskError};

impl NetworkTopologyChange {
    pub fn export_to_process(&self) -> Option<(&BTreeMap<String, Remote>, &str)> {
        None
    }

    pub fn set_moved(&mut self, _moved_documents: u64) {}

    pub fn update_state(&mut self) {}

    pub fn receive_remote_task(
        &mut self,
        _remote_name: &str,
        _index_name: Option<&str>,
        _task_key: Option<DocumentId>,
        _document_count: u64,
        _total_indexes: u64,
        _total_index_documents: u64,
    ) -> Result<(), ReceiveTaskError> {
        Ok(())
    }

    pub fn process_remote_tasks(
        &mut self,
        _remote_name: &str,
        _index_name: &str,
        _document_count: u64,
    ) {
    }

    pub fn is_import_finished(&self) -> bool {
        true
    }
}

impl InRemote {
    pub fn is_finished(&self) -> bool {
        matches!(self.import_state, ImportState::Finished { .. })
    }
}

impl Default for InRemote {
    fn default() -> Self {
        Self::new()
    }
}
@@ -1,253 +0,0 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::update::new::indexer::current_edition::sharding::Shards;
|
||||
use milli::DocumentId;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::TaskKeys;
|
||||
use crate::network::Remote;
|
||||
use crate::tasks::network::{
|
||||
ImportIndexState, ImportState, InRemote, NetworkTopologyChange, NetworkTopologyState,
|
||||
ReceiveTaskError,
|
||||
};
|
||||
|
||||
impl NetworkTopologyChange {
|
||||
pub fn export_to_process(
|
||||
&self,
|
||||
) -> Option<(impl Iterator<Item = (&str, &Remote)> + Clone, &str)> {
|
||||
if self.state != NetworkTopologyState::ExportingDocuments {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.new_network.remotes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let out_name = self.out_name()?;
|
||||
Some((
|
||||
self.new_network.remotes.iter().filter_map(|(name, remote)| {
|
||||
// don't export to ourselves
|
||||
|
||||
(Some(name.as_str()) != self.in_name()).then_some((name.as_str(), remote))
|
||||
}),
|
||||
out_name,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn new_shards(&self) -> Option<Shards> {
|
||||
self.new_network.shards()
|
||||
}
|
||||
|
||||
pub fn set_moved(&mut self, moved_documents: u64) {
|
||||
self.stats.moved_documents = moved_documents;
|
||||
}
|
||||
|
||||
/// Compute the next state from the current state of the task.
|
||||
pub fn update_state(&mut self) {
|
||||
self.state = match self.state {
|
||||
NetworkTopologyState::WaitingForOlderTasks => {
|
||||
// no more older tasks, so finished waiting
|
||||
NetworkTopologyState::ExportingDocuments
|
||||
}
|
||||
NetworkTopologyState::ExportingDocuments => {
|
||||
// processed all exported documents
|
||||
if self.is_import_finished() {
|
||||
NetworkTopologyState::Finished
|
||||
} else {
|
||||
NetworkTopologyState::ImportingDocuments
|
||||
}
|
||||
}
|
||||
NetworkTopologyState::ImportingDocuments => {
|
||||
if self.is_import_finished() {
|
||||
NetworkTopologyState::Finished
|
||||
} else {
|
||||
NetworkTopologyState::ImportingDocuments
|
||||
}
|
||||
}
|
||||
NetworkTopologyState::Finished => NetworkTopologyState::Finished,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn receive_remote_task(
|
||||
&mut self,
|
||||
remote_name: &str,
|
||||
index_name: Option<&str>,
|
||||
task_key: Option<DocumentId>,
|
||||
document_count: u64,
|
||||
total_indexes: u64,
|
||||
total_index_documents: u64,
|
||||
) -> Result<(), ReceiveTaskError> {
|
||||
let remote = self
|
||||
.in_remotes
|
||||
.get_mut(remote_name)
|
||||
.ok_or_else(|| ReceiveTaskError::UnknownRemote(remote_name.to_string()))?;
|
||||
remote.import_state = match std::mem::take(&mut remote.import_state) {
|
||||
ImportState::WaitingForInitialTask => {
|
||||
if total_indexes == 0 {
|
||||
ImportState::Finished { total_indexes, total_documents: 0 }
|
||||
} else {
|
||||
let mut task_keys = RoaringBitmap::new();
|
||||
if let Some(index_name) = index_name {
|
||||
if let Some(task_key) = task_key {
|
||||
task_keys.insert(task_key);
|
||||
}
|
||||
let mut import_index_state = BTreeMap::new();
|
||||
import_index_state.insert(
|
||||
index_name.to_owned(),
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents: total_index_documents,
|
||||
received_documents: document_count,
|
||||
task_keys: TaskKeys(task_keys),
|
||||
processed_documents: 0,
|
||||
},
|
||||
);
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
} else {
|
||||
ImportState::WaitingForInitialTask
|
||||
}
|
||||
}
|
||||
}
|
||||
ImportState::Ongoing { mut import_index_state, total_indexes } => {
|
||||
if let Some(index_name) = index_name {
|
||||
if let Some((index_name, mut index_state)) =
|
||||
import_index_state.remove_entry(index_name)
|
||||
{
|
||||
index_state = match index_state {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents: previously_received,
|
||||
processed_documents,
|
||||
mut task_keys,
|
||||
} => {
|
||||
if let Some(task_key) = task_key {
|
||||
if !task_keys.0.insert(task_key) {
|
||||
return Err(ReceiveTaskError::DuplicateTask(task_key));
|
||||
}
|
||||
}
|
||||
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents: previously_received + document_count,
|
||||
processed_documents,
|
||||
task_keys,
|
||||
}
|
||||
}
|
||||
ImportIndexState::Finished { total_documents } => {
|
||||
ImportIndexState::Finished { total_documents }
|
||||
}
|
||||
};
|
||||
import_index_state.insert(index_name, index_state);
|
||||
} else {
|
||||
let mut task_keys = RoaringBitmap::new();
|
||||
if let Some(task_key) = task_key {
|
||||
task_keys.insert(task_key);
|
||||
}
|
||||
let state = ImportIndexState::Ongoing {
|
||||
total_documents: total_index_documents,
|
||||
received_documents: document_count,
|
||||
processed_documents: 0,
|
||||
task_keys: TaskKeys(task_keys),
|
||||
};
|
||||
import_index_state.insert(index_name.to_string(), state);
|
||||
}
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
} else {
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
}
|
||||
}
|
||||
ImportState::Finished { total_indexes, total_documents } => {
|
||||
ImportState::Finished { total_indexes, total_documents }
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn process_remote_tasks(
|
||||
&mut self,
|
||||
remote_name: &str,
|
||||
index_name: &str,
|
||||
document_count: u64,
|
||||
) {
|
||||
let remote = self
|
||||
.in_remotes
|
||||
.get_mut(remote_name)
|
||||
.expect("process_remote_tasks called on a remote that is not in `in_remotes`");
|
||||
remote.import_state = match std::mem::take(&mut remote.import_state) {
|
||||
ImportState::WaitingForInitialTask => panic!("no task received yet one processed"),
|
||||
ImportState::Ongoing { mut import_index_state, total_indexes } => {
|
||||
let (index_name, mut index_state) =
|
||||
import_index_state.remove_entry(index_name).unwrap();
|
||||
index_state = match index_state {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents,
|
||||
processed_documents: previously_processed,
|
||||
task_keys,
|
||||
} => {
|
||||
let newly_processed_documents = previously_processed + document_count;
|
||||
if newly_processed_documents >= total_documents {
|
||||
ImportIndexState::Finished { total_documents }
|
||||
} else {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents,
|
||||
processed_documents: newly_processed_documents,
|
||||
task_keys,
|
||||
}
|
||||
}
|
||||
}
|
||||
ImportIndexState::Finished { total_documents } => {
|
||||
ImportIndexState::Finished { total_documents }
|
||||
}
|
||||
};
|
||||
import_index_state.insert(index_name, index_state);
|
||||
if import_index_state.len() as u64 == total_indexes
|
||||
&& import_index_state.values().all(|index| index.is_finished())
|
||||
{
|
||||
let total_documents =
|
||||
import_index_state.values().map(|index| index.total_documents()).sum();
|
||||
ImportState::Finished { total_indexes, total_documents }
|
||||
} else {
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
}
|
||||
}
|
||||
ImportState::Finished { total_indexes, total_documents } => {
|
||||
ImportState::Finished { total_indexes, total_documents }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_import_finished(&self) -> bool {
|
||||
self.in_remotes.values().all(|remote| remote.is_finished())
|
||||
}
|
||||
}
|
||||
|
||||
impl InRemote {
|
||||
pub fn is_finished(&self) -> bool {
|
||||
matches!(self.import_state, ImportState::Finished { .. })
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for InRemote {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportIndexState {
|
||||
pub fn is_finished(&self) -> bool {
|
||||
matches!(self, ImportIndexState::Finished { .. })
|
||||
}
|
||||
|
||||
fn total_documents(&self) -> u64 {
|
||||
match *self {
|
||||
ImportIndexState::Ongoing { total_documents, .. }
|
||||
| ImportIndexState::Finished { total_documents } => total_documents,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -160,7 +160,7 @@ mini-dashboard = [
]
chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
enterprise = ["meilisearch-types/enterprise", "index-scheduler/enterprise"]
enterprise = ["meilisearch-types/enterprise"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]

@@ -300,7 +300,6 @@ impl Infos {
            max_indexing_memory,
            max_indexing_threads,
            skip_index_budget: _,
            experimental_disable_delta_encoding: _,
            experimental_no_edition_2024_for_settings,
            experimental_no_edition_2024_for_dumps,
            experimental_no_edition_2024_for_prefix_post_processing,

@@ -6,14 +6,10 @@ use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::OrderBy;
|
||||
use meilisearch_types::tasks::network::headers::{
|
||||
PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER,
|
||||
PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
|
||||
PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
};
|
||||
use serde_json::Value;
|
||||
use tokio::task::JoinError;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
@@ -97,58 +93,8 @@ pub enum MeilisearchHttpError {
|
||||
} else { PROXY_ORIGIN_TASK_UID_HEADER }
|
||||
)]
|
||||
InconsistentOriginHeaders { is_remote_missing: bool },
|
||||
#[error("Inconsistent `Import` headers: {remote}: {remote_status}, {index}: {index_status}, {docs}: {docs_status}.\n - Hint: either all three headers should be provided, or none of them",
|
||||
remote = PROXY_IMPORT_REMOTE_HEADER,
|
||||
remote_status = if *is_remote_missing { "missing" } else{ "provided" },
|
||||
index = PROXY_IMPORT_INDEX_HEADER,
|
||||
index_status = if *is_index_missing { "missing" } else { "provided" },
|
||||
docs = PROXY_IMPORT_DOCS_HEADER,
|
||||
docs_status = if *is_docs_missing { "missing" } else { "provided" }
|
||||
)]
|
||||
InconsistentImportHeaders {
|
||||
is_remote_missing: bool,
|
||||
is_index_missing: bool,
|
||||
is_docs_missing: bool,
|
||||
},
|
||||
#[error("Inconsistent `Import-Metadata` headers: {index_count}: {index_count_status}, {task_key}: {task_key_status}, {total_index_documents}: {total_index_documents_status}.\n - Hint: either all three headers should be provided, or none of them",
|
||||
index_count = PROXY_IMPORT_INDEX_COUNT_HEADER,
|
||||
index_count_status = if *is_index_count_missing { "missing" } else { "provided"},
|
||||
task_key = PROXY_IMPORT_TASK_KEY_HEADER,
|
||||
task_key_status = if *is_task_key_missing { "missing" } else { "provided"},
|
||||
total_index_documents = PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
|
||||
total_index_documents_status = if *is_total_index_documents_missing { "missing" } else { "provided"},
|
||||
)]
|
||||
InconsistentImportMetadataHeaders {
|
||||
is_index_count_missing: bool,
|
||||
is_task_key_missing: bool,
|
||||
is_total_index_documents_missing: bool,
|
||||
},
|
||||
|
||||
#[error(
|
||||
"Inconsistent task network headers: origin headers: {origin_status}, import headers: {import_status}, import metadata: {import_metadata_status}",
|
||||
origin_status = if *is_missing_origin { "missing"} else { "present" },
|
||||
import_status = if *is_missing_import { "missing"} else { "present" },
|
||||
import_metadata_status = if *is_missing_import_metadata { "missing"} else { "present" })]
|
||||
InconsistentTaskNetworkHeaders {
|
||||
is_missing_origin: bool,
|
||||
is_missing_import: bool,
|
||||
is_missing_import_metadata: bool,
|
||||
},
|
||||
#[error("Invalid value for header `{header_name}`: {msg}")]
|
||||
#[error("Invalid value for header {header_name}: {msg}")]
|
||||
InvalidHeaderValue { header_name: &'static str, msg: String },
|
||||
#[error("This remote is not the leader of the network.\n - Note: only the leader `{leader}` can receive new tasks.")]
|
||||
NotLeader { leader: String },
|
||||
#[error("Unexpected `previousRemotes` in network call.\n - Note: `previousRemote` is reserved for internal use.")]
|
||||
UnexpectedNetworkPreviousRemotes,
|
||||
#[error("The network version in request is too old.\n - Received: {received}\n - Expected at least: {expected_at_least}")]
|
||||
NetworkVersionTooOld { received: Uuid, expected_at_least: Uuid },
|
||||
#[error("Remote `{remote}` encountered an error: {error}")]
|
||||
RemoteIndexScheduler { remote: String, error: index_scheduler::Error },
|
||||
#[error("{if_remote}Already has a pending network task with uid {task_uid}.\n - Note: No network task can be registered while any previous network task is not done processing.\n - Hint: Wait for task {task_uid} to complete or cancel it.",
|
||||
if_remote=if let Some(remote) = remote {
|
||||
format!("Remote `{remote}` encountered an error: ")
|
||||
} else {"".into()} )]
|
||||
UnprocessedNetworkTask { remote: Option<String>, task_uid: meilisearch_types::tasks::TaskId },
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
@@ -176,7 +122,6 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
|
||||
MeilisearchHttpError::HeedError(_) => Code::Internal,
|
||||
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
|
||||
MeilisearchHttpError::RemoteIndexScheduler { error, .. } => error.error_code(),
|
||||
MeilisearchHttpError::Milli { error, .. } => error.error_code(),
|
||||
MeilisearchHttpError::Payload(e) => e.error_code(),
|
||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||
@@ -197,19 +142,10 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
|
||||
Code::InvalidMultiSearchQueryPersonalization
|
||||
}
|
||||
MeilisearchHttpError::InconsistentOriginHeaders { .. }
|
||||
| MeilisearchHttpError::InconsistentImportHeaders { .. }
|
||||
| MeilisearchHttpError::InconsistentImportMetadataHeaders { .. }
|
||||
| MeilisearchHttpError::InconsistentTaskNetworkHeaders { .. } => {
|
||||
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
|
||||
Code::InconsistentDocumentChangeHeaders
|
||||
}
|
||||
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
|
||||
MeilisearchHttpError::NotLeader { .. } => Code::NotLeader,
|
||||
MeilisearchHttpError::UnexpectedNetworkPreviousRemotes => {
|
||||
Code::UnexpectedNetworkPreviousRemotes
|
||||
}
|
||||
MeilisearchHttpError::NetworkVersionTooOld { .. } => Code::NetworkVersionTooOld,
|
||||
MeilisearchHttpError::UnprocessedNetworkTask { .. } => Code::UnprocessedNetworkTask,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -233,14 +169,6 @@ impl From<aweb::error::PayloadError> for MeilisearchHttpError {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: meilisearch_types::tasks::network::headers::GetHeader>
|
||||
From<meilisearch_types::tasks::network::headers::DecodeError<T>> for MeilisearchHttpError
|
||||
{
|
||||
fn from(value: meilisearch_types::tasks::network::headers::DecodeError<T>) -> Self {
|
||||
Self::InvalidHeaderValue { header_name: value.header(), msg: value.to_string() }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ActixPayloadError {
|
||||
#[error("The provided payload is incomplete and cannot be parsed")]
|
||||
|
||||
@@ -12,7 +12,6 @@ pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod proxy;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -230,7 +229,6 @@ pub fn setup_meilisearch(
        autobatching_enabled: true,
        cleanup_enabled: !opt.experimental_replication_parameters,
        max_number_of_tasks: 1_000_000,
        export_default_payload_size_bytes: almost_as_big_as(opt.http_payload_size_limit),
        max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
        batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
            || {
@@ -341,13 +339,6 @@ pub fn setup_meilisearch(
    Ok((index_scheduler, auth_controller))
}

/// Returns the input - 1MiB, or at least 20MiB
fn almost_as_big_as(input: byte_unit::Byte) -> byte_unit::Byte {
    let with_margin = input.subtract(byte_unit::Byte::MEBIBYTE);
    let at_least = byte_unit::Byte::MEBIBYTE.multiply(20).unwrap();
    with_margin.unwrap_or(at_least).max(at_least)
}
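// Illustration of the clamping above: a 100 MiB payload limit yields a 99 MiB default
// export payload size, a 10 MiB limit is raised to the 20 MiB floor, and inputs too small
// to subtract 1 MiB from also fall back to 20 MiB.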

/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
fn open_or_create_database_unchecked(
    opt: &Opt,

@@ -21,7 +21,6 @@ use meilisearch::{
    LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
@@ -96,14 +95,6 @@ async fn main() -> anyhow::Result<()> {
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

    // Disables the delta encoding of bitmaps as soon as possible
    if opt.indexer_options.experimental_disable_delta_encoding {
        DELTA_ENCODING_STATUS.set_to_disabled()
    } else {
        DELTA_ENCODING_STATUS.set_to_enabled()
    }
    .expect("the delta-encoding status to be set only once");

    std::panic::set_hook(Box::new(on_panic));

    anyhow::ensure!(

@@ -60,7 +60,6 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
    "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
    "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -846,14 +845,6 @@ pub struct IndexerOpts {
    #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
    #[serde(default)]
    pub experimental_no_edition_2024_for_facet_post_processing: bool,

    /// Experimental disable delta-encoding for bitmaps. For more information,
    /// see: <https://github.com/orgs/meilisearch/discussions/875>
    ///
    /// Enables the experimental disable delta-encoding for bitmaps feature.
    #[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
    #[serde(default)]
    pub experimental_disable_delta_encoding: bool,
}

impl IndexerOpts {
@@ -867,7 +858,6 @@ impl IndexerOpts {
            experimental_no_edition_2024_for_dumps,
            experimental_no_edition_2024_for_prefix_post_processing,
            experimental_no_edition_2024_for_facet_post_processing,
            experimental_disable_delta_encoding,
        } = self;
        if let Some(max_indexing_memory) = max_indexing_memory.0 {
            export_to_env_if_not_present(
@@ -905,12 +895,6 @@ impl IndexerOpts {
                experimental_no_edition_2024_for_facet_post_processing.to_string(),
            );
        }
        if experimental_disable_delta_encoding {
            export_to_env_if_not_present(
                MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
                experimental_disable_delta_encoding.to_string(),
            );
        }
    }
}
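// Usage sketch (the exact CLI spelling is an assumption, derived by clap from the field
// name): the option can be enabled with `MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING=true`
// in the environment or, presumably, with a `--experimental-disable-delta-encoding` flag;
// `export_to_env_if_not_present` above then mirrors a CLI-provided value back into the
// environment.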

@@ -926,7 +910,6 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
            experimental_no_edition_2024_for_dumps,
            experimental_no_edition_2024_for_prefix_post_processing,
            experimental_no_edition_2024_for_facet_post_processing,
            experimental_disable_delta_encoding: _, // managed in try_main
        } = other;

        let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
@@ -1262,7 +1245,7 @@ where
    T: AsRef<OsStr>,
{
    if let Err(VarError::NotPresent) = std::env::var(key) {
        unsafe { std::env::set_var(key, value) }
        std::env::set_var(key, value);
    }
}

@@ -1,43 +0,0 @@
use std::fs::File;

use meilisearch_types::network::Remote;

pub enum Body<T, F>
where
    T: serde::Serialize,
    F: FnMut(&str, &Remote, &mut T),
{
    NdJsonPayload(File),
    Inline(T),
    Generated(T, F),
    None,
}

impl Body<(), fn(&str, &Remote, &mut ())> {
    pub fn with_ndjson_payload(file: File) -> Self {
        Self::NdJsonPayload(file)
    }

    pub fn none() -> Self {
        Self::None
    }
}

impl<T> Body<T, fn(&str, &Remote, &mut T)>
where
    T: serde::Serialize,
{
    pub fn inline(payload: T) -> Self {
        Self::Inline(payload)
    }
}

impl<T, F> Body<T, F>
where
    T: serde::Serialize,
    F: FnMut(&str, &Remote, &mut T),
{
    pub fn generated(initial: T, f: F) -> Self {
        Self::Generated(initial, f)
    }
}
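// Illustrative use of the constructors above (field name hypothetical):
// `Body::inline(payload)` serializes one JSON document shared by every remote,
// `Body::with_ndjson_payload(file)` streams an ndjson file unchanged, and
// `Body::generated(payload, |remote_name, _remote, payload| { payload.origin = Some(remote_name.to_string()); })`
// lets the closure customize the payload once per target remote before serialization.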
@@ -1,31 +0,0 @@
use actix_web::HttpRequest;
use index_scheduler::IndexScheduler;
use meilisearch_types::network::{Network, Remote};
use meilisearch_types::tasks::network::{DbTaskNetwork, TaskNetwork};
use meilisearch_types::tasks::Task;

use crate::error::MeilisearchHttpError;
use crate::proxy::Body;

pub fn task_network_and_check_leader_and_version(
    _req: &HttpRequest,
    _network: &Network,
) -> Result<Option<TaskNetwork>, MeilisearchHttpError> {
    Ok(None)
}

pub async fn proxy<T, F>(
    _index_scheduler: &IndexScheduler,
    _index_uid: Option<&str>,
    _req: &HttpRequest,
    _task_network: DbTaskNetwork,
    _network: Network,
    _body: Body<T, F>,
    task: &Task,
) -> Result<Task, MeilisearchHttpError>
where
    T: serde::Serialize,
    F: FnMut(&str, &Remote, &mut T),
{
    Ok(task.clone())
}
@@ -1,618 +0,0 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::HttpRequest;
|
||||
use bytes::Bytes;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::network::Remote;
|
||||
use meilisearch_types::tasks::network::headers::{GetHeader, SetHeader};
|
||||
use meilisearch_types::tasks::network::{
|
||||
DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
|
||||
};
|
||||
use meilisearch_types::tasks::{Task, TaskId};
|
||||
use reqwest::{RequestBuilder, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::proxy::{Body, ProxyError, ReqwestErrorWithoutUrl};
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
mod timeouts {
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub static CONNECT_SECONDS: LazyLock<u64> =
|
||||
LazyLock::new(|| fetch_or_default("MEILI_EXPERIMENTAL_PROXY_CONNECT_TIMEOUT_SECONDS", 3));
|
||||
|
||||
pub static BACKOFF_SECONDS: LazyLock<u64> =
|
||||
LazyLock::new(|| fetch_or_default("MEILI_EXPERIMENTAL_PROXY_BACKOFF_TIMEOUT_SECONDS", 25));
|
||||
|
||||
pub static REQUEST_SECONDS: LazyLock<u64> =
|
||||
LazyLock::new(|| fetch_or_default("MEILI_EXPERIMENTAL_PROXY_REQUEST_TIMEOUT_SECONDS", 30));
|
||||
|
||||
fn fetch_or_default(key: &str, default: u64) -> u64 {
|
||||
match std::env::var(key) {
|
||||
Ok(timeout) => timeout.parse().unwrap_or_else(|_| {
|
||||
panic!("`{key}` environment variable is not parseable as an integer: {timeout}")
|
||||
}),
|
||||
Err(std::env::VarError::NotPresent) => default,
|
||||
Err(std::env::VarError::NotUnicode(_)) => {
|
||||
panic!("`{key}` environment variable is not set to a integer")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
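// All three timeouts can be overridden at startup, e.g.
// `MEILI_EXPERIMENTAL_PROXY_CONNECT_TIMEOUT_SECONDS=10` (defaults: connect 3 s,
// backoff 25 s, request 30 s); a value that is present but does not parse as an integer
// causes a panic the first time the timeout is read.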
|
||||
|
||||
impl<T, F> Body<T, F>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
pub fn into_bytes_iter(
|
||||
self,
|
||||
remotes: impl IntoIterator<Item = (String, Remote)>,
|
||||
) -> Result<
|
||||
impl Iterator<Item = (Option<Bytes>, (String, Remote))>,
|
||||
meilisearch_types::milli::Error,
|
||||
> {
|
||||
let bytes = match self {
|
||||
Body::NdJsonPayload(file) => {
|
||||
Some(Bytes::from_owner(unsafe { memmap2::Mmap::map(&file)? }))
|
||||
}
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
|
||||
Body::Generated(mut initial, mut f) => {
|
||||
return Ok(either::Right(remotes.into_iter().map(move |(name, remote)| {
|
||||
f(&name, &remote, &mut initial);
|
||||
let bytes =
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&initial).unwrap()));
|
||||
(bytes, (name, remote))
|
||||
})));
|
||||
}
|
||||
};
|
||||
Ok(either::Left(std::iter::repeat(bytes).zip(remotes)))
|
||||
}
|
||||
|
||||
pub fn into_bytes(
|
||||
self,
|
||||
remote_name: &str,
|
||||
remote: &Remote,
|
||||
) -> Result<Option<Bytes>, meilisearch_types::milli::Error> {
|
||||
Ok(match self {
|
||||
Body::NdJsonPayload(file) => {
|
||||
Some(Bytes::from_owner(unsafe { memmap2::Mmap::map(&file)? }))
|
||||
}
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
|
||||
Body::Generated(mut initial, mut f) => {
|
||||
f(remote_name, remote, &mut initial);
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&initial).unwrap()))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the header to determine if this task is a duplicate and originates with a remote.
|
||||
///
|
||||
/// If not, checks whether this remote is the leader and return `MeilisearchHttpError::NotLeader` if not.
|
||||
///
|
||||
/// If there is no leader, returns `Ok(None)`
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - `MeiliearchHttpError::NotLeader`: if the following are true simultaneously:
|
||||
/// 1. The task originates with the current node
|
||||
/// 2. There's a declared `leader`
|
||||
/// 3. The declared leader is **not** the current node
|
||||
/// - `MeilisearchHttpError::InvalidHeaderValue`: if headers cannot be parsed as a task network.
|
||||
/// - `MeilisearchHttpError::InconsistentTaskNetwork`: if only some of the headers are present.
|
||||
pub fn task_network_and_check_leader_and_version(
|
||||
req: &HttpRequest,
|
||||
network: &meilisearch_types::network::Network,
|
||||
) -> Result<Option<TaskNetwork>, MeilisearchHttpError> {
|
||||
let task_network =
|
||||
match (origin_from_req(req)?, import_data_from_req(req)?, import_metadata_from_req(req)?) {
|
||||
(Some(network_change), Some(import_from), Some(metadata)) => {
|
||||
TaskNetwork::Import { import_from, network_change, metadata }
|
||||
}
|
||||
(Some(origin), None, None) => TaskNetwork::Origin { origin },
|
||||
(None, None, None) => {
|
||||
match (network.leader.as_deref(), network.local.as_deref()) {
|
||||
// 1. Always allowed if there is no leader
|
||||
(None, _) => return Ok(None),
|
||||
// 2. Allowed if the leader is self
|
||||
(Some(leader), Some(this)) if leader == this => (),
|
||||
// 3. Any other change is disallowed
|
||||
(Some(leader), _) => {
|
||||
return Err(MeilisearchHttpError::NotLeader { leader: leader.to_string() })
|
||||
}
|
||||
}
|
||||
|
||||
TaskNetwork::Remotes {
|
||||
remote_tasks: Default::default(),
|
||||
network_version: network.version,
|
||||
}
|
||||
}
|
||||
// all good cases were matched, so this is always an error
|
||||
(origin, import_from, metadata) => {
|
||||
return Err(MeilisearchHttpError::InconsistentTaskNetworkHeaders {
|
||||
is_missing_origin: origin.is_none(),
|
||||
is_missing_import: import_from.is_none(),
|
||||
is_missing_import_metadata: metadata.is_none(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
if task_network.network_version() < network.version {
|
||||
return Err(MeilisearchHttpError::NetworkVersionTooOld {
|
||||
received: task_network.network_version(),
|
||||
expected_at_least: network.version,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Some(task_network))
|
||||
}
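// Summary of the decision above: a request with no proxy headers is accepted only when
// there is no declared leader or when this instance is the leader; a request carrying
// `Origin` (and possibly `Import`) headers is treated as already proxied; and in every
// accepted case a network version older than the locally known one is rejected with
// `NetworkVersionTooOld`.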
|
||||
|
||||
/// Updates the task description and, if necessary, proxies the passed request to the network and update the task description.
|
||||
///
|
||||
/// This function reads the custom headers from the request to determine if must proxy the request or if the request
|
||||
/// has already been proxied.
|
||||
///
|
||||
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
|
||||
/// sent to all remotes of the `network` (except `self`). The response from the remotes are collected to update the passed `task`
|
||||
/// with the task ids from the task queues of the remotes.
|
||||
/// - when the request has already been proxied, the custom headers contains information about the remote that created the initial task.
|
||||
/// This information is copied to the passed task.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// The updated task. The task is read back from the database to avoid erasing concurrent changes.
|
||||
pub async fn proxy<T, F>(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: Option<&str>,
|
||||
req: &HttpRequest,
|
||||
mut task_network: DbTaskNetwork,
|
||||
network: meilisearch_types::network::Network,
|
||||
body: Body<T, F>,
|
||||
task: &Task,
|
||||
) -> Result<Task, MeilisearchHttpError>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
if let DbTaskNetwork::Remotes { remote_tasks, network_version } = &mut task_network {
|
||||
let network_version = *network_version;
|
||||
let this = network
|
||||
.local
|
||||
.as_deref()
|
||||
.expect("inconsistent `network.leader` and `network.self`")
|
||||
.to_owned();
|
||||
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
|
||||
// otherwise get content type from request
|
||||
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
|
||||
};
|
||||
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(*timeouts::CONNECT_SECONDS))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let method = from_old_http_method(req.method());
|
||||
|
||||
// send payload to all remotes
|
||||
for (body, (node_name, node)) in body
|
||||
.into_bytes_iter(network.remotes.into_iter().filter(|(name, _)| name.as_str() != this))
|
||||
.map_err(|err| {
|
||||
MeilisearchHttpError::from_milli(err, index_uid.map(ToOwned::to_owned))
|
||||
})?
|
||||
{
|
||||
tracing::trace!(node_name, "proxying task to remote");
|
||||
|
||||
let client = client.clone();
|
||||
let api_key = node.write_api_key;
|
||||
let this = this.clone();
|
||||
let task_uid = task.uid;
|
||||
let method = method.clone();
|
||||
let path_and_query = req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
|
||||
|
||||
in_flight_remote_queries.insert(
|
||||
node_name,
|
||||
tokio::spawn({
|
||||
let url = format!("{}{}", node.url, path_and_query);
|
||||
|
||||
let content_type = content_type.map(|b| b.to_owned());
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(
|
||||
*timeouts::BACKOFF_SECONDS,
|
||||
)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let this = this.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
try_proxy(
|
||||
method,
|
||||
&url,
|
||||
content_type.as_deref(),
|
||||
network_version,
|
||||
api_key.as_deref(),
|
||||
&client,
|
||||
&this,
|
||||
task_uid,
|
||||
body,
|
||||
)
|
||||
.await
|
||||
}
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// wait for all in-flight queries to finish and collect their results
|
||||
for (node_name, handle) in in_flight_remote_queries {
|
||||
match handle.await {
|
||||
Ok(Ok(res)) => {
|
||||
let task_uid = res.task_uid;
|
||||
|
||||
remote_tasks.insert(node_name, Ok(task_uid).into());
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
|
||||
}
|
||||
Err(panic) => match panic.try_into_panic() {
|
||||
Ok(panic) => {
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
remote_tasks.insert(
|
||||
node_name,
|
||||
Err(ResponseError::from_msg(
|
||||
msg.to_string(),
|
||||
meilisearch_types::error::Code::Internal,
|
||||
))
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!("proxy task was unexpectedly cancelled")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(index_scheduler.set_task_network(task.uid, task_network)?)
|
||||
}
|
||||
|
||||
pub async fn send_request<T, F, U>(
|
||||
path_and_query: &str,
|
||||
method: reqwest::Method,
|
||||
content_type: Option<String>,
|
||||
body: Body<T, F>,
|
||||
remote_name: &str,
|
||||
remote: &Remote,
|
||||
) -> Result<U, ProxyError>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
U: DeserializeOwned,
|
||||
{
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some("application/x-ndjson".into()),
|
||||
// otherwise get content type from request
|
||||
_ => content_type,
|
||||
};
|
||||
|
||||
let body = body.into_bytes(remote_name, remote).map_err(Box::new)?;
|
||||
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(*timeouts::CONNECT_SECONDS))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let url = format!("{}{}", remote.url, path_and_query);
|
||||
|
||||
// send payload to remote
|
||||
tracing::trace!(remote_name, "sending request to remote");
|
||||
let api_key = remote.write_api_key.clone();
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(*timeouts::BACKOFF_SECONDS)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
let request = client
|
||||
.request(method, url)
|
||||
.timeout(std::time::Duration::from_secs(*timeouts::REQUEST_SECONDS));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request =
|
||||
if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
handle_response(response).await
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn handle_response<U>(response: reqwest::Response) -> Result<U, backoff::Error<ProxyError>>
|
||||
where
|
||||
U: DeserializeOwned,
|
||||
{
|
||||
match response.status() {
|
||||
status_code if status_code.is_success() => (),
|
||||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
return Err(backoff::Error::Permanent(ProxyError::AuthenticationError))
|
||||
}
|
||||
status_code if status_code.is_client_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::Permanent(ProxyError::BadRequest {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code if status_code.is_server_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::transient(ProxyError::RemoteError {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code => {
|
||||
tracing::warn!(
|
||||
status_code = status_code.as_u16(),
|
||||
"remote replied with unexpected status code"
|
||||
);
|
||||
}
|
||||
}
|
||||
let response: U = match parse_response(response).await {
|
||||
Ok(response) => response,
|
||||
Err(response) => {
|
||||
return Err(backoff::Error::permanent(ProxyError::CouldNotParseResponse { response }))
|
||||
}
|
||||
};
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
|
||||
match method {
|
||||
&actix_http::Method::CONNECT => reqwest::Method::CONNECT,
|
||||
&actix_http::Method::DELETE => reqwest::Method::DELETE,
|
||||
&actix_http::Method::GET => reqwest::Method::GET,
|
||||
&actix_http::Method::HEAD => reqwest::Method::HEAD,
|
||||
&actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
|
||||
&actix_http::Method::PATCH => reqwest::Method::PATCH,
|
||||
&actix_http::Method::POST => reqwest::Method::POST,
|
||||
&actix_http::Method::PUT => reqwest::Method::PUT,
|
||||
&actix_http::Method::TRACE => reqwest::Method::TRACE,
|
||||
method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn try_proxy(
|
||||
method: reqwest::Method,
|
||||
url: &str,
|
||||
content_type: Option<&[u8]>,
|
||||
network_version: Uuid,
|
||||
api_key: Option<&str>,
|
||||
client: &reqwest::Client,
|
||||
this: &str,
|
||||
task_uid: TaskId,
|
||||
body: Option<Bytes>,
|
||||
) -> Result<SummarizedTaskView, backoff::Error<ProxyError>> {
|
||||
let request = client
|
||||
.request(method, url)
|
||||
.timeout(std::time::Duration::from_secs(*timeouts::REQUEST_SECONDS));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let RequestWrapper(request) = RequestWrapper(request)
|
||||
.set_origin_task_uid(task_uid)
|
||||
.set_origin_network_version(network_version)
|
||||
.set_origin_remote(this);
|
||||
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
handle_response(response).await
|
||||
}
|
||||
|
||||
struct RequestWrapper(RequestBuilder);
|
||||
impl meilisearch_types::tasks::network::headers::SetHeader for RequestWrapper {
|
||||
fn set_header(self, name: &str, value: &str) -> Self {
|
||||
Self(self.0.header(name, value))
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
|
||||
};
|
||||
|
||||
Ok(parse_bytes_as_error(&bytes))
|
||||
}
|
||||
|
||||
fn parse_bytes_as_error(bytes: &[u8]) -> String {
|
||||
match serde_json::from_slice::<Value>(bytes) {
|
||||
Ok(value) => value.to_string(),
|
||||
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_response<T: DeserializeOwned>(
|
||||
response: reqwest::Response,
|
||||
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<T>(&bytes) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
|
||||
}
|
||||
}
|
||||
|
||||
struct ResponseWrapper<'a>(&'a HttpRequest);
|
||||
impl<'a> meilisearch_types::tasks::network::headers::GetHeader for ResponseWrapper<'a> {
|
||||
type Error = actix_http::header::ToStrError;
|
||||
|
||||
fn get_header(&self, name: &str) -> Result<Option<&str>, Self::Error> {
|
||||
self.0.headers().get(name).map(|value| value.to_str()).transpose()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
|
||||
let req = ResponseWrapper(req);
|
||||
let (remote_name, task_uid, network_version) = match (
|
||||
req.get_origin_remote()?,
|
||||
req.get_origin_task_uid()?,
|
||||
req.get_origin_network_version()?,
|
||||
) {
|
||||
(None, None, _) => return Ok(None),
|
||||
(None, Some(_), _) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
|
||||
}
|
||||
(Some(_), None, _) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
|
||||
is_remote_missing: false,
|
||||
})
|
||||
}
|
||||
(Some(remote_name), Some(task_uid), network_version) => {
|
||||
(remote_name, task_uid, network_version)
|
||||
}
|
||||
};
|
||||
|
||||
let network_version = network_version.unwrap_or_else(Uuid::nil);
|
||||
|
||||
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid, network_version }))
|
||||
}
|
||||
|
||||
pub fn import_data_from_req(req: &HttpRequest) -> Result<Option<ImportData>, MeilisearchHttpError> {
|
||||
let req = ResponseWrapper(req);
|
||||
let (remote_name, index_name, document_count) =
|
||||
match (req.get_import_remote()?, req.get_import_index()?, req.get_import_docs()?) {
|
||||
(None, None, None) => return Ok(None),
|
||||
(Some(remote_name), index_name, Some(documents)) => {
|
||||
(remote_name, index_name, documents)
|
||||
}
|
||||
// catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, Some, Some)
|
||||
(remote_name, index_name, documents) => {
|
||||
return Err(MeilisearchHttpError::InconsistentImportHeaders {
|
||||
is_remote_missing: remote_name.is_none(),
|
||||
is_index_missing: index_name.is_none(),
|
||||
is_docs_missing: documents.is_none(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(ImportData {
|
||||
remote_name: remote_name.to_string(),
|
||||
index_name: index_name.map(|index_name| index_name.to_string()),
|
||||
document_count,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn import_metadata_from_req(
|
||||
req: &HttpRequest,
|
||||
) -> Result<Option<ImportMetadata>, MeilisearchHttpError> {
|
||||
let req = ResponseWrapper(req);
|
||||
let (index_count, task_key, total_index_documents) = match (
|
||||
req.get_import_index_count()?,
|
||||
req.get_import_task_key()?,
|
||||
req.get_import_index_docs()?,
|
||||
) {
|
||||
(None, None, None) => return Ok(None),
|
||||
(Some(index_count), task_key, Some(total_index_documents)) => {
|
||||
(index_count, task_key, total_index_documents)
|
||||
}
|
||||
// catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, Some, Some)
|
||||
(index_count, task_key, total_index_documents) => {
|
||||
return Err(MeilisearchHttpError::InconsistentImportMetadataHeaders {
|
||||
is_index_count_missing: index_count.is_none(),
|
||||
is_task_key_missing: task_key.is_none(),
|
||||
is_total_index_documents_missing: total_index_documents.is_none(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(ImportMetadata { index_count, task_key, total_index_documents }))
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
use meilisearch_types::error::{ErrorCode as _, ResponseError};
|
||||
use reqwest::StatusCode;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ProxyError {
|
||||
#[error("{0}")]
|
||||
CouldNotSendRequest(ReqwestErrorWithoutUrl),
|
||||
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `documents.add` action")]
|
||||
AuthenticationError,
|
||||
#[error(
|
||||
"could not parse response from the remote host as a document addition response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
|
||||
response_from_remote(response)
|
||||
)]
|
||||
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
|
||||
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host did not answer before the deadline")]
|
||||
Timeout,
|
||||
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
|
||||
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("error while preparing the request: {error}")]
|
||||
Milli {
|
||||
#[from]
|
||||
error: Box<meilisearch_types::milli::Error>,
|
||||
},
|
||||
}
|
||||
|
||||
impl ProxyError {
|
||||
pub fn as_response_error(&self) -> ResponseError {
|
||||
use meilisearch_types::error::Code;
|
||||
let message = self.to_string();
|
||||
let code = match self {
|
||||
ProxyError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
|
||||
ProxyError::AuthenticationError => Code::RemoteInvalidApiKey,
|
||||
ProxyError::BadRequest { .. } => Code::RemoteBadRequest,
|
||||
ProxyError::Timeout => Code::RemoteTimeout,
|
||||
ProxyError::RemoteError { .. } => Code::RemoteRemoteError,
|
||||
ProxyError::CouldNotParseResponse { .. } => Code::RemoteBadResponse,
|
||||
ProxyError::Milli { error } => error.error_code(),
|
||||
};
|
||||
ResponseError::from_msg(message, code)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error(transparent)]
|
||||
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
|
||||
impl ReqwestErrorWithoutUrl {
|
||||
pub fn new(inner: reqwest::Error) -> Self {
|
||||
Self(inner.without_url())
|
||||
}
|
||||
}
|
||||
|
||||
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
|
||||
match response {
|
||||
Ok(response) => {
|
||||
format!(":\n - response from remote: {}", response)
|
||||
}
|
||||
Err(error) => {
|
||||
format!(":\n - additionally, could not retrieve response from remote: {error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
#[cfg(not(feature = "enterprise"))]
pub mod community_edition;
#[cfg(feature = "enterprise")]
pub mod enterprise_edition;
#[cfg(not(feature = "enterprise"))]
pub use community_edition::{proxy, task_network_and_check_leader_and_version};
#[cfg(feature = "enterprise")]
pub use enterprise_edition::{
    import_data_from_req, import_metadata_from_req, origin_from_req, proxy, send_request,
    task_network_and_check_leader_and_version,
};

mod body;
mod error;

pub use body::Body;
pub use error::{ProxyError, ReqwestErrorWithoutUrl};
39
crates/meilisearch/src/routes/indexes/community_edition.rs
Normal file
@@ -0,0 +1,39 @@
pub mod proxy {

    use std::fs::File;

    use actix_web::HttpRequest;
    use index_scheduler::IndexScheduler;

    use crate::error::MeilisearchHttpError;

    pub enum Body<T: serde::Serialize> {
        NdJsonPayload,
        Inline(T),
        None,
    }

    impl Body<()> {
        pub fn with_ndjson_payload(_file: File) -> Self {
            Self::NdJsonPayload
        }

        pub fn none() -> Self {
            Self::None
        }
    }

    pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
    pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";

    pub async fn proxy<T: serde::Serialize>(
        _index_scheduler: &IndexScheduler,
        _index_uid: &str,
        _req: &HttpRequest,
        _network: meilisearch_types::network::Network,
        _body: Body<T>,
        _task: &meilisearch_types::tasks::Task,
    ) -> Result<(), MeilisearchHttpError> {
        Ok(())
    }
}
@@ -45,7 +45,7 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::proxy::{proxy, task_network_and_check_leader_and_version, Body};
|
||||
use crate::routes::indexes::current_edition::proxy::{proxy, Body};
|
||||
use crate::routes::indexes::search::fix_sort_query_parameters;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
@@ -342,7 +342,6 @@ pub async fn delete_document(
|
||||
let DocumentParam { index_uid, document_id } = path.into_inner();
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -360,23 +359,16 @@ pub async fn delete_document(
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let mut task = {
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(
|
||||
task,
|
||||
uid,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network,
|
||||
)
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task)
|
||||
.await?;
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
@@ -975,7 +967,6 @@ async fn document_addition(
|
||||
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
|
||||
let mime_type = extract_mime_type(req)?;
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(req, &network)?;
|
||||
|
||||
let format = match (
|
||||
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
|
||||
@@ -1094,16 +1085,9 @@ async fn document_addition(
|
||||
index_uid: index_uid.to_string(),
|
||||
};
|
||||
|
||||
// FIXME: not new to #6000, but _any_ error here will cause the payload to unduly persist
|
||||
let scheduler = index_scheduler.clone();
|
||||
let mut task = match tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(
|
||||
task,
|
||||
task_id,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network,
|
||||
)
|
||||
let task = match tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, task_id, custom_metadata, dry_run)
|
||||
})
|
||||
.await?
|
||||
{
|
||||
@@ -1114,13 +1098,12 @@ async fn document_addition(
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
if network.sharding() {
|
||||
if let Some(file) = file {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
Some(&index_uid),
|
||||
&index_uid,
|
||||
req,
|
||||
task_network,
|
||||
network,
|
||||
Body::with_ndjson_payload(file),
|
||||
&task,
|
||||
@@ -1211,7 +1194,6 @@ pub async fn delete_documents_batch(
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1232,31 +1214,16 @@ pub async fn delete_documents_batch(
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let mut task = {
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(
|
||||
task,
|
||||
uid,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network,
|
||||
)
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
Some(&index_uid),
|
||||
&req,
|
||||
task_network,
|
||||
network,
|
||||
Body::inline(body),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
@@ -1319,7 +1286,6 @@ pub async fn delete_documents_by_filter(
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner();
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1346,31 +1312,16 @@ pub async fn delete_documents_by_filter(
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let mut task = {
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(
|
||||
task,
|
||||
uid,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network,
|
||||
)
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
Some(&index_uid),
|
||||
&req,
|
||||
task_network,
|
||||
network,
|
||||
Body::inline(filter),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
@@ -1470,7 +1421,6 @@ pub async fn edit_documents_by_function(
|
||||
.check_edit_documents_by_function("Using the documents edit route")?;
|
||||
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
@@ -1517,31 +1467,16 @@ pub async fn edit_documents_by_function(
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let mut task = {
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(
|
||||
task,
|
||||
uid,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network,
|
||||
)
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
Some(&index_uid),
|
||||
&req,
|
||||
task_network,
|
||||
network,
|
||||
Body::inline(body),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
@@ -1590,7 +1525,6 @@ pub async fn clear_all_documents(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1606,24 +1540,17 @@ pub async fn clear_all_documents(
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
|
||||
let mut task = {
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(
|
||||
task,
|
||||
uid,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network,
|
||||
)
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task)
|
||||
.await?;
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
@@ -0,0 +1,426 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::HttpRequest;
|
||||
use bytes::Bytes;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork};
|
||||
use reqwest::StatusCode;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::enterprise_edition::proxy::error::{
|
||||
ProxyDocumentChangeError, ReqwestErrorWithoutUrl,
|
||||
};
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
pub enum Body<T: serde::Serialize> {
|
||||
NdJsonPayload(File),
|
||||
Inline(T),
|
||||
None,
|
||||
}
|
||||
|
||||
impl Body<()> {
|
||||
pub fn with_ndjson_payload(file: File) -> Self {
|
||||
Self::NdJsonPayload(file)
|
||||
}
|
||||
|
||||
pub fn none() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}

/// If necessary, proxies the passed request to the network and updates the task description.
///
/// This function reads the custom headers from the request to determine whether it must proxy the request or whether the
/// request has already been proxied.
///
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
///   sent to all remotes of the `network` (except `self`). The responses from the remotes are collected to update the passed `task`
///   with the task ids from the task queues of the remotes.
/// - when the request has already been proxied, the custom headers contain information about the remote that created the initial task.
///   This information is copied to the passed task.
pub async fn proxy<T: serde::Serialize>(
    index_scheduler: &IndexScheduler,
    index_uid: &str,
    req: &HttpRequest,
    network: meilisearch_types::network::Network,
    body: Body<T>,
    task: &meilisearch_types::tasks::Task,
) -> Result<(), MeilisearchHttpError> {
match origin_from_req(req)? {
|
||||
Some(origin) => {
|
||||
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
|
||||
}
|
||||
None => {
|
||||
let this = network
|
||||
.local
|
||||
.as_deref()
|
||||
.expect("inconsistent `network.sharding` and `network.self`")
|
||||
.to_owned();
|
||||
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
|
||||
// otherwise get content type from request
|
||||
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
|
||||
};
|
||||
|
||||
let body = match body {
|
||||
Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe {
|
||||
memmap2::Mmap::map(&file).map_err(|err| {
|
||||
MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned()))
|
||||
})?
|
||||
})),
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
};
|
||||
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(3))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let method = from_old_http_method(req.method());
|
||||
|
||||
// send payload to all remotes
|
||||
for (node_name, node) in
|
||||
network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)
|
||||
{
|
||||
let body = body.clone();
|
||||
let client = client.clone();
|
||||
let api_key = node.write_api_key;
|
||||
let this = this.clone();
|
||||
let method = method.clone();
|
||||
let path_and_query =
|
||||
req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
|
||||
|
||||
in_flight_remote_queries.insert(
|
||||
node_name,
|
||||
tokio::spawn({
|
||||
let url = format!("{}{}", node.url, path_and_query);
|
||||
|
||||
let url_encoded_this = urlencoding::encode(&this).into_owned();
|
||||
let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize
|
||||
|
||||
let content_type = content_type.map(|b| b.to_owned());
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let url_encoded_this = url_encoded_this.clone();
|
||||
let url_encoded_task_uid = url_encoded_task_uid.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
try_proxy(
|
||||
method,
|
||||
&url,
|
||||
content_type.as_deref(),
|
||||
api_key.as_deref(),
|
||||
&client,
|
||||
&url_encoded_this,
|
||||
&url_encoded_task_uid,
|
||||
body,
|
||||
)
|
||||
.await
|
||||
}
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// wait for all in-flight queries to finish and collect their results
|
||||
let mut remote_tasks: BTreeMap<String, RemoteTask> = BTreeMap::new();
|
||||
for (node_name, handle) in in_flight_remote_queries {
|
||||
match handle.await {
|
||||
Ok(Ok(res)) => {
|
||||
let task_uid = res.task_uid;
|
||||
|
||||
remote_tasks.insert(node_name, Ok(task_uid).into());
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
|
||||
}
|
||||
Err(panic) => match panic.try_into_panic() {
|
||||
Ok(panic) => {
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
remote_tasks.insert(
|
||||
node_name,
|
||||
Err(ResponseError::from_msg(
|
||||
msg.to_string(),
|
||||
meilisearch_types::error::Code::Internal,
|
||||
))
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!("proxy task was unexpectedly cancelled")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// edit details to contain the return values from the remotes
|
||||
index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
    match method {
        &actix_http::Method::CONNECT => reqwest::Method::CONNECT,
        &actix_http::Method::DELETE => reqwest::Method::DELETE,
        &actix_http::Method::GET => reqwest::Method::GET,
        &actix_http::Method::HEAD => reqwest::Method::HEAD,
        &actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
        &actix_http::Method::PATCH => reqwest::Method::PATCH,
        &actix_http::Method::POST => reqwest::Method::POST,
        &actix_http::Method::PUT => reqwest::Method::PUT,
        &actix_http::Method::TRACE => reqwest::Method::TRACE,
        method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
    }
}

#[allow(clippy::too_many_arguments)]
|
||||
async fn try_proxy(
|
||||
method: reqwest::Method,
|
||||
url: &str,
|
||||
content_type: Option<&[u8]>,
|
||||
api_key: Option<&str>,
|
||||
client: &reqwest::Client,
|
||||
url_encoded_this: &str,
|
||||
url_encoded_task_uid: &str,
|
||||
body: Option<Bytes>,
|
||||
) -> Result<SummarizedTaskView, backoff::Error<ProxyDocumentChangeError>> {
|
||||
let request = client.request(method, url).timeout(std::time::Duration::from_secs(30));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let request = request.header(PROXY_ORIGIN_TASK_UID_HEADER, url_encoded_task_uid);
|
||||
let request = request.header(PROXY_ORIGIN_REMOTE_HEADER, url_encoded_this);
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
match response.status() {
|
||||
status_code if status_code.is_success() => (),
|
||||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::AuthenticationError))
|
||||
}
|
||||
status_code if status_code.is_client_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::BadRequest {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code if status_code.is_server_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::RemoteError {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code => {
|
||||
tracing::warn!(
|
||||
status_code = status_code.as_u16(),
|
||||
"remote replied with unexpected status code"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let response = match parse_response(response).await {
|
||||
Ok(response) => response,
|
||||
Err(response) => {
|
||||
return Err(backoff::Error::transient(
|
||||
ProxyDocumentChangeError::CouldNotParseResponse { response },
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
    let bytes = match response.bytes().await {
        Ok(bytes) => bytes,
        Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
    };

    Ok(parse_bytes_as_error(&bytes))
}

fn parse_bytes_as_error(bytes: &[u8]) -> String {
    match serde_json::from_slice::<Value>(bytes) {
        Ok(value) => value.to_string(),
        Err(_) => String::from_utf8_lossy(bytes).into_owned(),
    }
}
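A quick note on the fallback just above: a body that parses as JSON is re-serialized in compact form by Value::to_string, while anything else is returned as lossy UTF-8 text. Illustrative inputs and outputs (invented examples):

    { "message": "not found" }   ->   {"message":"not found"}
    upstream proxy timed out     ->   upstream proxy timed out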
|
||||
async fn parse_response<T: DeserializeOwned>(
|
||||
response: reqwest::Response,
|
||||
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<T>(&bytes) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
|
||||
}
|
||||
}
|
||||
|
||||
mod error {
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use reqwest::StatusCode;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ProxyDocumentChangeError {
|
||||
#[error("{0}")]
|
||||
CouldNotSendRequest(ReqwestErrorWithoutUrl),
|
||||
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `documents.add` action")]
|
||||
AuthenticationError,
|
||||
#[error(
|
||||
"could not parse response from the remote host as a document addition response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
|
||||
response_from_remote(response)
|
||||
)]
|
||||
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
|
||||
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host did not answer before the deadline")]
|
||||
Timeout,
|
||||
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
|
||||
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
}
|
||||
|
||||
impl ProxyDocumentChangeError {
|
||||
pub fn as_response_error(&self) -> ResponseError {
|
||||
use meilisearch_types::error::Code;
|
||||
let message = self.to_string();
|
||||
let code = match self {
|
||||
ProxyDocumentChangeError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
|
||||
ProxyDocumentChangeError::AuthenticationError => Code::RemoteInvalidApiKey,
|
||||
ProxyDocumentChangeError::BadRequest { .. } => Code::RemoteBadRequest,
|
||||
ProxyDocumentChangeError::Timeout => Code::RemoteTimeout,
|
||||
ProxyDocumentChangeError::RemoteError { .. } => Code::RemoteRemoteError,
|
||||
ProxyDocumentChangeError::CouldNotParseResponse { .. } => Code::RemoteBadResponse,
|
||||
};
|
||||
ResponseError::from_msg(message, code)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error(transparent)]
|
||||
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
|
||||
impl ReqwestErrorWithoutUrl {
|
||||
pub fn new(inner: reqwest::Error) -> Self {
|
||||
Self(inner.without_url())
|
||||
}
|
||||
}
|
||||
|
||||
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
|
||||
match response {
|
||||
Ok(response) => {
|
||||
format!(":\n - response from remote: {}", response)
|
||||
}
|
||||
Err(error) => {
|
||||
format!(":\n - additionally, could not retrieve response from remote: {error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
|
||||
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
|
||||
|
||||
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
|
||||
let (remote_name, task_uid) = match (
|
||||
req.headers().get(PROXY_ORIGIN_REMOTE_HEADER),
|
||||
req.headers().get(PROXY_ORIGIN_TASK_UID_HEADER),
|
||||
) {
|
||||
(None, None) => return Ok(None),
|
||||
(None, Some(_)) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
|
||||
}
|
||||
(Some(_), None) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
|
||||
is_remote_missing: false,
|
||||
})
|
||||
}
|
||||
(Some(remote_name), Some(task_uid)) => (
|
||||
urlencoding::decode(remote_name.to_str().map_err(|err| {
|
||||
MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
msg: format!("while parsing remote name as UTF-8: {err}"),
|
||||
}
|
||||
})?)
|
||||
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
msg: format!("while URL-decoding remote name: {err}"),
|
||||
})?,
|
||||
urlencoding::decode(task_uid.to_str().map_err(|err| {
|
||||
MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while parsing task UID as UTF-8: {err}"),
|
||||
}
|
||||
})?)
|
||||
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while URL-decoding task UID: {err}"),
|
||||
})?,
|
||||
),
|
||||
};
|
||||
|
||||
let task_uid: usize =
|
||||
task_uid.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while parsing the task UID as an integer: {err}"),
|
||||
})?;
|
||||
|
||||
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid }))
|
||||
}
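To make the header contract concrete, a proxied request arriving at a remote carries the two origin headers in roughly this shape (remote name and task uid invented; both values are URL-encoded by the sender and decoded here):

    Meili-Proxy-Origin-Remote: ms-00
    Meili-Proxy-Origin-TaskUid: 42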
@@ -25,13 +25,21 @@ use crate::analytics::{Aggregate, Analytics};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::proxy::{proxy, task_network_and_check_leader_and_version, Body};
|
||||
use crate::routes::is_dry_run;
|
||||
use crate::Opt;
|
||||
|
||||
pub mod compact;
|
||||
pub mod documents;
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
mod community_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
mod enterprise_edition;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
use community_edition as current_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use enterprise_edition as current_edition;
|
||||
|
||||
pub mod facet_search;
|
||||
pub mod search;
|
||||
mod search_analytics;
|
||||
@@ -42,6 +50,8 @@ mod settings_analytics;
|
||||
pub mod similar;
|
||||
mod similar_analytics;
|
||||
|
||||
pub use current_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
nest(
|
||||
@@ -193,7 +203,7 @@ pub async fn list_indexes(
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
#[derive(Deserr, Serialize, Debug, ToSchema)]
|
||||
#[derive(Deserr, Debug, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct IndexCreateRequest {
|
||||
@@ -263,10 +273,6 @@ pub async fn create_index(
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Create index");
|
||||
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
let IndexCreateRequest { primary_key, uid } = body.into_inner();
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
|
||||
@@ -276,32 +282,13 @@ pub async fn create_index(
|
||||
&req,
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexCreation {
|
||||
index_uid: uid.to_string(),
|
||||
primary_key: primary_key.clone(),
|
||||
};
|
||||
let tuid = get_task_id(&req, &opt)?;
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let scheduler = index_scheduler.clone();
|
||||
let mut task = tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, tuid, None, dry_run, task_network)
|
||||
})
|
||||
.await??;
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
None,
|
||||
&req,
|
||||
task_network,
|
||||
network,
|
||||
Body::inline(IndexCreateRequest { primary_key, uid }),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let task = SummarizedTaskView::from(task);
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
debug!(returns = ?task, "Create index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -395,7 +382,7 @@ impl Aggregate for IndexUpdatedAggregate {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserr, Serialize, Debug, ToSchema)]
|
||||
#[derive(Deserr, Debug, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct UpdateIndexRequest {
|
||||
@@ -447,10 +434,6 @@ pub async fn update_index(
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Update index");
|
||||
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let body = body.into_inner();
|
||||
|
||||
@@ -465,33 +448,17 @@ pub async fn update_index(
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexUpdate {
|
||||
index_uid: index_uid.clone().into_inner(),
|
||||
primary_key: body.primary_key.clone(),
|
||||
new_index_uid: body.uid.clone(),
|
||||
index_uid: index_uid.into_inner(),
|
||||
primary_key: body.primary_key,
|
||||
new_index_uid: body.uid,
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let scheduler = index_scheduler.clone();
|
||||
let mut task = tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, uid, None, dry_run, task_network)
|
||||
})
|
||||
.await??;
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
Some(&index_uid),
|
||||
&req,
|
||||
task_network,
|
||||
network,
|
||||
Body::inline(body),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let task = SummarizedTaskView::from(task);
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Update index");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -532,27 +499,14 @@ pub async fn delete_index(
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.clone().into_inner() };
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let scheduler = index_scheduler.clone();
|
||||
|
||||
let mut task = tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, uid, None, dry_run, task_network)
|
||||
})
|
||||
.await??;
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let task = SummarizedTaskView::from(task);
|
||||
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
debug!(returns = ?task, "Delete index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -17,7 +17,6 @@ use super::settings_analytics::*;
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::proxy::{proxy, task_network_and_check_leader_and_version, Body};
|
||||
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
|
||||
use crate::Opt;
|
||||
|
||||
@@ -77,13 +76,14 @@ macro_rules! make_setting_route {
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, Settings};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
use $crate::analytics::Analytics;
|
||||
use $crate::extractors::authentication::policies::*;
|
||||
use $crate::extractors::authentication::GuardedData;
|
||||
use $crate::extractors::sequential_extractor::SeqHandler;
|
||||
use $crate::Opt;
|
||||
use $crate::routes::SummarizedTaskView;
|
||||
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};
|
||||
#[allow(unused_imports)]
|
||||
use super::*;
|
||||
|
||||
@@ -130,7 +130,21 @@ macro_rules! make_setting_route {
|
||||
|
||||
let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };
|
||||
|
||||
let task = register_new_settings(new_settings, true, index_scheduler, &req, index_uid, opt).await?;
|
||||
let allow_index_creation =
|
||||
index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid: index_uid.to_string(),
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -197,7 +211,26 @@ macro_rules! make_setting_route {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let task = register_new_settings(new_settings, false, index_scheduler, &req, index_uid, opt).await?;
|
||||
let new_settings = $crate::routes::indexes::settings::validate_settings(
|
||||
new_settings,
|
||||
&index_scheduler,
|
||||
)?;
|
||||
|
||||
let allow_index_creation =
|
||||
index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid: index_uid.to_string(),
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Update settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -538,13 +571,14 @@ pub async fn update_all(
|
||||
index_uid: web::Path<String>,
|
||||
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: Data<Opt>,
|
||||
analytics: Data<Analytics>,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let new_settings: Settings<Unchecked> = body.into_inner();
|
||||
let new_settings = body.into_inner();
|
||||
debug!(parameters = ?new_settings, "Update all settings");
|
||||
let new_settings = validate_settings(new_settings, &index_scheduler)?;
|
||||
|
||||
analytics.publish(
|
||||
SettingsAnalytics {
|
||||
@@ -592,62 +626,23 @@ pub async fn update_all(
|
||||
&req,
|
||||
);
|
||||
|
||||
let task =
|
||||
register_new_settings(new_settings, false, index_scheduler, &req, index_uid, opt).await?;
|
||||
|
||||
debug!(returns = ?task, "Update all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
async fn register_new_settings(
|
||||
new_settings: Settings<Unchecked>,
|
||||
is_deletion: bool,
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
|
||||
req: &HttpRequest,
|
||||
index_uid: IndexUid,
|
||||
opt: Data<Opt>,
|
||||
) -> Result<SummarizedTaskView, ResponseError> {
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(req, &network)?;
|
||||
|
||||
// validate settings unless this is a duplicated task
|
||||
let new_settings = if task_network.is_none() {
|
||||
validate_settings(new_settings, &index_scheduler)?
|
||||
} else {
|
||||
new_settings
|
||||
};
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid: index_uid.clone(),
|
||||
new_settings: Box::new(new_settings.clone()),
|
||||
is_deletion,
|
||||
index_uid,
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(req, &opt)?;
|
||||
let dry_run = is_dry_run(req, &opt)?;
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let mut task = tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, uid, None, dry_run, task_network)
|
||||
})
|
||||
.await??;
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
Some(&index_uid),
|
||||
req,
|
||||
task_network,
|
||||
network,
|
||||
Body::inline(new_settings),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(task.into())
|
||||
debug!(returns = ?task, "Update all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
@@ -736,8 +731,20 @@ pub async fn delete_all(
|
||||
|
||||
let new_settings = Settings::cleared().into_unchecked();
|
||||
|
||||
let task =
|
||||
register_new_settings(new_settings, true, index_scheduler, &req, index_uid, opt).await?;
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid,
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -185,7 +185,7 @@ pub async fn get_metrics(
|
||||
// Fetch the finished batches...
|
||||
&Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Failed]),
|
||||
limit: Some(1),
|
||||
limit: 1,
|
||||
..Query::default()
|
||||
},
|
||||
auth_filters,
|
||||
@@ -214,7 +214,7 @@ pub async fn get_metrics(
|
||||
let task_queue_latency_seconds = index_scheduler
|
||||
.get_tasks_from_authorized_indexes(
|
||||
&Query {
|
||||
limit: Some(1),
|
||||
limit: 1,
|
||||
reverse: Some(true),
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
..Query::default()
|
||||
|
||||
@@ -204,22 +204,22 @@ pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
|
||||
pub struct SummarizedTaskView {
|
||||
/// The task unique identifier.
|
||||
#[schema(value_type = u32)]
|
||||
pub task_uid: TaskId,
|
||||
task_uid: TaskId,
|
||||
/// The index affected by this task. May be `null` if the task is not linked to any index.
|
||||
pub index_uid: Option<String>,
|
||||
index_uid: Option<String>,
|
||||
/// The status of the task.
|
||||
pub status: Status,
|
||||
status: Status,
|
||||
/// The type of the task.
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
kind: Kind,
|
||||
/// The date on which the task was enqueued.
|
||||
#[serde(
|
||||
serialize_with = "time::serde::rfc3339::serialize",
|
||||
deserialize_with = "time::serde::rfc3339::deserialize"
|
||||
)]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
enqueued_at: OffsetDateTime,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl From<Task> for SummarizedTaskView {
|
||||
|
||||
@@ -8,7 +8,7 @@ use index_scheduler::IndexScheduler;
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidNetworkLeader, InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf,
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
|
||||
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
@@ -20,21 +20,10 @@ use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
|
||||
use crate::analytics::{Aggregate, Analytics};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
mod community_edition;
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
mod enterprise_edition;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
use community_edition as current_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use enterprise_edition as current_edition;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(get_network, patch_network),
|
||||
@@ -94,7 +83,7 @@ async fn get_network(
|
||||
Ok(HttpResponse::Ok().json(network))
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserr, ToSchema, Serialize)]
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
@@ -117,19 +106,12 @@ pub struct Remote {
|
||||
pub write_api_key: Setting<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserr, ToSchema, Serialize)]
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!({
|
||||
"ms-00": {
|
||||
"url": "http://localhost:7700"
|
||||
},
|
||||
"ms-01": {
|
||||
"url": "http://localhost:7701"
|
||||
}
|
||||
}))]
|
||||
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
|
||||
#[serde(default)]
|
||||
pub remotes: Setting<BTreeMap<String, Option<Remote>>>,
|
||||
@@ -137,21 +119,10 @@ pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
|
||||
pub local: Setting<String>,
|
||||
#[schema(value_type = Option<String>, example = json!("ms-00"))]
|
||||
#[schema(value_type = Option<bool>, example = json!(true))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkLeader>)]
|
||||
pub leader: Setting<String>,
|
||||
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!({
|
||||
"ms-00": {
|
||||
"url": "http://localhost:7700"
|
||||
},
|
||||
"ms-01": {
|
||||
"url": "http://localhost:7701"
|
||||
}
|
||||
}))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
|
||||
#[serde(default)]
|
||||
pub previous_remotes: Setting<BTreeMap<String, Option<Remote>>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
|
||||
pub sharding: Setting<bool>,
|
||||
}
|
||||
|
||||
impl Remote {
|
||||
@@ -235,34 +206,40 @@ async fn patch_network(
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_network("Using the /network route")?;
|
||||
current_edition::patch_network(index_scheduler, new_network, req, analytics).await
|
||||
}
|
||||
|
||||
fn merge_networks(
|
||||
old_network: DbNetwork,
|
||||
new_network: Network,
|
||||
) -> Result<DbNetwork, ResponseError> {
|
||||
let new_network = new_network.0;
|
||||
let old_network = index_scheduler.network();
|
||||
debug!(parameters = ?new_network, "Patch network");
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
if new_network.sharding.set().is_some() {
|
||||
use meilisearch_types::error::Code;
|
||||
|
||||
return Err(ResponseError::from_msg(
|
||||
"Meilisearch Enterprise Edition is required to set `network.sharding`".into(),
|
||||
Code::RequiresEnterpriseEdition,
|
||||
));
|
||||
}
|
||||
|
||||
let merged_self = match new_network.local {
|
||||
Setting::Set(new_self) => Some(new_self),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_network.local,
|
||||
};
|
||||
let merged_leader = match new_network.leader {
|
||||
Setting::Set(new_leader) => Some(new_leader),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_network.leader,
|
||||
|
||||
let merged_sharding = match new_network.sharding {
|
||||
Setting::Set(new_sharding) => new_sharding,
|
||||
Setting::Reset => false,
|
||||
Setting::NotSet => old_network.sharding,
|
||||
};
|
||||
match (merged_leader.as_deref(), merged_self.as_deref()) {
|
||||
// 1. Always allowed if there is no leader
|
||||
(None, _) => (),
|
||||
// 2. Allowed if the leader is self
|
||||
(Some(leader), Some(this)) if leader == this => (),
|
||||
// 3. Any other change is disallowed
|
||||
(Some(leader), _) => {
|
||||
return Err(MeilisearchHttpError::NotLeader { leader: leader.to_string() }.into())
|
||||
}
|
||||
|
||||
if merged_sharding && merged_self.is_none() {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
|
||||
meilisearch_types::error::Code::InvalidNetworkSharding,
|
||||
));
|
||||
}
|
||||
let new_version = uuid::Uuid::now_v7();
|
||||
|
||||
let merged_remotes = match new_network.remotes {
|
||||
Setting::Set(new_remotes) => {
|
||||
let mut merged_remotes = BTreeMap::new();
|
||||
@@ -334,11 +311,19 @@ fn merge_networks(
|
||||
Setting::Reset => BTreeMap::new(),
|
||||
Setting::NotSet => old_network.remotes,
|
||||
};
|
||||
let merged_network = DbNetwork {
|
||||
local: merged_self,
|
||||
remotes: merged_remotes,
|
||||
leader: merged_leader,
|
||||
version: new_version,
|
||||
};
|
||||
Ok(merged_network)
|
||||
|
||||
analytics.publish(
|
||||
PatchNetworkAnalytics {
|
||||
network_size: merged_remotes.len(),
|
||||
network_has_self: merged_self.is_some(),
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
let merged_network =
|
||||
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
|
||||
|
||||
index_scheduler.put_network(merged_network.clone())?;
|
||||
debug!(returns = ?merged_network, "Patch network");
|
||||
Ok(HttpResponse::Ok().json(merged_network))
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{merge_networks, Network, PatchNetworkAnalytics};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
|
||||
pub async fn patch_network(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
|
||||
new_network: AwebJson<Network, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let new_network = new_network.0;
|
||||
let old_network = index_scheduler.network();
|
||||
debug!(parameters = ?new_network, "Patch network");
|
||||
|
||||
if new_network.leader.as_ref().set().is_some() {
|
||||
use meilisearch_types::error::Code;
|
||||
|
||||
return Err(ResponseError::from_msg(
|
||||
"Meilisearch Enterprise Edition is required to set `network.leader`".into(),
|
||||
Code::RequiresEnterpriseEdition,
|
||||
));
|
||||
}
|
||||
|
||||
if !matches!(new_network.previous_remotes, Setting::NotSet) {
|
||||
return Err(MeilisearchHttpError::UnexpectedNetworkPreviousRemotes.into());
|
||||
}
|
||||
|
||||
let merged_network = merge_networks(old_network.clone(), new_network)?;
|
||||
|
||||
index_scheduler.put_network(merged_network.clone())?;
|
||||
|
||||
analytics.publish(
|
||||
PatchNetworkAnalytics {
|
||||
network_size: merged_network.remotes.len(),
|
||||
network_has_self: merged_network.local.is_some(),
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
Ok(HttpResponse::Ok().json(merged_network))
|
||||
}
|
||||
@@ -1,389 +0,0 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use futures::TryStreamExt;
|
||||
use index_scheduler::{IndexScheduler, Query, RoFeatures};
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::network::{Network as DbNetwork, Remote as DbRemote};
|
||||
use meilisearch_types::tasks::network::{headers, NetworkTopologyChange, Origin, TaskNetwork};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{merge_networks, Network, PatchNetworkAnalytics, Remote};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::proxy::{self, proxy, Body, ProxyError};
|
||||
use crate::routes::tasks::AllTasks;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
pub async fn patch_network(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
|
||||
new_network: AwebJson<Network, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
match (
|
||||
proxy::origin_from_req(&req)?,
|
||||
proxy::import_data_from_req(&req)?,
|
||||
proxy::import_metadata_from_req(&req)?,
|
||||
) {
|
||||
(Some(origin), None, None) => {
|
||||
patch_network_with_origin(index_scheduler, new_network, req, origin, analytics).await
|
||||
}
|
||||
(None, None, None) => {
|
||||
patch_network_without_origin(index_scheduler, new_network, req, analytics).await
|
||||
}
|
||||
(Some(origin), Some(import_data), Some(metadata)) => {
|
||||
if metadata.index_count == 0 {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.network_no_index_for_remote(import_data.remote_name, origin)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
Ok(HttpResponse::Ok().finish())
|
||||
} else {
|
||||
Err(MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: headers::PROXY_IMPORT_INDEX_COUNT_HEADER,
|
||||
msg: format!("Expected 0 indexes, got `{}`", metadata.index_count),
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
(origin, import_data, metadata) => {
|
||||
Err(MeilisearchHttpError::InconsistentTaskNetworkHeaders {
|
||||
is_missing_origin: origin.is_none(),
|
||||
is_missing_import: import_data.is_none(),
|
||||
is_missing_import_metadata: metadata.is_none(),
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn patch_network_without_origin(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
|
||||
new_network: AwebJson<Network, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let new_network = new_network.0;
|
||||
let old_network = index_scheduler.network();
|
||||
debug!(parameters = ?new_network, "Patch network");
|
||||
|
||||
if !matches!(new_network.previous_remotes, Setting::NotSet) {
|
||||
return Err(MeilisearchHttpError::UnexpectedNetworkPreviousRemotes.into());
|
||||
}
|
||||
|
||||
let merged_network = merge_networks(old_network.clone(), new_network)?;
|
||||
|
||||
    // When a network task must be created, perform some sanity checks against common errors:
    // - missing experimental feature on a host from the network
    // - a network task is already enqueued
    //
    // These checks are by no means perfect (they are not atomic since the network is involved), but they should
    // help prevent a bad situation.
if let Some(merged_leader) = &merged_network.leader {
|
||||
let query = Query {
|
||||
statuses: Some(vec![
|
||||
meilisearch_types::tasks::Status::Enqueued,
|
||||
meilisearch_types::tasks::Status::Processing,
|
||||
]),
|
||||
types: Some(vec![meilisearch_types::tasks::Kind::NetworkTopologyChange]),
|
||||
limit: Some(1),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let filters = AuthFilter::default();
|
||||
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(&query, &filters)?;
|
||||
|
||||
if let Some(first) = tasks.min() {
|
||||
return Err(MeilisearchHttpError::UnprocessedNetworkTask {
|
||||
remote: None,
|
||||
task_uid: first,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
let mut kept_leader = false;
|
||||
|
||||
futures::stream::iter(
|
||||
old_network
|
||||
.remotes
|
||||
.iter()
|
||||
.merge_join_by(merged_network.remotes.iter(), |(left, _), (right, _)| {
|
||||
left.cmp(right)
|
||||
})
|
||||
.map(|eob| -> Result<_, ResponseError> {
|
||||
match eob {
|
||||
EitherOrBoth::Both(_, (remote_name, remote))
|
||||
| EitherOrBoth::Right((remote_name, remote)) => {
|
||||
kept_leader |= remote_name == merged_leader;
|
||||
Ok((remote_name, remote, false))
|
||||
}
|
||||
EitherOrBoth::Left((remote_name, remote)) => {
|
||||
Ok((remote_name, remote, true))
|
||||
}
|
||||
}
|
||||
}),
|
||||
)
|
||||
        .try_for_each_concurrent(Some(40), |(remote_name, remote, allow_unreachable)| {
            async move {
                {
                    // 1. check that the experimental feature is enabled
                    let remote_features: RuntimeTogglableFeatures = match proxy::send_request(
                        "/experimental-features",
                        reqwest::Method::GET,
                        None,
                        Body::none(),
                        remote_name,
                        remote,
                    )
                    .await
                    {
                        Ok(remote_features) => remote_features,
                        Err(ProxyError::Timeout | ProxyError::CouldNotSendRequest(_))
                            if allow_unreachable =>
                        {
                            return Ok(())
                        }
                        Err(err) => return Err(err.as_response_error()),
                    };
                    let remote_features = RoFeatures::from_runtime_features(remote_features);
                    remote_features.check_network("receiving a proxied network task").map_err(
                        |error| MeilisearchHttpError::RemoteIndexScheduler {
                            remote: remote_name.to_owned(),
                            error,
                        },
                    )?;
                    // 2. check whether there are any unfinished network tasks
                    let network_tasks: AllTasks = match proxy::send_request(
                        "/tasks?types=networkTopologyChange&statuses=enqueued,processing&limit=1",
                        reqwest::Method::GET,
                        None,
                        Body::none(),
                        remote_name,
                        remote,
                    )
                    .await
                    {
                        Ok(network_tasks) => network_tasks,
                        Err(ProxyError::Timeout | ProxyError::CouldNotSendRequest(_))
                            if allow_unreachable =>
                        {
                            return Ok(())
                        }
                        Err(err) => return Err(err.as_response_error()),
                    };
                    if let [first, ..] = network_tasks.results.as_slice() {
                        return Err(ResponseError::from(
                            MeilisearchHttpError::UnprocessedNetworkTask {
                                remote: Some(remote_name.to_owned()),
                                task_uid: first.uid,
                            },
                        ));
                    }
                }

                Ok(())
            }
        })
        .await?;

        if !kept_leader {
            return Err(ResponseError::from_msg(
                format!("leader `{merged_leader}` is missing from remotes"),
                Code::InvalidNetworkRemotes,
            ));
        }
    }
    index_scheduler.put_network(merged_network.clone())?;

    analytics.publish(
        PatchNetworkAnalytics {
            network_size: merged_network.remotes.len(),
            network_has_self: merged_network.local.is_some(),
        },
        &req,
    );
    if merged_network.leader.is_some() {
        let network_topology_change =
            NetworkTopologyChange::new(old_network.clone(), merged_network.clone());
        let task = KindWithContent::NetworkTopologyChange(network_topology_change);
        let mut task = {
            let index_scheduler = index_scheduler.clone();
            tokio::task::spawn_blocking(move || {
                index_scheduler.register_with_custom_metadata(
                    task,
                    None,
                    None,
                    false,
                    Some(TaskNetwork::Remotes {
                        remote_tasks: Default::default(),
                        network_version: merged_network.version,
                    }),
                )
            })
            .await??
        };
        let mut proxied_network = Network {
            remotes: Setting::Set(to_settings_remotes(&merged_network.remotes)),
            local: Setting::NotSet,
            leader: Setting::some_or_not_set(merged_network.leader.clone()),
            previous_remotes: Setting::Set(to_settings_remotes(&old_network.remotes)),
        };
        let mut deleted_network = old_network;

        // only keep the deleted remotes, to inform them that they're deleted.
        // deleted remotes are remotes that appear in the old version of the network, but not the new version.
        let deleted_remotes = &mut deleted_network.remotes;
        deleted_remotes.retain(|node, _| !merged_network.remotes.contains_key(node));
        // proxy network change to the remaining remotes.
        let updated_task = proxy(
            &index_scheduler,
            None,
            &req,
            task.network.take().unwrap(), // set in register
            merged_network,
            Body::generated(proxied_network.clone(), |name, _remote, network| {
                network.local = Setting::Set(name.to_string());
            }),
            &task,
        )
        .await?;
        // unwrap: network was set by `proxy`
        let task_network = updated_task.network.unwrap();

        proxied_network.previous_remotes = Setting::NotSet;
        if deleted_network.leader.is_some() {
            // proxy network change to the deleted remotes
            proxy(
                &index_scheduler,
                None,
                &req,
                task_network,
                deleted_network,
                Body::generated(proxied_network.clone(), |_name, _remote, network| {
                    network.local = Setting::Reset;
                }),
                &task,
            )
            .await?;
        }
        let task: SummarizedTaskView = task.into();
        debug!("returns: {:?}", task);
        Ok(HttpResponse::Accepted().json(task))
    } else {
        Ok(HttpResponse::Ok().json(merged_network))
    }
}

async fn patch_network_with_origin(
    index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
    merged_network: AwebJson<Network, DeserrJsonError>,
    req: HttpRequest,
    origin: Origin,
    analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let merged_network = merged_network.into_inner();
    debug!(parameters = ?merged_network, ?origin, "Patch network");
    let mut remotes = BTreeMap::new();
    let mut old_network = index_scheduler.network();
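    // Rebuild the incoming remote set from the proxied payload; `previous_remotes` is collected
    // below so this instance can reconstruct the network state that existed before the change.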
    for (name, remote) in merged_network.remotes.set().into_iter().flat_map(|x| x.into_iter()) {
        let Some(remote) = remote else { continue };
        let remote = remote.try_into_db_node(&name)?;
        remotes.insert(name, remote);
    }
    let mut previous_remotes = BTreeMap::new();
    for (name, remote) in
        merged_network.previous_remotes.set().into_iter().flat_map(|x| x.into_iter())
    {
        let Some(remote) = remote else {
            continue;
        };
        let remote = remote.try_into_db_node(&name)?;
        previous_remotes.insert(name, remote);
    }
    old_network.remotes = previous_remotes;

    let new_leader = merged_network.leader.set().ok_or_else(|| {
        ResponseError::from_msg("Duplicated task without leader".into(), Code::InvalidNetworkLeader)
    })?;

    let new_network = DbNetwork {
        local: merged_network.local.set(),
        remotes,
        leader: Some(new_leader),
        version: origin.network_version,
    };
    index_scheduler.put_network(new_network.clone())?;
    analytics.publish(
        PatchNetworkAnalytics {
            network_size: new_network.remotes.len(),
            network_has_self: new_network.local.is_some(),
        },
        &req,
    );

    let network_topology_change = NetworkTopologyChange::new(old_network, new_network);
    let task = KindWithContent::NetworkTopologyChange(network_topology_change);
    let task = {
        tokio::task::spawn_blocking(move || {
            index_scheduler.register_with_custom_metadata(
                task,
                None,
                None,
                false,
                Some(TaskNetwork::Origin { origin }),
            )
        })
        .await??
    };
    let task: SummarizedTaskView = task.into();
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}

fn to_settings_remotes(
    db_remotes: &BTreeMap<String, DbRemote>,
) -> BTreeMap<String, Option<Remote>> {
    db_remotes
        .iter()
        .map(|(name, remote)| {
            (
                name.clone(),
                Some(Remote {
                    url: Setting::Set(remote.url.clone()),
                    search_api_key: Setting::some_or_not_set(remote.search_api_key.clone()),
                    write_api_key: Setting::some_or_not_set(remote.write_api_key.clone()),
                }),
            )
        })
        .collect()
}

@@ -17,7 +17,6 @@ use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::proxy::{proxy, task_network_and_check_leader_and_version, Body};
|
||||
use crate::Opt;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
@@ -28,7 +27,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
|
||||
}
|
||||
|
||||
#[derive(Deserr, Serialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[derive(Deserr, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct SwapIndexesPayload {
|
||||
/// Array of the two indexUids to be swapped
|
||||
@@ -101,10 +100,6 @@ pub async fn swap_indexes(
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let params = params.into_inner();
|
||||
|
||||
let network = index_scheduler.network();
|
||||
let task_network = task_network_and_check_leader_and_version(&req, &network)?;
|
||||
|
||||
analytics.publish(
|
||||
IndexSwappedAnalytics {
|
||||
swap_operation_number: params.len(),
|
||||
@@ -115,36 +110,26 @@ pub async fn swap_indexes(
|
||||
let filters = index_scheduler.filters();
|
||||
|
||||
let mut swaps = vec![];
|
||||
for SwapIndexesPayload { indexes, rename } in ¶ms {
|
||||
for SwapIndexesPayload { indexes, rename } in params.into_iter() {
|
||||
// TODO: switch to deserr
|
||||
let (lhs, rhs) = match indexes.as_slice() {
|
||||
[lhs, rhs] => (lhs, rhs),
|
||||
_ => {
|
||||
return Err(
|
||||
MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes.clone()).into()
|
||||
);
|
||||
return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
|
||||
}
|
||||
};
|
||||
if !filters.is_index_authorized(lhs) || !filters.is_index_authorized(rhs) {
|
||||
return Err(AuthenticationError::InvalidToken.into());
|
||||
}
|
||||
swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()), rename: *rename });
|
||||
swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()), rename });
|
||||
}
|
||||
|
||||
let task = KindWithContent::IndexSwap { swaps };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let scheduler = index_scheduler.clone();
|
||||
let mut task = tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, uid, None, dry_run, task_network)
|
||||
})
|
||||
.await??;
|
||||
|
||||
if let Some(task_network) = task.network.take() {
|
||||
proxy(&index_scheduler, None, &req, task_network, network, Body::inline(params), &task)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let task = SummarizedTaskView::from(task);
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::Serialize;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::macros::format_description;
|
||||
use time::{Date, Duration, OffsetDateTime, Time};
|
||||
@@ -126,7 +126,7 @@ pub struct TasksFilterQuery {
|
||||
impl TasksFilterQuery {
|
||||
pub(crate) fn into_query(self) -> Query {
|
||||
Query {
|
||||
limit: Some(self.limit.0),
|
||||
limit: self.limit.0 as usize,
|
||||
from: self.from.as_deref().copied(),
|
||||
reverse: self.reverse.as_deref().copied(),
|
||||
batch_uids: self.batch_uids.merge_star_and_none(),
|
||||
@@ -225,7 +225,8 @@ pub struct TaskDeletionOrCancelationQuery {
|
||||
impl TaskDeletionOrCancelationQuery {
|
||||
fn into_query(self) -> Query {
|
||||
Query {
|
||||
limit: None,
|
||||
// We want to delete all tasks that match the given filters
|
||||
limit: usize::MAX,
|
||||
from: None,
|
||||
reverse: None,
|
||||
batch_uids: self.batch_uids.merge_star_and_none(),
|
||||
@@ -488,18 +489,18 @@ async fn delete_tasks(
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
pub struct AllTasks {
|
||||
/// The list of tasks that matched the filter.
|
||||
pub results: Vec<TaskView>,
|
||||
results: Vec<TaskView>,
|
||||
/// Total number of browsable results using offset/limit parameters for the given resource.
|
||||
pub total: u64,
|
||||
total: u64,
|
||||
/// Limit given for the query. If limit is not provided as a query parameter, this parameter displays the default limit value.
|
||||
pub limit: u32,
|
||||
limit: u32,
|
||||
/// The first task uid returned.
|
||||
pub from: Option<u32>,
|
||||
from: Option<u32>,
|
||||
/// Represents the value to send in from to fetch the next slice of the results. The first item for the next slice starts at this exact number. When the returned value is null, it means that all the data have been browsed in the given order.
|
||||
pub next: Option<u32>,
|
||||
next: Option<u32>,
|
||||
}
|
||||
|
||||
/// Get all tasks
|
||||
|
||||
@@ -228,7 +228,7 @@ mod tests {
|
||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||
snapshot!(meili_snap::json_string!(err), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`, `networkTopologyChange`.",
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
|
||||
@@ -42,7 +42,7 @@ async fn batch_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`, `networkTopologyChange`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
|
||||
@@ -143,8 +143,6 @@ impl Display for Value {
|
||||
".processingTimeMs" => "[duration]",
|
||||
".details.embedders.*.url" => "[url]",
|
||||
".details.dumpUid" => "[dump_uid]",
|
||||
".network.network_version" => "[version]",
|
||||
".network.origin.networkVersion" => "[version]",
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
@@ -43,9 +43,9 @@ impl Server<Owned> {
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
||||
if cfg!(windows) {
|
||||
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
|
||||
std::env::set_var("TMP", TEST_TEMP_DIR.path());
|
||||
} else {
|
||||
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
|
||||
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
|
||||
}
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
@@ -58,9 +58,9 @@ impl Server<Owned> {
|
||||
|
||||
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
|
||||
if cfg!(windows) {
|
||||
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
|
||||
std::env::set_var("TMP", TEST_TEMP_DIR.path());
|
||||
} else {
|
||||
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
|
||||
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
|
||||
}
|
||||
|
||||
options.master_key = Some("MASTER_KEY".to_string());
|
||||
@@ -215,9 +215,9 @@ impl Server<Shared> {
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
||||
if cfg!(windows) {
|
||||
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
|
||||
std::env::set_var("TMP", TEST_TEMP_DIR.path());
|
||||
} else {
|
||||
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
|
||||
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
|
||||
}
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
@@ -508,8 +508,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
experimental_no_edition_2024_for_dumps: false,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||
// It has no effect to set the delta encoding here as the toggle is done in try_main
|
||||
experimental_disable_delta_encoding: false,
|
||||
},
|
||||
experimental_enable_metrics: false,
|
||||
..Parser::parse_from(None as Option<&str>)
|
||||
|
||||
@@ -93,20 +93,6 @@ impl Service {
|
||||
self.request(req).await
|
||||
}
|
||||
|
||||
pub async fn patch_str(
|
||||
&self,
|
||||
url: impl AsRef<str>,
|
||||
body: impl AsRef<str>,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let mut req =
|
||||
test::TestRequest::patch().uri(url.as_ref()).set_payload(body.as_ref().to_string());
|
||||
for header in headers {
|
||||
req = req.insert_header(header);
|
||||
}
|
||||
self.request(req).await
|
||||
}
|
||||
|
||||
pub async fn patch(&self, url: impl AsRef<str>, body: Value) -> (Value, StatusCode) {
|
||||
self.patch_encoded(url, body, Encoder::Plain).await
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ async fn errors_on_param() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`, `leader`, `previousRemotes`",
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`, `sharding`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
@@ -186,7 +186,7 @@ async fn errors_on_param() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {
|
||||
@@ -196,8 +196,7 @@ async fn errors_on_param() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = server
|
||||
@@ -266,24 +265,22 @@ async fn auth() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -292,12 +289,11 @@ async fn auth() {
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -311,12 +307,11 @@ async fn auth() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "api_key",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -395,20 +390,18 @@ async fn get_and_set_network() {
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "00000000-0000-0000-0000-000000000000"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// adding self
|
||||
let (response, code) = server.set_network(json!({"self": "myself"})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -426,7 +419,7 @@ async fn get_and_set_network() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
@@ -441,8 +434,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -456,7 +448,7 @@ async fn get_and_set_network() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
@@ -471,8 +463,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -487,7 +478,7 @@ async fn get_and_set_network() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
@@ -507,8 +498,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -520,7 +510,7 @@ async fn get_and_set_network() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
@@ -535,8 +525,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -544,7 +533,7 @@ async fn get_and_set_network() {
|
||||
let (response, code) = server.set_network(json!({"self": Null})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {
|
||||
@@ -559,8 +548,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -568,7 +556,7 @@ async fn get_and_set_network() {
|
||||
let (response, code) = server.set_network(json!({"self": "thy"})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
@@ -583,8 +571,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -592,7 +579,7 @@ async fn get_and_set_network() {
|
||||
let (response, code) = server.set_network(json!({})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
@@ -607,8 +594,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -616,7 +602,7 @@ async fn get_and_set_network() {
|
||||
let (response, code) = server.set_network(json!({"remotes": {}})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
@@ -631,8 +617,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -640,7 +625,7 @@ async fn get_and_set_network() {
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
@@ -655,8 +640,7 @@ async fn get_and_set_network() {
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -668,12 +652,11 @@ async fn get_and_set_network() {
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, {".version" => "[version]"}), @r###"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
@@ -128,32 +128,29 @@ async fn remote_sharding() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -192,6 +189,8 @@ async fn remote_sharding() {
|
||||
}
|
||||
}});
|
||||
|
||||
println!("{}", serde_json::to_string_pretty(&network).unwrap());
|
||||
|
||||
let (_response, status_code) = ms0.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
let (_response, status_code) = ms1.set_network(network.clone()).await;
|
||||
@@ -447,32 +446,29 @@ async fn remote_sharding_retrieve_vectors() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -948,22 +944,20 @@ async fn error_unregistered_remote() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1070,22 +1064,20 @@ async fn error_no_weighted_score() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1208,22 +1200,20 @@ async fn error_bad_response() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1350,22 +1340,20 @@ async fn error_bad_request() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1485,22 +1473,20 @@ async fn error_bad_request_facets_by_index() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1631,22 +1617,20 @@ async fn error_bad_request_facets_by_index_facet() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1786,7 +1770,7 @@ async fn error_remote_does_not_answer() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
@@ -1795,7 +1779,7 @@ async fn error_remote_does_not_answer() {
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
@@ -1989,22 +1973,20 @@ async fn error_remote_404() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2190,22 +2172,20 @@ async fn error_remote_sharding_auth() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2355,22 +2335,20 @@ async fn remote_sharding_auth() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2515,22 +2493,20 @@ async fn error_remote_500() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2700,22 +2676,20 @@ async fn error_remote_500_once() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"leader": null,
|
||||
"version": "[version]"
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2889,7 +2863,7 @@ async fn error_remote_timeout() {
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
@@ -2898,7 +2872,7 @@ async fn error_remote_timeout() {
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]"}), @r###"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
@@ -3108,8 +3082,8 @@ impl LocalMeili {
|
||||
let (value, code) = rt.block_on(async {
|
||||
match req.method.as_str() {
|
||||
"POST" => server.service.post_str(&req.url, body, headers.clone()).await,
|
||||
"PUT" => server.service.put_str(&req.url, body, headers.clone()).await,
|
||||
"PATCH" => server.service.patch_str(&req.url, body, headers).await,
|
||||
"PUT" => server.service.put_str(&req.url, body, headers).await,
|
||||
"PATCH" => server.service.patch(&req.url, req.body_json().unwrap()).await,
|
||||
"GET" => server.service.get(&req.url).await,
|
||||
"DELETE" => server.service.delete(&req.url).await,
|
||||
_ => unimplemented!(),
|
||||
@@ -3187,6 +3161,35 @@ async fn remote_auto_sharding() {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["network"]), @"true");
|
||||
|
||||
// set self & sharding
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"sharding": true
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"sharding": true
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {},
|
||||
"sharding": true
|
||||
}
|
||||
"###);
|
||||
|
||||
// wrap servers
|
||||
let ms0 = Arc::new(ms0);
|
||||
let ms1 = Arc::new(ms1);
|
||||
@@ -3197,10 +3200,7 @@ async fn remote_auto_sharding() {
|
||||
let rms2 = LocalMeili::new(ms2.clone()).await;
|
||||
|
||||
// set network
|
||||
let network = json!({
|
||||
"self": "ms0",
|
||||
"leader": "ms0",
|
||||
"remotes": {
|
||||
let network = json!({"remotes": {
|
||||
"ms0": {
|
||||
"url": rms0.url()
|
||||
},
|
||||
@@ -3214,99 +3214,12 @@ async fn remote_auto_sharding() {
|
||||
|
||||
println!("{}", serde_json::to_string_pretty(&network).unwrap());
|
||||
|
||||
let (task, status_code) = ms0.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"202 Accepted");
|
||||
|
||||
let t0 = task.uid();
|
||||
let (t, _) = ms0.get_task(t0).await;
|
||||
|
||||
let t1 = t["network"]["remote_tasks"]["ms1"]["taskUid"].as_u64().unwrap();
|
||||
let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();
|
||||
|
||||
ms0.wait_task(t0).await.succeeded();
|
||||
ms1.wait_task(t1).await.succeeded();
|
||||
ms2.wait_task(t2).await.succeeded();
|
||||
|
||||
let (response, status_code) = ms0.get_network().await;
|
||||
let (_response, status_code) = ms0.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]", ".remotes.*.url" => "[url]"}), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {
|
||||
"ms0": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms1": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms2": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": "ms0",
|
||||
"version": "[version]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, status_code) = ms1.get_network().await;
|
||||
let (_response, status_code) = ms1.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]", ".remotes.*.url" => "[url]"}), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {
|
||||
"ms0": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms1": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms2": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": "ms0",
|
||||
"version": "[version]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, status_code) = ms2.get_network().await;
|
||||
let (_response, status_code) = ms2.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]", ".remotes.*.url" => "[url]"}), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {
|
||||
"ms0": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms1": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms2": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": "ms0",
|
||||
"version": "[version]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// add documents
|
||||
let documents = SCORE_DOCUMENTS.clone();
|
||||
@@ -3560,11 +3473,11 @@ async fn sharding_not_enterprise() {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["network"]), @"true");
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0", "leader": "ms0"})).await;
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
|
||||
snapshot!(code, @"451 Unavailable For Legal Reasons");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Meilisearch Enterprise Edition is required to set `network.leader`",
|
||||
"message": "Meilisearch Enterprise Edition is required to set `network.sharding`",
|
||||
"code": "requires_enterprise_edition",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#requires_enterprise_edition"
|
||||
@@ -3591,6 +3504,36 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["network"]), @"true");
|
||||
|
||||
// set self & sharding
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {},
|
||||
"sharding": true
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {},
|
||||
"sharding": true
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {},
|
||||
"sharding": true
|
||||
}
|
||||
"###);
|
||||
|
||||
// wrap servers
|
||||
let ms0 = Arc::new(ms0);
|
||||
let ms1 = Arc::new(ms1);
|
||||
@@ -3601,10 +3544,7 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
let rms2 = LocalMeili::new(ms2.clone()).await;
|
||||
|
||||
// set network
|
||||
let network = json!({
|
||||
"self": "ms0",
|
||||
"leader": "ms0",
|
||||
"remotes": {
|
||||
let network = json!({"remotes": {
|
||||
"ms0": {
|
||||
"url": rms0.url()
|
||||
},
|
||||
@@ -3618,99 +3558,12 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
|
||||
println!("{}", serde_json::to_string_pretty(&network).unwrap());
|
||||
|
||||
let (task, status_code) = ms0.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"202 Accepted");
|
||||
|
||||
let t0 = task.uid();
|
||||
let (t, _) = ms0.get_task(t0).await;
|
||||
|
||||
let t1 = t["network"]["remote_tasks"]["ms1"]["taskUid"].as_u64().unwrap();
|
||||
let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();
|
||||
|
||||
ms0.wait_task(t0).await.succeeded();
|
||||
ms1.wait_task(t1).await.succeeded();
|
||||
ms2.wait_task(t2).await.succeeded();
|
||||
|
||||
let (response, status_code) = ms0.get_network().await;
|
||||
let (_response, status_code) = ms0.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]", ".remotes.*.url" => "[url]"}), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {
|
||||
"ms0": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms1": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms2": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": "ms0",
|
||||
"version": "[version]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, status_code) = ms1.get_network().await;
|
||||
let (_response, status_code) = ms1.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]", ".remotes.*.url" => "[url]"}), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {
|
||||
"ms0": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms1": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms2": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": "ms0",
|
||||
"version": "[version]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, status_code) = ms2.get_network().await;
|
||||
let (_response, status_code) = ms2.set_network(network.clone()).await;
|
||||
snapshot!(status_code, @"200 OK");
|
||||
snapshot!(json_string!(response, {".version" => "[version]", ".remotes.*.url" => "[url]"}), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {
|
||||
"ms0": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms1": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"ms2": {
|
||||
"url": "[url]",
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
},
|
||||
"leader": "ms0",
|
||||
"version": "[version]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// add documents
|
||||
let documents = SCORE_DOCUMENTS.clone();
|
||||
@@ -3733,7 +3586,6 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();
|
||||
|
||||
let t = ms0.wait_task(t0).await.succeeded();
|
||||
|
||||
snapshot!(t, @r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
@@ -3754,15 +3606,14 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
"network": {
|
||||
"remote_tasks": {
|
||||
"ms1": {
|
||||
"taskUid": 1,
|
||||
"taskUid": 0,
|
||||
"error": null
|
||||
},
|
||||
"ms2": {
|
||||
"taskUid": 1,
|
||||
"taskUid": 0,
|
||||
"error": null
|
||||
}
|
||||
},
|
||||
"network_version": "[version]"
|
||||
}
|
||||
},
|
||||
"customMetadata": "remote_auto_sharding_with_custom_metadata"
|
||||
}
|
||||
@@ -3789,8 +3640,7 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
"network": {
|
||||
"origin": {
|
||||
"remoteName": "ms0",
|
||||
"taskUid": 1,
|
||||
"networkVersion": "[version]"
|
||||
"taskUid": 0
|
||||
}
|
||||
},
|
||||
"customMetadata": "remote_auto_sharding_with_custom_metadata"
|
||||
@@ -3818,8 +3668,7 @@ async fn remote_auto_sharding_with_custom_metadata() {
|
||||
"network": {
|
||||
"origin": {
|
||||
"remoteName": "ms0",
|
||||
"taskUid": 1,
|
||||
"networkVersion": "[version]"
|
||||
"taskUid": 0
|
||||
}
|
||||
},
|
||||
"customMetadata": "remote_auto_sharding_with_custom_metadata"
|
||||
|
||||
@@ -95,36 +95,36 @@ async fn task_bad_types() {
|
||||
|
||||
let (response, code) = server.tasks_filter("types=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`, `networkTopologyChange`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"###);
|
||||
"#);
|
||||
|
||||
let (response, code) = server.cancel_tasks("types=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`, `networkTopologyChange`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"###);
|
||||
"#);
|
||||
|
||||
let (response, code) = server.delete_tasks("types=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`, `networkTopologyChange`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"###);
|
||||
"#);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -120,16 +120,14 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
] }
geo-types = "0.7.17"
zerometry = "0.3.0"
bitpacking = "0.9.2"

[dev-dependencies]
mimalloc = { version = "0.1.48", default-features = false }
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
mimalloc = { version = "0.1.48", default-features = false }
maplit = "1.0.2"
md5 = "0.8.0"
meili-snap = { path = "../meili-snap" }
quickcheck = "1.0.3"
rand = { version = "0.8.5", features = ["small_rng"] }

[features]
@@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
use super::StrRefCodec;
use crate::{DeCboRoaringBitmapCodec, BEU16};
use crate::{CboRoaringBitmapCodec, BEU16};

pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut v = vec![value.size];
|
||||
DeCboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v)?;
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
@@ -107,7 +107,7 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let size = bytes[0];
|
||||
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
||||
Ok(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
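// A minimal sketch, for illustration only, of the layout encoded by the codec
// above: one `size` byte followed by the serialized bitmap. It assumes the
// `FacetGroupValue` / `FacetGroupValueCodec` types shown in this hunk.
#[test]
fn facet_group_value_round_trip_sketch() {
    use heed::{BytesDecode, BytesEncode};
    use roaring::RoaringBitmap;

    let value = FacetGroupValue { size: 4, bitmap: RoaringBitmap::from_iter(0..10u32) };
    let bytes = FacetGroupValueCodec::bytes_encode(&value).unwrap();
    assert_eq!(bytes[0], 4); // the group size is stored in the first byte
    let decoded = FacetGroupValueCodec::bytes_decode(&bytes).unwrap();
    assert_eq!(decoded.size, value.size);
    assert_eq!(decoded.bitmap, value.bitmap);
}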
|
||||
|
||||
@@ -22,11 +22,9 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{
    BoRoaringBitmapCodec, DeCboRoaringBitmapCodec, RoaringBitmapCodec, DELTA_ENCODING_STATUS,
};
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
    BoRoaringBitmapLenCodec, DeCboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
    BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

@@ -7,6 +7,7 @@ use heed::BoxedError;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
|
||||
/// This is the limit where using a byteorder became less size efficient
|
||||
/// than using a direct roaring encoding, it is also the point where we are able
|
||||
@@ -18,19 +19,8 @@ pub const THRESHOLD: usize = 7;
|
||||
pub struct CboRoaringBitmapCodec;
|
||||
|
||||
impl CboRoaringBitmapCodec {
|
||||
/// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||
/// it means that it would weigh the same or less than the RoaringBitmap
|
||||
/// header, so we directly encode them using ByteOrder instead.
|
||||
pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
|
||||
roaring.len() <= THRESHOLD as u64
|
||||
}
|
||||
|
||||
pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
|
||||
bytes.len() <= THRESHOLD * size_of::<u32>()
|
||||
}
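// A small sketch, assuming THRESHOLD = 7 as defined above and the two helper
// predicates shown in this hunk: with at most THRESHOLD integers the raw
// native-endian u32 form (7 * 4 = 28 bytes) is never larger than a serialized
// RoaringBitmap, so the encoder and the decoder can agree on the format from
// the byte length alone.
#[test]
fn raw_u32_threshold_agrees_on_both_sides() {
    use std::mem::size_of;

    use heed::BytesEncode;
    use roaring::RoaringBitmap;

    let small = RoaringBitmap::from_iter(0..THRESHOLD as u32);
    assert!(CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(&small));

    let bytes = CboRoaringBitmapCodec::bytes_encode(&small).unwrap();
    // Exactly THRESHOLD raw u32s, so the decoder picks the raw-u32 path too.
    assert_eq!(bytes.len(), THRESHOLD * size_of::<u32>());
    assert!(CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(&bytes));
}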
|
||||
|
||||
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
|
||||
if Self::bitmap_serialize_as_raw_u32s(roaring) {
|
||||
if roaring.len() <= THRESHOLD as u64 {
|
||||
roaring.len() as usize * size_of::<u32>()
|
||||
} else {
|
||||
roaring.serialized_size()
|
||||
@@ -45,7 +35,10 @@ impl CboRoaringBitmapCodec {
|
||||
roaring: &RoaringBitmap,
|
||||
mut writer: W,
|
||||
) -> io::Result<()> {
|
||||
if Self::bitmap_serialize_as_raw_u32s(roaring) {
|
||||
if roaring.len() <= THRESHOLD as u64 {
|
||||
// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||
// it means that it would weigh the same or less than the RoaringBitmap
|
||||
// header, so we directly encode them using ByteOrder instead.
|
||||
for integer in roaring {
|
||||
writer.write_u32::<NativeEndian>(integer)?;
|
||||
}
|
||||
@@ -58,7 +51,7 @@ impl CboRoaringBitmapCodec {
|
||||
}
|
||||
|
||||
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
|
||||
if Self::bytes_deserialize_as_raw_u32s(bytes) {
|
||||
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||
// If at most THRESHOLD integers can fit into this array of bytes,
// it means that we used the ByteOrder codec serializer.
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
@@ -78,7 +71,7 @@ impl CboRoaringBitmapCodec {
|
||||
other: &RoaringBitmap,
|
||||
) -> io::Result<RoaringBitmap> {
|
||||
// See above `deserialize_from` method for implementation details.
|
||||
if Self::bytes_deserialize_as_raw_u32s(bytes) {
|
||||
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||
if other.contains(integer) {
|
||||
@@ -90,6 +83,78 @@ impl CboRoaringBitmapCodec {
|
||||
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge serialized CboRoaringBitmaps in a buffer.
|
||||
///
|
||||
/// If the merged values' length is under the threshold, the values are
/// serialized directly in the buffer; otherwise a RoaringBitmap is created
/// from the values and serialized into the buffer.
|
||||
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = A>,
|
||||
A: AsRef<[u8]>,
|
||||
{
|
||||
let mut roaring = RoaringBitmap::new();
|
||||
let mut vec = Vec::new();
|
||||
|
||||
for bytes in slices {
|
||||
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
|
||||
let mut reader = bytes.as_ref();
|
||||
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
||||
vec.push(integer);
|
||||
}
|
||||
} else {
|
||||
roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
if roaring.is_empty() {
|
||||
vec.sort_unstable();
|
||||
vec.dedup();
|
||||
|
||||
if vec.len() <= THRESHOLD {
|
||||
for integer in vec {
|
||||
buffer.extend_from_slice(&integer.to_ne_bytes());
|
||||
}
|
||||
} else {
|
||||
// We can safely unwrap because the vector was sorted above.
|
||||
let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
|
||||
roaring.serialize_into(buffer)?;
|
||||
}
|
||||
} else {
|
||||
roaring.extend(vec);
|
||||
roaring.serialize_into(buffer)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Merges a DelAdd delta into a CboRoaringBitmap.
|
||||
pub fn merge_deladd_into<'a>(
|
||||
deladd: &KvReaderDelAdd,
|
||||
previous: &[u8],
|
||||
buffer: &'a mut Vec<u8>,
|
||||
) -> io::Result<Option<&'a [u8]>> {
|
||||
// Deserialize the bitmap that is already there
|
||||
let mut previous = Self::deserialize_from(previous)?;
|
||||
|
||||
// Remove the integers we no longer want from the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Deletion) {
|
||||
previous -= Self::deserialize_from(value)?;
|
||||
}
|
||||
|
||||
// Insert the new integers we want in the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Addition) {
|
||||
previous |= Self::deserialize_from(value)?;
|
||||
}
|
||||
|
||||
if previous.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Self::serialize_into_vec(&previous, buffer);
|
||||
Ok(Some(&buffer[..]))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
||||
@@ -117,3 +182,75 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
||||
Ok(Cow::Owned(vec))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn verify_encoding_decoding() {
|
||||
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
|
||||
let bytes = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
|
||||
let output = CboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
|
||||
assert_eq!(input, output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_threshold() {
|
||||
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
|
||||
|
||||
// use roaring bitmap
|
||||
let mut bytes = Vec::new();
|
||||
input.serialize_into(&mut bytes).unwrap();
|
||||
let roaring_size = bytes.len();
|
||||
|
||||
// use byteorder directly
|
||||
let mut bytes = Vec::new();
|
||||
for integer in input {
|
||||
bytes.write_u32::<NativeEndian>(integer).unwrap();
|
||||
}
|
||||
let bo_size = bytes.len();
|
||||
|
||||
assert!(roaring_size > bo_size);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_cbo_roaring_bitmaps() {
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
let small_data = [
|
||||
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(4..6).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(1..3).unwrap(),
|
||||
];
|
||||
|
||||
let small_data: Vec<_> =
|
||||
small_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
|
||||
CboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
|
||||
let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
|
||||
assert_eq!(bitmap, expected);
|
||||
|
||||
let medium_data = [
|
||||
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(4..8).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(0..3).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(7..23).unwrap(),
|
||||
];
|
||||
|
||||
let medium_data: Vec<_> =
|
||||
medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
|
||||
buffer.clear();
|
||||
CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
|
||||
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
|
||||
let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
|
||||
assert_eq!(bitmap, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,342 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::io::{self, Cursor, ErrorKind};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use byteorder::{NativeEndian, ReadBytesExt as _};
|
||||
use heed::BoxedError;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
||||
use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
|
||||
use crate::heed_codec::roaring_bitmap::take_all_blocks;
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
|
||||
/// Defines the status of the delta encoding on whether we have enabled it or not.
|
||||
pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();
|
||||
|
||||
pub struct DeCboRoaringBitmapCodec;
|
||||
|
||||
impl DeCboRoaringBitmapCodec {
|
||||
pub fn serialized_size_with_tmp_buffer(
|
||||
bitmap: &RoaringBitmap,
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> usize {
|
||||
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
|
||||
// raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
|
||||
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|
||||
|| DELTA_ENCODING_STATUS.is_disabled()
|
||||
{
|
||||
CboRoaringBitmapCodec::serialized_size(bitmap)
|
||||
} else {
|
||||
DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes the delta-encoded compressed version of
|
||||
/// the given roaring bitmap into the provided writer.
|
||||
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: &mut W) -> io::Result<()> {
|
||||
let mut tmp_buffer = Vec::new();
|
||||
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
|
||||
}
|
||||
|
||||
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
|
||||
///
|
||||
/// Note that we always serialize the bitmap with the delta-encoded compressed version.
|
||||
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
|
||||
bitmap: &RoaringBitmap,
|
||||
writer: &mut W,
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> io::Result<()> {
|
||||
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
|
||||
// raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
|
||||
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|
||||
|| DELTA_ENCODING_STATUS.is_disabled()
|
||||
{
|
||||
CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
|
||||
} else {
|
||||
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
|
||||
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
|
||||
let mut tmp_buffer = Vec::new();
|
||||
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
|
||||
}
|
||||
|
||||
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
|
||||
///
|
||||
/// It tries to decode the input by using the delta-decoded version and
|
||||
/// if it fails, falls back to the CboRoaringBitmap version.
|
||||
pub fn deserialize_from_with_tmp_buffer(
|
||||
input: &[u8],
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> io::Result<RoaringBitmap> {
|
||||
// The input is too short to be a valid delta-encoded bitmap.
|
||||
// We fall back to the CboRoaringBitmap version with raw u32s.
|
||||
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
|
||||
return CboRoaringBitmapCodec::deserialize_from(input);
|
||||
}
|
||||
|
||||
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
|
||||
input,
|
||||
take_all_blocks,
|
||||
tmp_buffer,
|
||||
) {
|
||||
Ok(bitmap) => Ok(bitmap),
|
||||
// If the error kind is Other it means that the delta-decoder found
|
||||
// an invalid magic header. We fall back to the CboRoaringBitmap version.
|
||||
Err(e) if e.kind() == ErrorKind::Other => {
|
||||
CboRoaringBitmapCodec::deserialize_from(input)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
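// A minimal sketch of the fallback described above, assuming the codecs shown
// in this diff: bytes written by the plain CboRoaringBitmapCodec carry no
// magic header, so the delta decoder rejects them and the value is re-read
// through the Cbo path, keeping existing databases readable.
#[test]
fn decodes_legacy_cbo_bytes() {
    use heed::BytesEncode;
    use roaring::RoaringBitmap;

    let bitmap = RoaringBitmap::from_sorted_iter(0..1_000).unwrap();
    let legacy = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
    let decoded = DeCboRoaringBitmapCodec::deserialize_from(&legacy).unwrap();
    assert_eq!(decoded, bitmap);
}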
|
||||
|
||||
/// Merge serialized DeCboRoaringBitmaps in a buffer.
|
||||
///
|
||||
/// If the merged values' length is under the threshold, the values are
/// serialized directly in the buffer; otherwise a delta-encoded list of
/// integers is created from the values and serialized into the buffer.
|
||||
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = A>,
|
||||
A: AsRef<[u8]>,
|
||||
{
|
||||
let mut roaring = RoaringBitmap::new();
|
||||
let mut vec = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
|
||||
for bytes in slices {
|
||||
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
|
||||
let mut reader = bytes.as_ref();
|
||||
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
||||
vec.push(integer);
|
||||
}
|
||||
} else {
|
||||
roaring |= DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
|
||||
bytes.as_ref(),
|
||||
&mut tmp_buffer,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
roaring.extend(vec);
|
||||
|
||||
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&roaring, buffer, &mut tmp_buffer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Do an intersection directly with a serialized delta-encoded bitmap.
|
||||
///
|
||||
/// When doing the intersection we only need to deserialize the necessary
|
||||
/// bitmap containers and avoid a lot of unnecessary allocations. We do
|
||||
/// that by skipping entire delta-encoded blocks when possible to avoid
|
||||
/// storing them in the bitmap we use for the final intersection.
|
||||
pub fn intersection_with_serialized(
|
||||
bytes: &[u8],
|
||||
other: &RoaringBitmap,
|
||||
) -> io::Result<RoaringBitmap> {
|
||||
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
|
||||
return CboRoaringBitmapCodec::intersection_with_serialized(bytes, other);
|
||||
}
|
||||
|
||||
// TODO move this tmp buffer outside
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let filter_block = |first, last| {
|
||||
// Rank returns the number of elements less than or equal
|
||||
// to the given value. Doing the difference between the
|
||||
// ranks of the last and first elements gives the number
|
||||
// of elements in the range. We don't use the range method
|
||||
// because the ExactSizeIterator::len method always returns
|
||||
// usize::MAX.
|
||||
let last_rank = other.rank(last);
|
||||
let first_rank = other.rank(first);
|
||||
// Equal to zero means skip/filter out this block
|
||||
last_rank - first_rank == 0
|
||||
};
|
||||
|
||||
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
|
||||
bytes,
|
||||
filter_block,
|
||||
&mut tmp_buffer,
|
||||
) {
|
||||
Ok(bitmap) => Ok(bitmap & other),
|
||||
// If the error kind is Other it means that the delta-decoder found
|
||||
// an invalid magic header. We fall back to the CboRoaringBitmap version.
|
||||
Err(e) if e.kind() == ErrorKind::Other => {
|
||||
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
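// A quick sketch of the intent of `intersection_with_serialized` above: it
// should give the same result as fully decoding and then intersecting, while
// skipping the delta-encoded blocks that cannot overlap `other`. The values
// below are illustrative; the API is the one shown in this diff.
#[test]
fn serialized_intersection_matches_full_decode() {
    use heed::BytesEncode;
    use roaring::RoaringBitmap;

    let stored = RoaringBitmap::from_sorted_iter(0..10_000).unwrap();
    let universe = RoaringBitmap::from_sorted_iter(5_000..6_000).unwrap();

    let bytes = DeCboRoaringBitmapCodec::bytes_encode(&stored).unwrap();
    let fast = DeCboRoaringBitmapCodec::intersection_with_serialized(&bytes, &universe).unwrap();
    let full = DeCboRoaringBitmapCodec::deserialize_from(&bytes).unwrap();
    assert_eq!(fast, &full & &universe);
}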
|
||||
|
||||
pub fn merge_deladd_into<'a>(
|
||||
deladd: &KvReaderDelAdd,
|
||||
previous: &[u8],
|
||||
buffer: &'a mut Vec<u8>,
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> io::Result<Option<&'a [u8]>> {
|
||||
// Deserialize the bitmap that is already there
|
||||
let mut previous = Self::deserialize_from_with_tmp_buffer(previous, tmp_buffer)?;
|
||||
|
||||
// Remove the integers we no longer want from the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Deletion) {
|
||||
previous -= Self::deserialize_from_with_tmp_buffer(value, tmp_buffer)?;
|
||||
}
|
||||
|
||||
// Insert the new integers we want in the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Addition) {
|
||||
previous |= Self::deserialize_from_with_tmp_buffer(value, tmp_buffer)?;
|
||||
}
|
||||
|
||||
if previous.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Self::serialize_into_with_tmp_buffer(&previous, buffer, tmp_buffer)?;
|
||||
|
||||
Ok(Some(&buffer[..]))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let capacity = Self::serialized_size_with_tmp_buffer(item, &mut tmp_buffer);
|
||||
let mut output = Vec::with_capacity(capacity);
|
||||
Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
|
||||
Ok(Cow::Owned(output))
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages the global status of the delta encoding.
|
||||
///
|
||||
/// Whether we must use delta encoding or not when encoding roaring bitmaps.
|
||||
#[derive(Default)]
|
||||
pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);
|
||||
|
||||
impl DeltaEncodingStatusLock {
|
||||
pub const fn new() -> Self {
|
||||
Self(OnceLock::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
enum DeltaEncodingStatus {
|
||||
Enabled,
|
||||
#[default]
|
||||
Disabled,
|
||||
}
|
||||
|
||||
impl DeltaEncodingStatusLock {
|
||||
pub fn set_to_enabled(&self) -> Result<(), ()> {
|
||||
self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
|
||||
}
|
||||
|
||||
pub fn set_to_disabled(&self) -> Result<(), ()> {
|
||||
self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
|
||||
}
|
||||
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
|
||||
}
|
||||
|
||||
pub fn is_disabled(&self) -> bool {
|
||||
!self.is_enabled()
|
||||
}
|
||||
}
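// A short sketch of how the status lock above behaves: it is backed by a
// `OnceLock`, so the first setter call wins and every later call reports
// failure, giving a single process-wide decision on whether delta encoding
// is used. Uses only the methods defined above.
#[test]
fn delta_encoding_status_is_set_only_once() {
    let status = DeltaEncodingStatusLock::new();
    assert!(status.is_disabled()); // unset defaults to "disabled"
    assert!(status.set_to_enabled().is_ok());
    assert!(status.set_to_disabled().is_err()); // already decided
    assert!(status.is_enabled());
}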
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use byteorder::WriteBytesExt as _;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
use super::*;
|
||||
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
|
||||
|
||||
#[test]
|
||||
fn verify_encoding_decoding() {
|
||||
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
|
||||
let bytes = DeCboRoaringBitmapCodec::bytes_encode(&input).unwrap();
|
||||
let output = DeCboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
|
||||
assert_eq!(input, output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_threshold() {
|
||||
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
|
||||
|
||||
// use roaring bitmap
|
||||
let mut bytes = Vec::new();
|
||||
input.serialize_into(&mut bytes).unwrap();
|
||||
let roaring_size = bytes.len();
|
||||
|
||||
// use byteorder directly
|
||||
let mut bytes = Vec::new();
|
||||
for integer in input {
|
||||
bytes.write_u32::<NativeEndian>(integer).unwrap();
|
||||
}
|
||||
let bo_size = bytes.len();
|
||||
|
||||
assert!(roaring_size > bo_size);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_de_cbo_roaring_bitmaps() {
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
let small_data = [
|
||||
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(4..6).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(1..3).unwrap(),
|
||||
];
|
||||
|
||||
let small_data: Vec<_> =
|
||||
small_data.iter().map(|b| DeCboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
|
||||
DeCboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
|
||||
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
|
||||
let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
|
||||
assert_eq!(bitmap, expected);
|
||||
|
||||
let medium_data = [
|
||||
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(4..8).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(0..3).unwrap(),
|
||||
RoaringBitmap::from_sorted_iter(7..23).unwrap(),
|
||||
];
|
||||
|
||||
let medium_data: Vec<_> =
|
||||
medium_data.iter().map(|b| DeCboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
|
||||
buffer.clear();
|
||||
DeCboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
|
||||
|
||||
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
|
||||
let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
|
||||
assert_eq!(bitmap, expected);
|
||||
}
|
||||
}
|
||||
@@ -1,454 +0,0 @@
|
||||
use std::io::{self, ErrorKind};
|
||||
use std::mem::{self, size_of, size_of_val};
|
||||
|
||||
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
/// The magic header for our custom encoding format
|
||||
const MAGIC_HEADER: u16 = 36869;
|
||||
|
||||
pub struct DeRoaringBitmapCodec;
|
||||
|
||||
// TODO reintroduce:
|
||||
// - serialized_size?
|
||||
// - serialize_into_vec
|
||||
// - intersection_with_serialized
|
||||
// - merge_into
|
||||
// - merge_deladd_into
|
||||
impl DeRoaringBitmapCodec {
|
||||
/// Returns the serialized size of the given roaring bitmap with the delta encoding format.
|
||||
pub fn serialized_size_with_tmp_buffer(
|
||||
bitmap: &RoaringBitmap,
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> usize {
|
||||
let mut size = 2; // u16 magic header
|
||||
|
||||
let bitpacker8x = BitPacker8x::new();
|
||||
let bitpacker4x = BitPacker4x::new();
|
||||
let bitpacker1x = BitPacker1x::new();
|
||||
|
||||
// This temporary buffer is used to store each chunk of decompressed u32s.
|
||||
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
|
||||
let decompressed = &mut tmp_buffer[..];
|
||||
|
||||
let mut buffer_index = 0;
|
||||
let mut initial = None;
|
||||
// We initially collect all the integers into a flat buffer of the size
|
||||
// of the largest bitpacker. We encode them with it until we don't have
|
||||
// enough of them...
|
||||
for n in bitmap {
|
||||
decompressed[buffer_index] = n;
|
||||
buffer_index += 1;
|
||||
if buffer_index == BitPacker8x::BLOCK_LEN {
|
||||
let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
|
||||
let compressed_len = BitPacker8x::compressed_block_size(num_bits);
|
||||
size += 1; // u8 chunk header
|
||||
size += compressed_len; // compressed data length
|
||||
initial = Some(n);
|
||||
buffer_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
|
||||
let decompressed = &decompressed[..buffer_index];
|
||||
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
|
||||
for decompressed in chunks.by_ref() {
|
||||
let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
|
||||
let compressed_len = BitPacker4x::compressed_block_size(num_bits);
|
||||
size += 1; // u8 chunk header
|
||||
size += compressed_len; // compressed data length
|
||||
initial = decompressed.iter().last().copied();
|
||||
}
|
||||
|
||||
// ...And so on...
|
||||
let decompressed = chunks.remainder();
|
||||
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
|
||||
for decompressed in chunks.by_ref() {
|
||||
let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
|
||||
let compressed_len = BitPacker1x::compressed_block_size(num_bits);
|
||||
size += 1; // u8 chunk header
|
||||
size += compressed_len; // compressed data length
|
||||
initial = decompressed.iter().last().copied();
|
||||
}
|
||||
|
||||
// ...Until there is no small enough bitpacker left. We put the remaining
// u32s raw at the end of our buffer, with a header indicating so.
|
||||
let decompressed = chunks.remainder();
|
||||
if !decompressed.is_empty() {
|
||||
size += 1; // u8 chunk header
|
||||
size += mem::size_of_val(decompressed); // remaining uncompressed u32s
|
||||
}
|
||||
|
||||
size
|
||||
}
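// A worked example of the layout measured above, assuming a strictly sorted
// bitmap of 300 integers and the BLOCK_LEN values of the `bitpacking` crate
// (8x = 256, 4x = 128, 1x = 32):
//   2 bytes   magic header
//   1 + n     one BitPacker8x chunk header plus its 256 compressed integers
//   1 + m     one BitPacker1x chunk header plus its 32 compressed integers
//             (no BitPacker4x chunk: only 44 integers were left, fewer than 128)
//   1 + 48    one raw chunk header plus the last 12 integers as native-endian u32s
// where n and m depend on the number of bits needed to encode the deltas.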
|
||||
|
||||
/// Writes the delta-encoded compressed version of the given roaring bitmap
|
||||
/// into the provided writer. Accepts a buffer to avoid allocating one.
|
||||
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
|
||||
bitmap: &RoaringBitmap,
|
||||
mut writer: W,
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> io::Result<()> {
|
||||
// Insert the magic header
|
||||
writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;
|
||||
|
||||
let bitpacker8x = BitPacker8x::new();
|
||||
let bitpacker4x = BitPacker4x::new();
|
||||
let bitpacker1x = BitPacker1x::new();
|
||||
|
||||
// This temporary buffer is used to store each chunk of decompressed and
|
||||
// compressed and delta-encoded u32s. We need room for the decompressed
|
||||
// u32s coming from the roaring bitmap, the compressed output that can
|
||||
// be as large as the decompressed u32s, and the chunk header.
|
||||
tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
|
||||
let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
|
||||
let compressed = bytemuck::cast_slice_mut(compressed);
|
||||
|
||||
let mut buffer_index = 0;
|
||||
let mut initial = None;
|
||||
// We initially collect all the integers into a flat buffer of the size
|
||||
// of the largest bitpacker. We encode them with it until we don't have
|
||||
// enough of them...
|
||||
for n in bitmap {
|
||||
decompressed[buffer_index] = n;
|
||||
buffer_index += 1;
|
||||
if buffer_index == BitPacker8x::BLOCK_LEN {
|
||||
let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
|
||||
writer.write_all(output)?;
|
||||
initial = Some(n);
|
||||
buffer_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
|
||||
let decompressed = &decompressed[..buffer_index];
|
||||
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
|
||||
for decompressed in chunks.by_ref() {
|
||||
let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
|
||||
writer.write_all(output)?;
|
||||
initial = decompressed.iter().last().copied();
|
||||
}
|
||||
|
||||
// ...And so on...
|
||||
let decompressed = chunks.remainder();
|
||||
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
|
||||
for decompressed in chunks.by_ref() {
|
||||
let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
|
||||
writer.write_all(output)?;
|
||||
initial = decompressed.iter().last().copied();
|
||||
}
|
||||
|
||||
// ...Until there is no small enough bitpacker left. We put the remaining
// u32s raw at the end of our buffer, with a header indicating so.
|
||||
let decompressed = chunks.remainder();
|
||||
if !decompressed.is_empty() {
|
||||
let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
|
||||
// Note: Not convinced about the performance of writing a single
|
||||
// byte followed by a larger write. However, we will use this
|
||||
// codec with a BufWriter or directly with a Vec of bytes.
|
||||
writer.write_all(&[header])?;
|
||||
writer.write_all(bytemuck::cast_slice(decompressed))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
|
||||
///
|
||||
/// The `filter_block` function is used to filter out blocks. It takes the first
|
||||
/// and last u32 values of a block and returns `true` if the block must be skipped (filtered out).
|
||||
pub fn deserialize_from_with_tmp_buffer<F>(
|
||||
input: &[u8],
|
||||
filter_block: F,
|
||||
tmp_buffer: &mut Vec<u32>,
|
||||
) -> io::Result<RoaringBitmap>
|
||||
where
|
||||
F: Fn(u32, u32) -> bool,
|
||||
{
|
||||
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
|
||||
else {
|
||||
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
|
||||
};
|
||||
|
||||
// Safety: This unwrap cannot happen as the header buffer is the right size
|
||||
let header = u16::from_ne_bytes(header.try_into().unwrap());
|
||||
|
||||
if header != MAGIC_HEADER {
|
||||
return Err(io::Error::other("invalid header value"));
|
||||
}
|
||||
|
||||
let bitpacker8x = BitPacker8x::new();
|
||||
let bitpacker4x = BitPacker4x::new();
|
||||
let bitpacker1x = BitPacker1x::new();
|
||||
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
|
||||
let decompressed = &mut tmp_buffer[..];
|
||||
let mut initial = None;
|
||||
|
||||
while let Some((&chunk_header, encoded)) = compressed.split_first() {
|
||||
let (level, num_bits) = decode_chunk_header(chunk_header);
|
||||
let (bytes_read, decompressed) = match level {
|
||||
BitPackerLevel::None => {
|
||||
if num_bits != u32::BITS as u8 {
|
||||
return Err(io::Error::new(
|
||||
ErrorKind::InvalidData,
|
||||
"invalid number of bits to encode non-compressed u32s",
|
||||
));
|
||||
}
|
||||
|
||||
let chunks = encoded.chunks_exact(size_of::<u32>());
|
||||
if !chunks.remainder().is_empty() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"expecting last chunk to be a multiple of the size of an u32",
|
||||
));
|
||||
}
|
||||
|
||||
let integers = chunks
|
||||
// safety: This unwrap cannot happen as
|
||||
// the size of u32 is set correctly.
|
||||
.map(|b| b.try_into().unwrap())
|
||||
.map(u32::from_ne_bytes);
|
||||
|
||||
if let Some((first, last)) =
|
||||
integers.clone().next().zip(integers.clone().next_back())
|
||||
{
|
||||
if !(filter_block)(first, last) {
|
||||
bitmap
|
||||
.append(integers)
|
||||
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
|
||||
}
|
||||
}
|
||||
|
||||
// This is basically always the last chunk that exists in
|
||||
// this delta-encoded format as the raw u32s are appended
|
||||
// when there are not enough of them to fit in a bitpacker.
|
||||
break;
|
||||
}
|
||||
BitPackerLevel::BitPacker1x => {
|
||||
decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
|
||||
}
|
||||
BitPackerLevel::BitPacker4x => {
|
||||
decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
|
||||
}
|
||||
BitPackerLevel::BitPacker8x => {
|
||||
decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
|
||||
}
|
||||
};
|
||||
|
||||
initial = decompressed.iter().last().copied();
|
||||
if let Some((first, last)) = decompressed.first().copied().zip(initial) {
|
||||
if !(filter_block)(first, last) {
|
||||
// TODO investigate perf
|
||||
// Safety: Bitpackers cannot output unsorted integers when
|
||||
// used with the compress_strictly_sorted function.
|
||||
bitmap.append(decompressed.iter().copied()).unwrap();
|
||||
}
|
||||
}
|
||||
// What the delta-decoding read plus the chunk header size
|
||||
compressed = &compressed[bytes_read + 1..];
|
||||
}
|
||||
|
||||
Ok(bitmap)
|
||||
}
|
||||
|
||||
/// Returns the length of the serialized DeRoaringBitmap.
|
||||
pub fn deserialize_length_from(input: &[u8]) -> io::Result<u64> {
|
||||
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
|
||||
else {
|
||||
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
|
||||
};
|
||||
|
||||
// Safety: This unwrap cannot happen as the header buffer is the right size
|
||||
let header = u16::from_ne_bytes(header.try_into().unwrap());
|
||||
|
||||
if header != MAGIC_HEADER {
|
||||
return Err(io::Error::other("invalid header value"));
|
||||
}
|
||||
|
||||
let mut length = 0;
|
||||
while let Some((&chunk_header, encoded)) = compressed.split_first() {
|
||||
let (level, num_bits) = decode_chunk_header(chunk_header);
|
||||
let bytes_read = match level {
|
||||
BitPackerLevel::None => {
|
||||
if num_bits != u32::BITS as u8 {
|
||||
return Err(io::Error::new(
|
||||
ErrorKind::InvalidData,
|
||||
"invalid number of bits to encode non-compressed u32s",
|
||||
));
|
||||
}
|
||||
|
||||
let chunks = encoded.chunks_exact(size_of::<u32>());
|
||||
if !chunks.remainder().is_empty() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"expecting last chunk to be a multiple of the size of an u32",
|
||||
));
|
||||
}
|
||||
|
||||
// This call is optimized for performance
|
||||
// and will not iterate over the chunks.
|
||||
length += chunks.count() as u64;
|
||||
|
||||
// This is basically always the last chunk that exists in
|
||||
// this delta-encoded format as the raw u32s are appended
|
||||
// when there are not enough of them to fit in a bitpacker.
|
||||
break;
|
||||
}
|
||||
BitPackerLevel::BitPacker1x => {
|
||||
length += BitPacker1x::BLOCK_LEN as u64;
|
||||
BitPacker1x::compressed_block_size(num_bits)
|
||||
}
|
||||
BitPackerLevel::BitPacker4x => {
|
||||
length += BitPacker4x::BLOCK_LEN as u64;
|
||||
BitPacker4x::compressed_block_size(num_bits)
|
||||
}
|
||||
BitPackerLevel::BitPacker8x => {
|
||||
length += BitPacker8x::BLOCK_LEN as u64;
|
||||
BitPacker8x::compressed_block_size(num_bits)
|
||||
}
|
||||
};
|
||||
|
||||
// What the delta-decoding read plus the chunk header size
|
||||
compressed = &compressed[bytes_read + 1..];
|
||||
}
|
||||
|
||||
Ok(length)
|
||||
}
|
||||
}
|
||||
|
||||
/// A utility `filter_block` function that takes (keeps) all blocks, i.e. it never filters any block out.
|
||||
pub fn take_all_blocks(_first: u32, _last: u32) -> bool {
|
||||
false
|
||||
}
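// A hedged sketch of a custom `filter_block` callback, for illustration only:
// unlike `take_all_blocks` above (which never skips anything), this one would
// skip every delta-encoded block whose values all fall below some cutoff, so
// those blocks are never appended to the resulting bitmap.
fn skip_blocks_below(cutoff: u32) -> impl Fn(u32, u32) -> bool {
    // `first` and `last` are the smallest and largest u32 of the block;
    // returning `true` means "filter this block out".
    move |_first: u32, last: u32| last < cutoff
}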
|
||||
|
||||
/// Takes a strictly sorted list of u32s and outputs delta-encoded
|
||||
/// bytes with a chunk header. We expect the output buffer to be
|
||||
/// at least BLOCK_LEN + 1.
|
||||
fn encode_with_packer<'c, B: BitPackerExt>(
|
||||
bitpacker: &B,
|
||||
decompressed: &[u32],
|
||||
initial: Option<u32>,
|
||||
output: &'c mut [u8],
|
||||
) -> &'c [u8] {
|
||||
let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
|
||||
let compressed_len = B::compressed_block_size(num_bits);
|
||||
let chunk_header = encode_chunk_header(B::level(), num_bits);
|
||||
let buffer = &mut output[..compressed_len + 1];
|
||||
// Safety: The buffer is at least one byte
|
||||
let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
|
||||
*header_in_buffer = chunk_header;
|
||||
bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
|
||||
buffer
|
||||
}
|
||||
|
||||
/// Returns the number of bytes read and the decoded unsigned integers.
|
||||
fn decode_with_packer<'d, B: BitPacker>(
|
||||
bitpacker: &B,
|
||||
decompressed: &'d mut [u32],
|
||||
initial: Option<u32>,
|
||||
compressed: &[u8],
|
||||
num_bits: u8,
|
||||
) -> (usize, &'d [u32]) {
|
||||
let decompressed = &mut decompressed[..B::BLOCK_LEN];
|
||||
let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
|
||||
(read, decompressed)
|
||||
}
|
||||
|
||||
/// An identifier for the bitpacker to be able
|
||||
/// to correctly decode the compressed integers.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
enum BitPackerLevel {
|
||||
/// The remaining bytes are raw little endian encoded u32s.
|
||||
None,
|
||||
/// The remaining bits are encoded using a `BitPacker1x`.
|
||||
BitPacker1x,
|
||||
/// The remaining bits are encoded using a `BitPacker4x`.
|
||||
BitPacker4x,
|
||||
/// The remaining bits are encoded using a `BitPacker8x`.
|
||||
BitPacker8x,
|
||||
}
|
||||
|
||||
/// Returns the chunk header based on the bitpacker level
|
||||
/// and the number of bits to encode the list of integers.
|
||||
fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
|
||||
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
|
||||
let level = level as u8;
|
||||
debug_assert!(level <= 3);
|
||||
num_bits | (level << 6)
|
||||
}
|
||||
|
||||
/// Decodes the chunk header and output the bitpacker level
|
||||
/// and the number of bits to decode the following bytes.
|
||||
fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
|
||||
let num_bits = data & 0b00111111;
|
||||
let level = match data >> 6 {
|
||||
0 => BitPackerLevel::None,
|
||||
1 => BitPackerLevel::BitPacker1x,
|
||||
2 => BitPackerLevel::BitPacker4x,
|
||||
3 => BitPackerLevel::BitPacker8x,
|
||||
invalid => panic!("Invalid bitpacker level: {invalid}"),
|
||||
};
|
||||
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
|
||||
(level, num_bits)
|
||||
}
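// A tiny sketch of the 1-byte chunk header defined above: the two high bits
// carry the BitPackerLevel and the six low bits carry `num_bits`. For example,
// a BitPacker4x chunk (level 2) whose deltas need 13 bits is encoded as
// 0b10_001101 = 141, and decoding gives the pair back.
#[test]
fn chunk_header_round_trip() {
    let header = encode_chunk_header(BitPackerLevel::BitPacker4x, 13);
    assert_eq!(header, 0b10_001101);
    let (level, num_bits) = decode_chunk_header(header);
    assert_eq!((level, num_bits), (BitPackerLevel::BitPacker4x, 13));
}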
|
||||
|
||||
/// A simple helper trait to get the BitPackerLevel
|
||||
/// and correctly generate the chunk header.
|
||||
trait BitPackerExt: BitPacker {
|
||||
/// Returns the level of the bitpacker: an identifier to be
|
||||
/// able to decode the numbers with the right bitpacker.
|
||||
fn level() -> BitPackerLevel;
|
||||
}
|
||||
|
||||
impl BitPackerExt for BitPacker8x {
|
||||
fn level() -> BitPackerLevel {
|
||||
BitPackerLevel::BitPacker8x
|
||||
}
|
||||
}
|
||||
|
||||
impl BitPackerExt for BitPacker4x {
|
||||
fn level() -> BitPackerLevel {
|
||||
BitPackerLevel::BitPacker4x
|
||||
}
|
||||
}
|
||||
|
||||
impl BitPackerExt for BitPacker1x {
|
||||
fn level() -> BitPackerLevel {
|
||||
BitPackerLevel::BitPacker1x
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use quickcheck::quickcheck;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{take_all_blocks, DeRoaringBitmapCodec};
|
||||
|
||||
quickcheck! {
|
||||
fn qc_random(xs: Vec<u32>) -> bool {
|
||||
let bitmap = RoaringBitmap::from_iter(xs);
|
||||
let mut compressed = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
|
||||
let length = DeRoaringBitmapCodec::deserialize_length_from(&compressed[..]).unwrap();
|
||||
let decompressed = DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed[..], take_all_blocks, &mut tmp_buffer).unwrap();
|
||||
length == bitmap.len() && decompressed == bitmap
|
||||
}
|
||||
}
|
||||
|
||||
quickcheck! {
|
||||
fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
|
||||
let bitmap = RoaringBitmap::from_iter(xs);
|
||||
let mut compressed = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
|
||||
let length = DeRoaringBitmapCodec::deserialize_length_from(&compressed).unwrap();
|
||||
let expected_len = DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
|
||||
length == bitmap.len() && compressed.len() == expected_len
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,7 @@
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
pub mod de_cbo_roaring_bitmap_codec;
mod de_roaring_bitmap_codec;
mod roaring_bitmap_codec;

pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
pub use self::de_roaring_bitmap_codec::{take_all_blocks, DeRoaringBitmapCodec};
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;

@@ -1,41 +0,0 @@
|
||||
use std::io::ErrorKind;
|
||||
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
|
||||
use super::BoRoaringBitmapLenCodec;
|
||||
use crate::heed_codec::roaring_bitmap::{CboRoaringBitmapCodec, DeRoaringBitmapCodec};
|
||||
use crate::heed_codec::roaring_bitmap_length::CboRoaringBitmapLenCodec;
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
pub struct DeCboRoaringBitmapLenCodec;
|
||||
|
||||
impl BytesDecode<'_> for DeCboRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
|
||||
// If at most THRESHOLD integers can fit into this array of bytes,
// it means that we used the ByteOrder codec serializer.
|
||||
BoRoaringBitmapLenCodec::bytes_decode(bytes)
|
||||
} else {
|
||||
match DeRoaringBitmapCodec::deserialize_length_from(bytes) {
|
||||
Ok(bitmap) => Ok(bitmap),
|
||||
// If the error kind is Other it means that the delta-decoder found
|
||||
// an invalid magic header. We fall back to the CboRoaringBitmap version.
|
||||
Err(e) if e.kind() == ErrorKind::Other => {
|
||||
CboRoaringBitmapLenCodec::bytes_decode(bytes)
|
||||
}
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for DeCboRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,7 @@
mod bo_roaring_bitmap_len_codec;
mod cbo_roaring_bitmap_len_codec;
mod de_cbo_roaring_bitmap_len_codec;
mod roaring_bitmap_len_codec;

pub use self::bo_roaring_bitmap_len_codec::BoRoaringBitmapLenCodec;
use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
pub use self::de_cbo_roaring_bitmap_len_codec::DeCboRoaringBitmapLenCodec;
pub use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
pub use self::roaring_bitmap_len_codec::RoaringBitmapLenCodec;

@@ -34,7 +34,7 @@ use crate::update::new::StdResult;
|
||||
use crate::vector::db::IndexEmbeddingConfigs;
|
||||
use crate::vector::{Embedding, VectorStore, VectorStoreBackend, VectorStoreStats};
|
||||
use crate::{
|
||||
default_criteria, Criterion, DeCboRoaringBitmapCodec, DocumentId, ExternalDocumentsIds,
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
||||
FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
|
||||
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
|
||||
@@ -133,38 +133,38 @@ pub struct Index {
|
||||
pub external_documents_ids: Database<Str, BEU32>,
|
||||
|
||||
/// A word and all the documents ids containing the word.
|
||||
pub word_docids: Database<Str, DeCboRoaringBitmapCodec>,
|
||||
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
|
||||
/// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
|
||||
pub exact_word_docids: Database<Str, DeCboRoaringBitmapCodec>,
|
||||
pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
|
||||
/// A prefix of word and all the documents ids containing this prefix.
|
||||
pub word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,
|
||||
pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
|
||||
/// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
|
||||
pub exact_word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,
|
||||
pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
||||
pub word_pair_proximity_docids: Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
|
||||
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the word and the position with the docids that corresponds to it.
|
||||
pub word_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
|
||||
pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word and the field id with the docids that corresponds to it.
|
||||
pub word_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
|
||||
pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the field id and the word count with the docids that corresponds to it.
|
||||
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, DeCboRoaringBitmapCodec>,
|
||||
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word prefix and a position with all the docids where the prefix appears at the position.
|
||||
pub word_prefix_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
|
||||
pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
|
||||
pub word_prefix_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
|
||||
pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the facet field id and the docids for which this field exists
|
||||
pub facet_id_exists_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
|
||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the facet field id and the docids for which this field is set as null
|
||||
pub facet_id_is_null_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
|
||||
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the facet field id and the docids for which this field is considered empty
|
||||
pub facet_id_is_empty_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
|
||||
pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
|
||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
|
||||
@@ -73,7 +73,7 @@ pub use self::filterable_attributes_rules::{
};
pub use self::heed_codec::{
    BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
    DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
    RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
    UncheckedU8StrStrCodec,
};

@@ -10,7 +10,7 @@ use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{DeCboRoaringBitmapCodec, DocumentId};
|
||||
use crate::{CboRoaringBitmapCodec, DocumentId};
|
||||
|
||||
/// Call the given closure on the facet distribution of the candidate documents.
|
||||
///
|
||||
@@ -88,7 +88,7 @@ where
|
||||
if key.field_id != field_id {
|
||||
break;
|
||||
}
|
||||
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
@@ -120,7 +120,7 @@ where
|
||||
if key.field_id != field_id {
|
||||
break;
|
||||
}
|
||||
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
@@ -173,7 +173,7 @@ where
|
||||
if key.field_id != self.field_id {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
@@ -210,7 +210,7 @@ where
|
||||
if key.field_id != self.field_id {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
|
||||
@@ -8,7 +8,7 @@ use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{DeCboRoaringBitmapCodec, Result};
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
/// Find all the document ids for which the given field contains a value contained within
|
||||
/// the two bounds.
|
||||
@@ -114,11 +114,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
|
||||
|
||||
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => DeCboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -211,11 +211,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
|
||||
};
|
||||
if should_take_whole_group {
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
previous_value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
};
|
||||
previous_key = next_key;
|
||||
previous_value = next_value;
|
||||
@@ -313,11 +313,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
|
||||
};
|
||||
if should_take_whole_group {
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
previous_value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
};
|
||||
} else {
|
||||
let level = level - 1;
|
||||
|
||||
@@ -14,7 +14,7 @@ use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::MergeCboRoaringBitmaps;
|
||||
use crate::{
|
||||
DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
|
||||
};
|
||||
|
||||
/// A cache storing pointers to values in the LMDB databases.
|
||||
@@ -72,11 +72,11 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
|
||||
match (bitmap_bytes, universe) {
|
||||
(bytes, Some(universe)) => {
|
||||
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
|
||||
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
|
||||
.map(Some)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
(bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
|
||||
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
@@ -105,7 +105,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
DeCboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
|
||||
CboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into)
|
||||
@@ -157,11 +157,11 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
|
||||
match (bitmap_bytes, universe) {
|
||||
(bytes, Some(universe)) => {
|
||||
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
|
||||
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
|
||||
.map(Some)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
(bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
|
||||
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
@@ -226,22 +226,14 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
MergeCboRoaringBitmaps,
|
||||
)
|
||||
}
|
||||
None => {
|
||||
let output = DatabaseCache::get_value(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.word_docids,
|
||||
universe,
|
||||
self.index.word_docids.remap_data_type::<Bytes>(),
|
||||
)?;
|
||||
|
||||
if self.word_interner.get(word).as_str() == "la" {
|
||||
dbg!(self.word_interner.get(word).as_str(), &output);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
None => DatabaseCache::get_value(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.word_docids,
|
||||
universe,
|
||||
self.index.word_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,7 +377,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
{
|
||||
docids
|
||||
.as_ref()
|
||||
.map(|d| DeCboRoaringBitmapCodec::bytes_decode_owned(d))
|
||||
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
|
||||
.transpose()
|
||||
.map_err(heed::Error::Decoding)?
|
||||
} else {
|
||||
@@ -403,7 +395,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
docids |= word1_docids & word2_docids;
|
||||
}
|
||||
}
|
||||
let encoded = DeCboRoaringBitmapCodec::bytes_encode(&docids)
|
||||
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
|
||||
.map(Cow::into_owned)
|
||||
.map(Cow::Owned)
|
||||
.map(Some)
|
||||
|
||||
@@ -6,7 +6,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
- use crate::{DeCboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
+ use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};

/// A ranking rule that produces 3 disjoint buckets:
///

@@ -219,7 +219,7 @@ impl State {

match bitmap_bytes {
Some(bytes) => {
- DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
+ CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
}
None => RoaringBitmap::default(),
}
@@ -15,7 +15,7 @@ use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::update::MergeDeladdCboRoaringBitmaps;
- use crate::{DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, FieldId, Index, Result};
+ use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};

/// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".

@@ -143,7 +143,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
}
} else {
let mut buffer = Vec::new();
- let mut tmp_buffer = Vec::new();
let database = self.db.remap_types::<Bytes, Bytes>();

let mut iter = delta_data.into_stream_merger_iter()?;

@@ -163,12 +162,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
- DeCboRoaringBitmapCodec::merge_deladd_into(
- value,
- old_bitmap,
- &mut buffer,
- &mut tmp_buffer,
- )?;
+ CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
}
None => {
// it is safe to ignore the del in that case.

@@ -182,7 +176,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
};
let new_bitmap = &buffer[1..];
// if the new bitmap is empty, let's remove it
- if DeCboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
+ if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
database.delete(wtxn, key)?;
} else {
database.put(wtxn, key, &buffer)?;
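The merge_deladd_into call above folds a deletion/addition pair into the bitmap previously stored for a key. Conceptually the operation is a set difference followed by a union; a sketch on plain RoaringBitmap values (the real code works on the serialized Cbo form and writes into a reusable buffer):

use roaring::RoaringBitmap;

// Apply a del/add pair to the previously stored docids:
// remove the deleted ids, then insert the added ones.
fn merge_deladd(
    previous: &RoaringBitmap,
    del: &RoaringBitmap,
    add: &RoaringBitmap,
) -> RoaringBitmap {
    (previous - del) | add
}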
@@ -16,7 +16,7 @@ use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeDeladdCboRoaringBitmaps;
- use crate::{DeCboRoaringBitmapCodec, Index, Result};
+ use crate::{CboRoaringBitmapCodec, Index, Result};

/// Enum used as a return value for the facet incremental indexing.
///

@@ -112,13 +112,13 @@ impl FacetsUpdateIncremental {
let value = KvReader::from_slice(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
- .map(DeCboRoaringBitmapCodec::bytes_decode)
+ .map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;

let docids_to_add = value
.get(DelAdd::Addition)
- .map(DeCboRoaringBitmapCodec::bytes_decode)
+ .map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
@@ -366,7 +366,7 @@ pub(crate) mod test_helpers {
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
use crate::update::FacetsUpdateIncrementalInner;
- use crate::DeCboRoaringBitmapCodec;
+ use crate::CboRoaringBitmapCodec;

/// Utility function to generate a string whose position in a lexicographically
/// ordered list is `i`.

@@ -496,7 +496,7 @@ pub(crate) mod test_helpers {
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
- let value = DeCboRoaringBitmapCodec::bytes_encode(docids).unwrap();
+ let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
}
@@ -19,7 +19,7 @@ use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
- use crate::{DeCboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
+ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};

/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();

@@ -311,8 +311,8 @@ fn deladd_obkv_cbo_roaring_bitmaps(
) -> io::Result<()> {
buffer.clear();
let mut obkv = KvWriterDelAdd::new(buffer);
- let del_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
- let add_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
+ let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
+ let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
obkv.finish()
@@ -7,7 +7,7 @@ use either::Either;
use grenad::MergeFunction;
use roaring::RoaringBitmap;

- use crate::heed_codec::DeCboRoaringBitmapCodec;
+ use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::transform::Operation;
use crate::Result;

@@ -200,7 +200,7 @@ impl MergeFunction for MergeCboRoaringBitmaps {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
- DeCboRoaringBitmapCodec::merge_into(values, &mut vec)?;
+ CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
}
}

@@ -232,10 +232,10 @@ impl MergeFunction for MergeDeladdCboRoaringBitmaps {

let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
- DeCboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
+ CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
- DeCboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
+ CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}

@@ -251,11 +251,10 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
- Ok(DeCboRoaringBitmapCodec::merge_deladd_into(
+ Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::from_slice(deladd_obkv),
previous,
buffer,
- &mut Vec::new(), // tmp_buffer
)?)
}
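The MergeCboRoaringBitmaps merge function above unions several serialized bitmaps that were collected for the same key into a single value. With plain roaring serialization standing in for the Cbo encoding, the merge amounts to:

use roaring::RoaringBitmap;

// Union several serialized bitmaps and re-serialize the result.
fn merge_serialized(values: &[Vec<u8>]) -> std::io::Result<Vec<u8>> {
    let mut union = RoaringBitmap::new();
    for bytes in values {
        union |= RoaringBitmap::deserialize_from(bytes.as_slice())?;
    }
    let mut out = Vec::with_capacity(union.serialized_size());
    union.serialize_into(&mut out)?;
    Ok(out)
}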
@@ -40,7 +40,7 @@ use crate::update::{
};
use crate::vector::db::EmbedderInfo;
use crate::vector::{RuntimeEmbedders, VectorStore};
- use crate::{DeCboRoaringBitmapCodec, Index, Result, UserError};
+ use crate::{CboRoaringBitmapCodec, Index, Result, UserError};

static MERGED_DATABASE_COUNT: usize = 7;
static PREFIX_DATABASE_COUNT: usize = 4;

@@ -764,8 +764,8 @@ where
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn<'_>,
merger: Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>,
- word_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
- word_prefix_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
+ word_docids_db: Database<Str, CboRoaringBitmapCodec>,
+ word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
indexer_config: &IndexerConfig,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],
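execute_word_prefix_docids fills the word_prefix_docids database, whose entries pair a prefix with the docids of every word starting with that prefix. A self-contained sketch of that relationship over an in-memory map (a hypothetical helper for illustration, not the milli code path, which works incrementally from the merger above):

use std::collections::BTreeMap;
use roaring::RoaringBitmap;

// Union the docids of every word sharing the given prefix.
fn prefix_docids(word_docids: &BTreeMap<String, RoaringBitmap>, prefix: &str) -> RoaringBitmap {
    let mut docids = RoaringBitmap::new();
    for (word, ids) in word_docids.range(prefix.to_string()..) {
        if !word.starts_with(prefix) {
            break;
        }
        docids |= ids;
    }
    docids
}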
@@ -29,7 +29,7 @@ use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::VectorStore;
use crate::{
- lat_lng_to_xyz, DeCboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
+ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec, UserError,
};

@@ -866,7 +866,7 @@ where
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_proximity_entries_into_database_additional_searchables<R, MF>(
merger: Merger<R, MF>,
- database: &heed::Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
+ database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
wtxn: &mut RwTxn<'_>,
) -> Result<()>
where

@@ -881,7 +881,7 @@ where
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
Some(value) => {
- DeCboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
+ CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}
None => continue,
};
@@ -27,7 +27,7 @@ use crate::index::db_name;
use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
use crate::update::new::KvReaderFieldId;
use crate::vector::Embedding;
- use crate::{DeCboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
+ use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};

/// Note that the FrameProducer requires up to 9 bytes to
/// encode the length, the max grant has been computed accordingly.

@@ -971,9 +971,7 @@ pub struct WordDocidsSender<'a, 'b, D> {

impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> {
- let mut tmp_buffer = Vec::new();
- let value_length =
- DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
+ let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| {
InternalError::StorePut {
database_name: D::DATABASE.database_name(),

@@ -988,10 +986,7 @@ impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
value_length,
|key_buffer, value_buffer| {
key_buffer.copy_from_slice(key);
- DeCboRoaringBitmapCodec::serialize_into(
- bitmap,
- &mut io::Cursor::new(value_buffer),
- )?;
+ CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?;
Ok(())
},
)

@@ -1012,9 +1007,7 @@ impl FacetDocidsSender<'_, '_> {
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);

- let mut tmp_buffer = Vec::new();
- let value_length =
- DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
+ let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let value_length = match facet_kind {
// We must take the facet group size into account
// when we serialize strings and numbers.

@@ -1048,7 +1041,7 @@ impl FacetDocidsSender<'_, '_> {
FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out,
};

- DeCboRoaringBitmapCodec::serialize_into(bitmap, &mut io::Cursor::new(value_out))?;
+ CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?;

Ok(())
},
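Both senders above follow the same two-step pattern: first ask the codec for the exact serialized length so the right amount of space can be granted, then serialize directly into the provided value buffer. The same pattern with plain roaring serialization, writing into a Vec instead of a shared frame:

use roaring::RoaringBitmap;

// Compute the exact size, reserve it, then serialize into the buffer.
fn write_bitmap(bitmap: &RoaringBitmap) -> std::io::Result<Vec<u8>> {
    let mut buffer = Vec::with_capacity(bitmap.serialized_size());
    bitmap.serialize_into(&mut buffer)?;
    Ok(buffer)
}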
@@ -82,7 +82,7 @@ use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::new::thread_local::MostlySend;
use crate::update::new::KvReaderDelAdd;
use crate::update::MergeDeladdCboRoaringBitmaps;
- use crate::{DeCboRoaringBitmapCodec, Result};
+ use crate::{CboRoaringBitmapCodec, Result};

/// A cache that stores bytes keys associated to CboDelAddRoaringBitmaps.
///

@@ -323,7 +323,6 @@ struct SpillingCaches<'extractor> {
spilled_entries: Vec<grenad::Sorter<MergeDeladdCboRoaringBitmaps>>,
deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>,
- tmp_buffer: Vec<u32>,
}

impl<'extractor> SpillingCaches<'extractor> {

@@ -349,7 +348,6 @@ impl<'extractor> SpillingCaches<'extractor> {
caches,
deladd_buffer: Vec::new(),
cbo_buffer: Vec::new(),
- tmp_buffer: Vec::new(),
}
}

@@ -372,7 +370,6 @@ impl<'extractor> SpillingCaches<'extractor> {
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
- &mut self.tmp_buffer,
key,
DelAddRoaringBitmap::new_del_u32(n),
),

@@ -398,7 +395,6 @@ impl<'extractor> SpillingCaches<'extractor> {
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
- &mut self.tmp_buffer,
key,
DelAddRoaringBitmap::new_add_u32(n),
),

@@ -415,7 +411,6 @@ fn spill_entry_to_sorter(
spilled_entries: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
deladd_buffer: &mut Vec<u8>,
cbo_buffer: &mut Vec<u8>,
- tmp_buffer: &mut Vec<u32>,
key: &[u8],
deladd: DelAddRoaringBitmap,
) -> Result<()> {

@@ -425,21 +420,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
- DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
+ CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
- DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
+ CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
- DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
+ CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;

cbo_buffer.clear();
- DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
+ CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),

@@ -645,12 +640,12 @@ impl DelAddRoaringBitmap {
let reader = KvReaderDelAdd::from_slice(bytes);

let del = match reader.get(DelAdd::Deletion) {
- Some(bytes) => DeCboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
+ Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
None => None,
};

let add = match reader.get(DelAdd::Addition) {
- Some(bytes) => DeCboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
+ Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
None => None,
};
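spill_entry_to_sorter reuses a single cbo_buffer for every spilled entry, clearing it before each serialization instead of allocating a fresh vector per bitmap. A sketch of that reuse pattern with plain roaring serialization standing in for serialize_into_vec:

use roaring::RoaringBitmap;

// Serialize each bitmap into the same reusable buffer, cloning only the
// final bytes that get handed to the sorter.
fn spill_all(bitmaps: &[RoaringBitmap]) -> std::io::Result<Vec<Vec<u8>>> {
    let mut cbo_buffer = Vec::new();
    let mut entries = Vec::new();
    for bitmap in bitmaps {
        cbo_buffer.clear();
        bitmap.serialize_into(&mut cbo_buffer)?;
        entries.push(cbo_buffer.clone());
    }
    Ok(entries)
}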
@@ -5,38 +5,18 @@

use std::hash::{BuildHasher as _, BuildHasherDefault};

#[derive(Debug, Clone)]
- pub struct Shards(pub Vec<Shard>);
-
- #[derive(Debug, Clone)]
- pub struct Shard {
- pub is_own: bool,
- pub name: String,
+ pub struct Shards {
+ pub own: Vec<String>,
+ pub others: Vec<String>,
}

impl Shards {
- pub fn from_remotes_local<'a>(
- remotes: impl IntoIterator<Item = &'a str>,
- local: Option<&str>,
- ) -> Self {
- Shards(
- remotes
- .into_iter()
- .map(|name| Shard { is_own: Some(name) == local, name: name.to_owned() })
- .collect(),
- )
- }
-
pub fn must_process(&self, docid: &str) -> bool {
- self.processing_shard(docid).map(|shard| shard.is_own).unwrap_or_default()
- }
-
- pub fn processing_shard<'a>(&'a self, docid: &str) -> Option<&'a Shard> {
let hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
- let to_hash = |shard: &'a Shard| (shard, hasher.hash_one((&shard.name, docid)));
+ let to_hash = |shard: &String| hasher.hash_one((shard, docid));

- let shard =
- self.0.iter().map(to_hash).max_by_key(|(_, hash)| *hash).map(|(shard, _)| shard);
- shard
+ let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default();
+
+ self.own.iter().map(to_hash).any(|hash| hash > max_hash)
}
}
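The rewritten Shards type decides document ownership with a rendezvous-style hash: every shard name is hashed together with the docid, and the node processes the document only when one of its own shards beats every other shard. A runnable sketch of that rule, assuming the twox_hash crate used in the diff:

use std::hash::{BuildHasher as _, BuildHasherDefault};

struct Shards {
    own: Vec<String>,
    others: Vec<String>,
}

impl Shards {
    // A document is processed locally when an owned shard has the highest
    // (shard, docid) hash among all shards.
    fn must_process(&self, docid: &str) -> bool {
        let hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
        let to_hash = |shard: &String| hasher.hash_one((shard, docid));
        let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default();
        self.own.iter().map(to_hash).any(|hash| hash > max_hash)
    }
}

Because every node hashes the same (shard, docid) pairs, a given docid has a single winning shard, so each document is indexed by exactly one node as long as all nodes agree on the shard lists and each shard is owned by exactly one node.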
@@ -14,7 +14,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
use crate::heed_codec::BytesRefCodec;
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::update::{create_writer, writer_into_reader};
- use crate::{DeCboRoaringBitmapCodec, FieldId, Index};
+ use crate::{CboRoaringBitmapCodec, FieldId, Index};

/// Generate the facet level based on the level 0.
///

@@ -123,7 +123,7 @@ fn compute_level(
ser_buffer.push(group_len);
let group_docids = mem::take(&mut group_docids);
let docids = group_docids.into_iter().union();
- DeCboRoaringBitmapCodec::serialize_into(&docids, &mut ser_buffer)?;
+ CboRoaringBitmapCodec::serialize_into_vec(&docids, &mut ser_buffer);
writer.insert(left_bound, &ser_buffer)?;
}
left_bound = Some(key.left_bound);

@@ -142,7 +142,7 @@ fn compute_level(
let group_len: u8 = group_docids.len().try_into().unwrap();
ser_buffer.push(group_len);
let group_docids = group_docids.into_iter().union();
- DeCboRoaringBitmapCodec::serialize_into(&group_docids, &mut ser_buffer)?;
+ CboRoaringBitmapCodec::serialize_into_vec(&group_docids, &mut ser_buffer);
writer.insert(left_bound, &ser_buffer)?;
}
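Both hunks above build one entry of a higher facet level: a single byte holding the number of lower-level entries in the group, followed by the union of their docids. A sketch of that layout, again with plain roaring serialization in place of the Cbo codec:

use roaring::{MultiOps, RoaringBitmap};

// One facet group entry: group size byte, then the serialized union of the
// group's docids.
fn group_entry(group_docids: Vec<RoaringBitmap>) -> std::io::Result<Vec<u8>> {
    let mut ser_buffer = vec![group_docids.len() as u8];
    let docids: RoaringBitmap = group_docids.into_iter().union();
    docids.serialize_into(&mut ser_buffer)?;
    Ok(ser_buffer)
}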