mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-24 05:26:57 +00:00
Compare commits
46 Commits
delta-enco
...
update-dep
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a2f6b30c9 | ||
|
|
f92c88ec52 | ||
|
|
dfeb3a640e | ||
|
|
a0853d00ce | ||
|
|
0d5e176dc2 | ||
|
|
d6f36a773d | ||
|
|
97e6ae1957 | ||
|
|
5ed9be0789 | ||
|
|
7597b1049f | ||
|
|
d99150f21b | ||
|
|
c9726674a0 | ||
|
|
205f40b3b8 | ||
|
|
3d013cdebe | ||
|
|
ddeff5678f | ||
|
|
a235434910 | ||
|
|
a376525348 | ||
|
|
361580f451 | ||
|
|
ea70a7d1c9 | ||
|
|
9304f8e586 | ||
|
|
495db080ec | ||
|
|
d71341fa48 | ||
|
|
5b3070d8c3 | ||
|
|
89006fd4b3 | ||
|
|
49f50a0a21 | ||
|
|
1104f00803 | ||
|
|
33fa564a9c | ||
|
|
a097b254f8 | ||
|
|
54cb0ec437 | ||
|
|
38ed1f1dbb | ||
|
|
643dd33358 | ||
|
|
32f9fb6ab2 | ||
|
|
b5966f82e8 | ||
|
|
5e54063aab | ||
|
|
40456795d0 | ||
|
|
40e60c6f52 | ||
|
|
eeae6383d0 | ||
|
|
9f7172f6ab | ||
|
|
d6eca83cfa | ||
|
|
fc3508c8c8 | ||
|
|
747476a225 | ||
|
|
4b72e54ca7 | ||
|
|
adef2cc132 | ||
|
|
533b9951b1 | ||
|
|
9103cbc9db | ||
|
|
083de2bfc1 | ||
|
|
08bc982748 |
6
.github/workflows/publish-release-assets.yml
vendored
6
.github/workflows/publish-release-assets.yml
vendored
@@ -65,9 +65,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-13, windows-2022]
|
||||
os: [macos-14, windows-2022]
|
||||
include:
|
||||
- os: macos-13
|
||||
- os: macos-14
|
||||
artifact_name: meilisearch
|
||||
asset_name: meilisearch-macos-amd64
|
||||
- os: windows-2022
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
|
||||
publish-macos-apple-silicon:
|
||||
name: Publish binary for macOS silicon
|
||||
runs-on: macos-13
|
||||
runs-on: macos-14
|
||||
needs: check-version
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
8
.github/workflows/test-suite.yml
vendored
8
.github/workflows/test-suite.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-13, windows-2022]
|
||||
os: [macos-14, windows-2022]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Cache dependencies
|
||||
@@ -143,7 +143,7 @@ jobs:
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-latest-large
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
@@ -154,8 +154,8 @@ jobs:
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run df -h
|
||||
run: df -h
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
|
||||
1286
Cargo.lock
generated
1286
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.24.0"
|
||||
version = "1.26.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.7"
|
||||
anyhow = "1.0.100"
|
||||
bumpalo = "3.19.0"
|
||||
csv = "1.4.0"
|
||||
memmap2 = "0.9.9"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
mimalloc = { version = "0.1.48", default-features = false }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tempfile = "3.23.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
criterion = { version = "0.7.0", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.12"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.98"
|
||||
bytes = "1.10.1"
|
||||
convert_case = "0.8.0"
|
||||
flate2 = "1.1.2"
|
||||
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
anyhow = "1.0.100"
|
||||
bytes = "1.11.0"
|
||||
convert_case = "0.9.0"
|
||||
flate2 = "1.1.5"
|
||||
reqwest = { version = "0.12.24", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.41", features = ["parsing"] }
|
||||
time = { version = "0.3.44", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.98"
|
||||
anyhow = "1.0.100"
|
||||
vergen-git2 = "1.0.7"
|
||||
|
||||
@@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
flate2 = "1.1.2"
|
||||
anyhow = "1.0.100"
|
||||
flate2 = "1.1.5"
|
||||
http = "1.3.1"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.21.3"
|
||||
regex = "1.11.1"
|
||||
regex = "1.12.2"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -96,6 +96,8 @@ pub struct TaskDump {
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
// A `Kind` specific version made for the dump. If modified you may break the dump.
|
||||
@@ -178,6 +180,7 @@ impl From<Task> for TaskDump {
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
network: task.network,
|
||||
custom_metadata: task.custom_metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -396,6 +399,7 @@ pub(crate) mod test {
|
||||
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
None,
|
||||
),
|
||||
@@ -421,6 +425,7 @@ pub(crate) mod test {
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
Some(vec![
|
||||
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
|
||||
@@ -441,6 +446,7 @@ pub(crate) mod test {
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
None,
|
||||
),
|
||||
|
||||
@@ -164,6 +164,7 @@ impl CompatV5ToV6 {
|
||||
started_at: task_view.started_at,
|
||||
finished_at: task_view.finished_at,
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
};
|
||||
|
||||
(task, content_file)
|
||||
|
||||
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -16,7 +16,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
criterion = { version = "0.7.0", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
|
||||
@@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
arbitrary = { version = "1.4.2", features = ["derive"] }
|
||||
bumpalo = "3.19.0"
|
||||
clap = { version = "4.5.52", features = ["derive"] }
|
||||
either = "1.15.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tempfile = "3.23.0"
|
||||
|
||||
@@ -11,33 +11,33 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
anyhow = "1.0.100"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
bytes = "1.11.0"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
convert_case = "0.9.0"
|
||||
csv = "1.4.0"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
indexmap = "2.9.0"
|
||||
flate2 = "1.1.5"
|
||||
indexmap = "2.12.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
memmap2 = "0.9.9"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
rayon = "1.11.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
@@ -45,11 +45,11 @@ time = { version = "0.3.41", features = [
|
||||
] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
|
||||
reqwest = { version = "0.12.24", features = ["rustls-tls", "http2"], default-features = false }
|
||||
rusty-s3 = "0.8.1"
|
||||
tokio = { version = "1.47.1", features = ["full"] }
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -150,6 +150,7 @@ impl<'a> Dump<'a> {
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
network: task.network,
|
||||
custom_metadata: task.custom_metadata,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
|
||||
@@ -232,6 +232,7 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
status,
|
||||
kind,
|
||||
network,
|
||||
custom_metadata,
|
||||
} = task;
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
@@ -252,6 +253,9 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
if let Some(network) = network {
|
||||
snap.push_str(&format!("network: {network:?}, "))
|
||||
}
|
||||
if let Some(custom_metadata) = custom_metadata {
|
||||
snap.push_str(&format!("custom_metadata: {custom_metadata:?}"))
|
||||
}
|
||||
|
||||
snap.push('}');
|
||||
snap
|
||||
|
||||
@@ -756,6 +756,19 @@ impl IndexScheduler {
|
||||
kind: KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
self.register_with_custom_metadata(kind, task_id, None, dry_run)
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler, with metadata.
|
||||
///
|
||||
/// If it fails and data was associated with the task, it tries to delete the associated data.
|
||||
pub fn register_with_custom_metadata(
|
||||
&self,
|
||||
kind: KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
|
||||
@@ -766,7 +779,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?;
|
||||
let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?;
|
||||
|
||||
// If the registered task is a task cancelation
|
||||
// we inform the processing tasks to stop (if necessary).
|
||||
|
||||
@@ -257,6 +257,7 @@ impl Queue {
|
||||
wtxn: &mut RwTxn,
|
||||
kind: &KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
@@ -280,6 +281,7 @@ impl Queue {
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
network: None,
|
||||
custom_metadata,
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
@@ -344,6 +346,7 @@ impl Queue {
|
||||
tasks: to_delete,
|
||||
},
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -438,12 +438,15 @@ async fn multipart_stream_to_s3(
|
||||
db_name: String,
|
||||
reader: std::io::PipeReader,
|
||||
) -> Result<(), Error> {
|
||||
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
|
||||
use std::collections::VecDeque;
|
||||
use std::io;
|
||||
use std::os::fd::OwnedFd;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use reqwest::{Client, Response};
|
||||
use rusty_s3::S3Action as _;
|
||||
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
|
||||
use rusty_s3::actions::CreateMultipartUpload;
|
||||
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
let reader = OwnedFd::from(reader);
|
||||
@@ -517,7 +520,6 @@ async fn multipart_stream_to_s3(
|
||||
while buffer.len() < (s3_multipart_part_size as usize / 2) {
|
||||
// Wait for the pipe to be readable
|
||||
|
||||
use std::io;
|
||||
reader.readable().await?;
|
||||
|
||||
match reader.try_read_buf(&mut buffer) {
|
||||
@@ -581,15 +583,17 @@ async fn multipart_stream_to_s3(
|
||||
async move {
|
||||
match client.post(url).body(body).send().await {
|
||||
Ok(resp) if resp.status().is_client_error() => {
|
||||
resp.error_for_status().map_err(backoff::Error::Permanent)
|
||||
Err(backoff::Error::Permanent(Error::S3Error {
|
||||
status: resp.status(),
|
||||
body: resp.text().await.unwrap_or_default(),
|
||||
}))
|
||||
}
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err(backoff::Error::transient(e)),
|
||||
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(Error::S3HttpError)?;
|
||||
.await?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -48,6 +48,8 @@ pub fn upgrade_index_scheduler(
|
||||
(1, 22, _) => 0,
|
||||
(1, 23, _) => 0,
|
||||
(1, 24, _) => 0,
|
||||
(1, 25, _) => 0,
|
||||
(1, 26, _) => 0,
|
||||
(major, minor, patch) => {
|
||||
if major > current_major
|
||||
|| (major == current_major && minor > current_minor)
|
||||
@@ -98,6 +100,7 @@ pub fn upgrade_index_scheduler(
|
||||
status: Status::Enqueued,
|
||||
kind: KindWithContent::UpgradeDatabase { from },
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -379,6 +379,7 @@ impl crate::IndexScheduler {
|
||||
status,
|
||||
kind,
|
||||
network: _,
|
||||
custom_metadata: _,
|
||||
} = task;
|
||||
assert_eq!(uid, task.uid);
|
||||
if task.status != Status::Enqueued {
|
||||
|
||||
@@ -15,7 +15,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.6.0"
|
||||
criterion = "0.7.0"
|
||||
|
||||
[[bench]]
|
||||
name = "depth"
|
||||
|
||||
@@ -13,7 +13,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
md5 = "0.8.0"
|
||||
once_cell = "1.21"
|
||||
regex-lite = "0.1.6"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
regex-lite = "0.1.8"
|
||||
uuid = { version = "1.18.1", features = ["v4"] }
|
||||
|
||||
@@ -12,15 +12,15 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.22.1"
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -11,38 +11,38 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.11.0", default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
actix-web = { version = "4.12.0", default-features = false }
|
||||
anyhow = "1.0.100"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
convert_case = "0.9.0"
|
||||
csv = "1.4.0"
|
||||
deserr = { version = "0.6.4", features = ["actix-web"] }
|
||||
either = { version = "1.15.0", features = ["serde"] }
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.7"
|
||||
memmap2 = "0.9.9"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rustc-hash = "2.1.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.45"
|
||||
tokio = "1.48"
|
||||
utoipa = { version = "5.4.0", features = ["macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
|
||||
@@ -254,6 +254,7 @@ InvalidSearchHybridQuery , InvalidRequest , BAD_REQU
|
||||
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexCustomMetadata , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
|
||||
|
||||
@@ -55,6 +55,9 @@ pub struct TaskView {
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
@@ -73,6 +76,7 @@ impl TaskView {
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
network: task.network.clone(),
|
||||
custom_metadata: task.custom_metadata.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,6 +45,9 @@ pub struct Task {
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl Task {
|
||||
|
||||
@@ -14,91 +14,91 @@ default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.1"
|
||||
actix-http = { version = "3.11.0", default-features = false, features = [
|
||||
actix-http = { version = "3.11.2", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.11.0", default-features = false, features = [
|
||||
actix-web = { version = "4.12.0", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
anyhow = { version = "1.0.98", features = ["backtrace"] }
|
||||
bstr = "1.12.0"
|
||||
anyhow = { version = "1.0.100", features = ["backtrace"] }
|
||||
bstr = "1.12.1"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive", "env"] }
|
||||
bytes = "1.11.0"
|
||||
bumpalo = "3.19.0"
|
||||
clap = { version = "4.5.52", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
deserr = { version = "0.6.4", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.15.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.31"
|
||||
futures-util = "0.3.31"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
is-terminal = "0.4.16"
|
||||
indexmap = { version = "2.12.0", features = ["serde"] }
|
||||
is-terminal = "0.4.17"
|
||||
itertools = "0.14.0"
|
||||
jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
memmap2 = "0.9.9"
|
||||
mimalloc = { version = "0.1.48", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.17.0"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
parking_lot = "0.12.4"
|
||||
ordered-float = "5.1.0"
|
||||
parking_lot = "0.12.5"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.16"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.14.0", features = ["process"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
rayon = "1.11.0"
|
||||
regex = "1.12.2"
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
|
||||
rustls = { version = "0.23.35", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.13.0", features = ["alloc"] }
|
||||
rustls-pemfile = "2.2.0"
|
||||
segment = { version = "0.2.6" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
siphasher = "1.0.1"
|
||||
slice-group-by = "0.3.1"
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
sysinfo = "0.35.2"
|
||||
static-files = { version = "0.3.1", optional = true }
|
||||
sysinfo = "0.37.2"
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
toml = "0.8.23"
|
||||
uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
toml = "0.9.8"
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4", "v7"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
url = { version = "2.5.7", features = ["serde"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.20", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.18"
|
||||
tracing-actix-web = "0.7.19"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
roaring = "0.10.12"
|
||||
mopa-maintained = "0.2.3"
|
||||
@@ -114,35 +114,35 @@ utoipa = { version = "5.4.0", features = [
|
||||
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
|
||||
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
|
||||
secrecy = "0.10.3"
|
||||
actix-web-lab = { version = "0.24.1", default-features = false }
|
||||
actix-web-lab = { version = "0.24.3", default-features = false }
|
||||
urlencoding = "2.1.3"
|
||||
backoff = { version = "0.4.0", features = ["tokio"] }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.10.0"
|
||||
brotli = "8.0.1"
|
||||
actix-rt = "2.11.0"
|
||||
brotli = "8.0.2"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["redactions"] }
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.6"
|
||||
wiremock = "0.6.3"
|
||||
wiremock = "0.6.5"
|
||||
yaup = "0.3.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.98", optional = true }
|
||||
cargo_toml = { version = "0.22.1", optional = true }
|
||||
anyhow = { version = "1.0.100", optional = true }
|
||||
cargo_toml = { version = "0.22.3", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"blocking",
|
||||
"rustls-tls",
|
||||
], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
tempfile = { version = "3.20.0", optional = true }
|
||||
zip = { version = "4.1.0", optional = true }
|
||||
static-files = { version = "0.3.1", optional = true }
|
||||
tempfile = { version = "3.23.0", optional = true }
|
||||
zip = { version = "6.0.0", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
|
||||
@@ -195,7 +195,7 @@ struct Infos {
|
||||
experimental_enable_logs_route: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_limit_batched_tasks_total_size: Option<u64>,
|
||||
experimental_network: bool,
|
||||
experimental_multimodal: bool,
|
||||
experimental_chat_completions: bool,
|
||||
@@ -359,7 +359,7 @@ impl Infos {
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
experimental_limit_batched_tasks_total_size:
|
||||
experimental_limit_batched_tasks_total_size.into(),
|
||||
experimental_limit_batched_tasks_total_size.map(|size| size.as_u64()),
|
||||
task_queue_webhook: task_webhook_url.is_some(),
|
||||
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
|
||||
log_level: log_level.to_string(),
|
||||
|
||||
@@ -230,7 +230,11 @@ pub fn setup_meilisearch(
|
||||
cleanup_enabled: !opt.experimental_replication_parameters,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
|
||||
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
|
||||
// By default, we use half of the available memory to determine the size of batched tasks
|
||||
|| opt.indexer_options.max_indexing_memory.map_or(u64::MAX, |mem| mem.as_u64() / 2),
|
||||
|size| size.as_u64(),
|
||||
),
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features: opt.to_instance_features(),
|
||||
|
||||
@@ -473,11 +473,13 @@ pub struct Opt {
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_max_number_of_batched_tasks: usize,
|
||||
|
||||
/// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/801>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
|
||||
#[serde(default = "default_limit_batched_tasks_total_size")]
|
||||
pub experimental_limit_batched_tasks_total_size: Byte,
|
||||
/// Experimentally controls the maximum total size, in bytes, of tasks that will be processed
|
||||
/// simultaneously. When unspecified, defaults to half of the maximum indexing memory.
|
||||
///
|
||||
/// See: <https://github.com/orgs/meilisearch/discussions/801>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE)]
|
||||
#[serde(default)]
|
||||
pub experimental_limit_batched_tasks_total_size: Option<Byte>,
|
||||
|
||||
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
|
||||
/// distinct embedder.
|
||||
@@ -701,10 +703,12 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
|
||||
experimental_max_number_of_batched_tasks.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
experimental_limit_batched_tasks_total_size.to_string(),
|
||||
);
|
||||
if let Some(limit) = experimental_limit_batched_tasks_total_size {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
limit.to_string(),
|
||||
);
|
||||
}
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
|
||||
experimental_embedding_cache_entries.to_string(),
|
||||
@@ -1273,10 +1277,6 @@ fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
fn default_limit_batched_tasks_total_size() -> Byte {
|
||||
Byte::from_u64(u64::MAX)
|
||||
}
|
||||
|
||||
fn default_embedding_cache_entries() -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
use meilisearch_types::{
|
||||
error::{Code, ErrorCode, ResponseError},
|
||||
milli::TimeBudget,
|
||||
};
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||
use meilisearch_types::milli::TimeBudget;
|
||||
use rand::Rng;
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
|
||||
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
|
||||
const MAX_RETRIES: u32 = 10;
|
||||
|
||||
|
||||
@@ -333,10 +333,12 @@ impl Aggregate for DocumentsDeletionAggregator {
|
||||
pub async fn delete_document(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<DocumentParam>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
let DocumentParam { index_uid, document_id } = path.into_inner();
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
let network = index_scheduler.network();
|
||||
@@ -359,7 +361,10 @@ pub async fn delete_document(
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
@@ -678,6 +683,19 @@ pub struct UpdateDocumentsQuery {
|
||||
#[param(value_type = char, default = ",", example = ";")]
|
||||
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
|
||||
pub csv_delimiter: Option<u8>,
|
||||
|
||||
#[param(example = "custom")]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Deserr, IntoParams)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[into_params(parameter_in = Query, rename_all = "camelCase")]
|
||||
pub struct CustomMetadataQuery {
|
||||
#[param(example = "custom")]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
fn from_char_csv_delimiter(
|
||||
@@ -819,6 +837,7 @@ pub async fn replace_documents(
|
||||
body,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
uid,
|
||||
params.custom_metadata,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
@@ -921,6 +940,7 @@ pub async fn update_documents(
|
||||
body,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
uid,
|
||||
params.custom_metadata,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
@@ -940,6 +960,7 @@ async fn document_addition(
|
||||
body: Payload,
|
||||
method: IndexDocumentsMethod,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
allow_index_creation: bool,
|
||||
req: &HttpRequest,
|
||||
@@ -1065,8 +1086,10 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
|
||||
.await?
|
||||
let task = match tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, task_id, custom_metadata, dry_run)
|
||||
})
|
||||
.await?
|
||||
{
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
@@ -1130,7 +1153,7 @@ async fn copy_body_to_file(
|
||||
/// Delete a set of documents based on an array of document ids.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "{indexUid}/delete-batch",
|
||||
path = "{indexUid}/documents/delete-batch",
|
||||
tag = "Documents",
|
||||
security(("Bearer" = ["documents.delete", "documents.*", "*"])),
|
||||
params(
|
||||
@@ -1161,11 +1184,14 @@ pub async fn delete_documents_batch(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
@@ -1190,7 +1216,10 @@ pub async fn delete_documents_batch(
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
@@ -1244,12 +1273,15 @@ pub struct DocumentDeletionByFilter {
|
||||
pub async fn delete_documents_by_filter(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner();
|
||||
@@ -1282,7 +1314,10 @@ pub async fn delete_documents_by_filter(
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
@@ -1372,12 +1407,14 @@ impl Aggregate for EditDocumentsByFunctionAggregator {
|
||||
pub async fn edit_documents_by_function(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
body: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Edit documents by function");
|
||||
debug!(parameters = ?body, "Edit documents by function");
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
index_scheduler
|
||||
.features()
|
||||
@@ -1387,23 +1424,23 @@ pub async fn edit_documents_by_function(
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let params = params.into_inner();
|
||||
let body = body.into_inner();
|
||||
|
||||
analytics.publish(
|
||||
EditDocumentsByFunctionAggregator {
|
||||
filtered: params.filter.is_some(),
|
||||
with_context: params.context.is_some(),
|
||||
filtered: body.filter.is_some(),
|
||||
with_context: body.context.is_some(),
|
||||
index_creation: index_scheduler.index(&index_uid).is_err(),
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
let engine = milli::rhai::Engine::new();
|
||||
if let Err(e) = engine.compile(¶ms.function) {
|
||||
if let Err(e) = engine.compile(&body.function) {
|
||||
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = params.filter {
|
||||
if let Some(ref filter) = body.filter {
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(
|
||||
filter,
|
||||
@@ -1414,8 +1451,8 @@ pub async fn edit_documents_by_function(
|
||||
}
|
||||
let task = KindWithContent::DocumentEdition {
|
||||
index_uid: index_uid.clone(),
|
||||
filter_expr: params.filter.clone(),
|
||||
context: match params.context.clone() {
|
||||
filter_expr: body.filter.clone(),
|
||||
context: match body.context.clone() {
|
||||
Some(Value::Object(m)) => Some(m),
|
||||
None => None,
|
||||
_ => {
|
||||
@@ -1425,18 +1462,21 @@ pub async fn edit_documents_by_function(
|
||||
))
|
||||
}
|
||||
},
|
||||
function: params.function.clone(),
|
||||
function: body.function.clone(),
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
@@ -1477,12 +1517,14 @@ pub async fn edit_documents_by_function(
|
||||
pub async fn clear_all_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1501,7 +1543,10 @@ pub async fn clear_all_documents(
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
|
||||
@@ -218,6 +218,8 @@ pub struct SummarizedTaskView {
|
||||
deserialize_with = "time::serde::rfc3339::deserialize"
|
||||
)]
|
||||
enqueued_at: OffsetDateTime,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl From<Task> for SummarizedTaskView {
|
||||
@@ -228,6 +230,7 @@ impl From<Task> for SummarizedTaskView {
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
enqueued_at: task.enqueued_at,
|
||||
custom_metadata: task.custom_metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,10 +18,9 @@ use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
|
||||
use crate::milli::vector::Embedding;
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|
||||
|
||||
|
||||
@@ -91,7 +91,16 @@ impl<'a> Index<'a, Owned> {
documents: Value,
primary_key: Option<&str>,
) -> (Value, StatusCode) {
self._add_documents(documents, primary_key).await
self._add_documents(documents, primary_key, None).await
}

pub async fn add_documents_with_custom_metadata(
&self,
documents: Value,
primary_key: Option<&str>,
custom_metadata: Option<&str>,
) -> (Value, StatusCode) {
self._add_documents(documents, primary_key, custom_metadata).await
}

pub async fn raw_add_documents(
@@ -352,12 +361,25 @@ impl<State> Index<'_, State> {
&self,
documents: Value,
primary_key: Option<&str>,
custom_metadata: Option<&str>,
) -> (Value, StatusCode) {
let url = match primary_key {
Some(key) => {
format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key)
let url = match (primary_key, custom_metadata) {
(Some(key), Some(meta)) => {
format!(
"/indexes/{}/documents?primaryKey={key}&customMetadata={meta}",
urlencode(self.uid.as_ref()),
)
}
None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
(None, Some(meta)) => {
format!(
"/indexes/{}/documents?&customMetadata={meta}",
urlencode(self.uid.as_ref()),
)
}
(Some(key), None) => {
format!("/indexes/{}/documents?&primaryKey={key}", urlencode(self.uid.as_ref()),)
}
(None, None) => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
};
self.service.post_encoded(url, documents, self.encoder).await
}
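
Editorial note, not part of the diff: a minimal sketch of the request paths this test helper produces, assuming a hypothetical index named "tamo" and metadata value "batch-1" (both illustrative, not taken from the diff):

// primary_key = Some("id"), custom_metadata = Some("batch-1")
//   -> /indexes/tamo/documents?primaryKey=id&customMetadata=batch-1
// primary_key = None, custom_metadata = Some("batch-1")
//   -> /indexes/tamo/documents?&customMetadata=batch-1
// primary_key = Some("id"), custom_metadata = None
//   -> /indexes/tamo/documents?&primaryKey=id
// primary_key = None, custom_metadata = None
//   -> /indexes/tamo/documents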

@@ -241,7 +241,7 @@ pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
let server = Server::new_shared();
let index = server._index("SHARED_DOCUMENTS").to_shared();
let documents = DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
let (response, _code) = index._add_documents(documents, None, None).await;
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
@@ -284,7 +284,7 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shar
let server = Server::new_shared();
let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared();
let documents = SCORE_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
let (response, _code) = index._add_documents(documents, None, None).await;
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
@@ -361,7 +361,7 @@ pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Sha
let server = Server::new_shared();
let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
let documents = NESTED_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
let (response, _code) = index._add_documents(documents, None, None).await;
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
@@ -508,7 +508,7 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
.get_or_init(|| async {
let server = Server::new_shared();
let index = server._index("SHARED_GEO_DOCUMENTS").to_shared();
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None).await;
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None, None).await;
server.wait_task(response.uid()).await.succeeded();

let (response, _code) = index
@@ -531,7 +531,7 @@ pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared>
let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
let countries = include_str!("../documents/geojson/assets/countries.json");
let lille = serde_json::from_str::<serde_json::Value>(countries).unwrap();
let (response, _code) = index._add_documents(Value(lille), Some("name")).await;
let (response, _code) = index._add_documents(Value(lille), Some("name"), None).await;
server.wait_task(response.uid()).await.succeeded();

let (response, _code) =

@@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}])
});

static MANY_DOCS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "4",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "5",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "6",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "7",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "8",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "9",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "10",
}])
});

#[actix_rt::test]
async fn simple_search() {
let server = Server::new_shared();
@@ -449,6 +503,38 @@ async fn simple_search_hf() {
snapshot!(response["semanticHitCount"], @"3");
}

#[actix_rt::test]
async fn issue_5976_missing_docs_hf() {
let server = Server::new_shared();
let index = index_with_documents_hf(server, &MANY_DOCS).await;
let (response, code) = index
.search_post(
json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
let are_empty: Vec<_> = response["hits"]
.as_array()
.unwrap()
.iter()
.map(|hit| hit["_vectors"]["default"]["embeddings"].as_array().unwrap().is_empty())
.collect();
snapshot!(json!(are_empty), @r###"
[
false,
false,
false,
false,
false,
false,
false,
false,
false,
false
]
"###);
}

#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new_shared();

@@ -3141,3 +3141,513 @@ fn fail(override_response_body: Option<&str>) -> ResponseTemplate {
response.set_body_json(json!({"error": "provoked error", "code": "test_error", "link": "https://docs.meilisearch.com/errors#test_error"}))
}
}

#[actix_rt::test]
async fn remote_auto_sharding() {
let ms0 = Server::new().await;
let ms1 = Server::new().await;
let ms2 = Server::new().await;

// enable feature

let (response, code) = ms0.set_features(json!({"network": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["network"]), @"true");
let (response, code) = ms1.set_features(json!({"network": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["network"]), @"true");
let (response, code) = ms2.set_features(json!({"network": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["network"]), @"true");

// set self & sharding

let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {},
"sharding": true
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1", "sharding": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {},
"sharding": true
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2", "sharding": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
"remotes": {},
"sharding": true
}
"###);

// wrap servers
let ms0 = Arc::new(ms0);
let ms1 = Arc::new(ms1);
let ms2 = Arc::new(ms2);

let rms0 = LocalMeili::new(ms0.clone()).await;
let rms1 = LocalMeili::new(ms1.clone()).await;
let rms2 = LocalMeili::new(ms2.clone()).await;

// set network
let network = json!({"remotes": {
"ms0": {
"url": rms0.url()
},
"ms1": {
"url": rms1.url()
},
"ms2": {
"url": rms2.url()
}
}});

println!("{}", serde_json::to_string_pretty(&network).unwrap());

let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
let (_response, status_code) = ms2.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");

// add documents
let documents = SCORE_DOCUMENTS.clone();
let documents = documents.as_array().unwrap();
let index0 = ms0.index("test");
let _index1 = ms1.index("test");
let _index2 = ms2.index("test");

let (task, _status_code) = index0.add_documents(json!(documents), None).await;

let t0 = task.uid();
let (t, _) = ms0.get_task(task.uid()).await;
let t1 = t["network"]["remote_tasks"]["ms1"]["taskUid"].as_u64().unwrap();
let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();

ms0.wait_task(t0).await.succeeded();
ms1.wait_task(t1).await.succeeded();
ms2.wait_task(t2).await.succeeded();

// perform multi-search
let query = "badman returns";
let request = json!({
"federation": {},
"queries": [
{
"q": query,
"indexUid": "test",
"federationOptions": {
"remote": "ms0"
}
},
{
"q": query,
"indexUid": "test",
"federationOptions": {
"remote": "ms1"
}
},
{
"q": query,
"indexUid": "test",
"federationOptions": {
"remote": "ms2"
}
},
]
});

let (response, _status_code) = ms0.multi_search(request.clone()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Batman Returns",
"id": "C",
"_federation": {
"indexUid": "test",
"queriesPosition": 2,
"weightedRankingScore": 0.8317901234567902,
"remote": "ms2"
}
},
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_federation": {
"indexUid": "test",
"queriesPosition": 1,
"weightedRankingScore": 0.7028218694885362,
"remote": "ms1"
}
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_federation": {
"indexUid": "test",
"queriesPosition": 1,
"weightedRankingScore": 0.7028218694885362,
"remote": "ms1"
}
},
{
"title": "Badman",
"id": "E",
"_federation": {
"indexUid": "test",
"queriesPosition": 2,
"weightedRankingScore": 0.5,
"remote": "ms2"
}
},
{
"title": "Batman",
"id": "D",
"_federation": {
"indexUid": "test",
"queriesPosition": 0,
"weightedRankingScore": 0.23106060606060605,
"remote": "ms0"
}
}
],
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 5,
"requestUid": "[uuid]",
"remoteErrors": {}
}
"###);
let (response, _status_code) = ms1.multi_search(request.clone()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Batman Returns",
"id": "C",
"_federation": {
"indexUid": "test",
"queriesPosition": 2,
"weightedRankingScore": 0.8317901234567902,
"remote": "ms2"
}
},
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_federation": {
"indexUid": "test",
"queriesPosition": 1,
"weightedRankingScore": 0.7028218694885362,
"remote": "ms1"
}
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_federation": {
"indexUid": "test",
"queriesPosition": 1,
"weightedRankingScore": 0.7028218694885362,
"remote": "ms1"
}
},
{
"title": "Badman",
"id": "E",
"_federation": {
"indexUid": "test",
"queriesPosition": 2,
"weightedRankingScore": 0.5,
"remote": "ms2"
}
},
{
"title": "Batman",
"id": "D",
"_federation": {
"indexUid": "test",
"queriesPosition": 0,
"weightedRankingScore": 0.23106060606060605,
"remote": "ms0"
}
}
],
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 5,
"requestUid": "[uuid]",
"remoteErrors": {}
}
"###);
let (response, _status_code) = ms2.multi_search(request.clone()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Batman Returns",
"id": "C",
"_federation": {
"indexUid": "test",
"queriesPosition": 2,
"weightedRankingScore": 0.8317901234567902,
"remote": "ms2"
}
},
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_federation": {
"indexUid": "test",
"queriesPosition": 1,
"weightedRankingScore": 0.7028218694885362,
"remote": "ms1"
}
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_federation": {
"indexUid": "test",
"queriesPosition": 1,
"weightedRankingScore": 0.7028218694885362,
"remote": "ms1"
}
},
{
"title": "Badman",
"id": "E",
"_federation": {
"indexUid": "test",
"queriesPosition": 2,
"weightedRankingScore": 0.5,
"remote": "ms2"
}
},
{
"title": "Batman",
"id": "D",
"_federation": {
"indexUid": "test",
"queriesPosition": 0,
"weightedRankingScore": 0.23106060606060605,
"remote": "ms0"
}
}
],
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 5,
"requestUid": "[uuid]",
"remoteErrors": {}
}
"###);
}

#[actix_rt::test]
async fn remote_auto_sharding_with_custom_metadata() {
let ms0 = Server::new().await;
let ms1 = Server::new().await;
let ms2 = Server::new().await;

// enable feature

let (response, code) = ms0.set_features(json!({"network": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["network"]), @"true");
let (response, code) = ms1.set_features(json!({"network": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["network"]), @"true");
let (response, code) = ms2.set_features(json!({"network": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["network"]), @"true");

// set self & sharding

let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {},
"sharding": true
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1", "sharding": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {},
"sharding": true
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2", "sharding": true})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
"remotes": {},
"sharding": true
}
"###);

// wrap servers
let ms0 = Arc::new(ms0);
let ms1 = Arc::new(ms1);
let ms2 = Arc::new(ms2);

let rms0 = LocalMeili::new(ms0.clone()).await;
let rms1 = LocalMeili::new(ms1.clone()).await;
let rms2 = LocalMeili::new(ms2.clone()).await;

// set network
let network = json!({"remotes": {
"ms0": {
"url": rms0.url()
},
"ms1": {
"url": rms1.url()
},
"ms2": {
"url": rms2.url()
}
}});

println!("{}", serde_json::to_string_pretty(&network).unwrap());

let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
let (_response, status_code) = ms2.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");

// add documents
let documents = SCORE_DOCUMENTS.clone();
let documents = documents.as_array().unwrap();
let index0 = ms0.index("test");
let _index1 = ms1.index("test");
let _index2 = ms2.index("test");

let (task, _status_code) = index0
.add_documents_with_custom_metadata(
json!(documents),
None,
Some("remote_auto_sharding_with_custom_metadata"),
)
.await;

let t0 = task.uid();
let (t, _) = ms0.get_task(task.uid()).await;
let t1 = t["network"]["remote_tasks"]["ms1"]["taskUid"].as_u64().unwrap();
let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();

let t = ms0.wait_task(t0).await.succeeded();
snapshot!(t, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 5,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"network": {
"remote_tasks": {
"ms1": {
"taskUid": 0,
"error": null
},
"ms2": {
"taskUid": 0,
"error": null
}
}
},
"customMetadata": "remote_auto_sharding_with_custom_metadata"
}
"###);

let t = ms1.wait_task(t1).await.succeeded();
snapshot!(t, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 5,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"network": {
"origin": {
"remoteName": "ms0",
"taskUid": 0
}
},
"customMetadata": "remote_auto_sharding_with_custom_metadata"
}
"###);

let t = ms2.wait_task(t2).await.succeeded();
snapshot!(t, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 5,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"network": {
"origin": {
"remoteName": "ms0",
"taskUid": 0
}
},
"customMetadata": "remote_auto_sharding_with_custom_metadata"
}
"###);
}

@@ -656,3 +656,119 @@ async fn forbidden_fields() {
}
"#);
}

#[actix_web::test]
async fn receive_custom_metadata() {
let WebhookHandle { server_handle: handle1, url: url1, receiver: mut receiver1 } =
create_webhook_server().await;
let WebhookHandle { server_handle: handle2, url: url2, receiver: mut receiver2 } =
create_webhook_server().await;
let WebhookHandle { server_handle: handle3, url: url3, receiver: mut receiver3 } =
create_webhook_server().await;

let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
task_webhook_url: Some(Url::parse(&url3).unwrap()),
..default_settings(db_path.path())
})
.await
.unwrap();

for url in [url1, url2] {
let (value, code) = server.create_webhook(json!({ "url": url })).await;
snapshot!(code, @"201 Created");
snapshot!(json_string!(value, { ".uuid" => "[uuid]", ".url" => "[ignored]" }), @r#"
{
"uuid": "[uuid]",
"isEditable": true,
"url": "[ignored]",
"headers": {}
}
"#);
}
let index = server.index("tamo");
let (response, code) = index
.add_documents_with_custom_metadata(
json!({ "id": 1, "doggo": "bone" }),
None,
Some("test_meta"),
)
.await;

snapshot!(response, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]",
"customMetadata": "test_meta"
}
"###);
snapshot!(code, @"202 Accepted");

let mut count1 = 0;
let mut count2 = 0;
let mut count3 = 0;
while count1 == 0 || count2 == 0 || count3 == 0 {
tokio::select! {
msg = receiver1.recv() => {
if let Some(msg) = msg {
count1 += 1;
check_metadata(msg);
}
},
msg = receiver2.recv() => {
if let Some(msg) = msg {
count2 += 1;
check_metadata(msg);
}
},
msg = receiver3.recv() => {
if let Some(msg) = msg {
count3 += 1;
check_metadata(msg);
}
},
}
}

assert_eq!(count1, 1);
assert_eq!(count2, 1);
assert_eq!(count3, 1);

handle1.abort();
handle2.abort();
handle3.abort();
}

fn check_metadata(msg: Vec<u8>) {
let msg = String::from_utf8(msg).unwrap();
let tasks = msg.split('\n');
for task in tasks {
if task.is_empty() {
continue;
}
let task: serde_json::Value = serde_json::from_str(task).unwrap();
snapshot!(common::Value(task), @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "tamo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"customMetadata": "test_meta"
}
"###);
}
}

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.24.0");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.26.0");
}

#[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.24.1 is higher than the Meilisearch version 1.24.0. Downgrade is not supported");
snapshot!(err, @"Database version 1.26.1 is higher than the Meilisearch version 1.26.0. Downgrade is not supported");
}

#[actix_rt::test]

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.24.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -9,15 +9,15 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.98"
clap = { version = "4.5.40", features = ["derive"] }
anyhow = "1.0.100"
clap = { version = "4.5.52", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
indexmap = { version = "2.9.0", features = ["serde"] }
indexmap = { version = "2.12.0", features = ["serde"] }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
time = { version = "0.3.41", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.17.0", features = ["v4"], default-features = false }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tempfile = "3.23.0"
time = { version = "0.3.44", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.18.1", features = ["v4"], default-features = false }

@@ -15,15 +15,15 @@ license.workspace = true
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
bstr = "1.12.1"
bytemuck = { version = "1.24.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.8", default-features = false }
cellulite = "0.3.1-nested-rtxns-2"
concat-arrays = "0.1.2"
convert_case = "0.8.0"
convert_case = "0.9.0"
crossbeam-channel = "0.5.15"
deserr = "0.6.3"
deserr = "0.6.4"
either = { version = "1.15.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
@@ -38,51 +38,52 @@ heed = { version = "0.22.1-nested-rtxns-6", default-features = false, features =
"serde-json",
"serde-bincode",
] }
indexmap = { version = "2.9.0", features = ["serde"] }
indexmap = { version = "2.12.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.7.5"
memmap2 = "0.9.7"
memchr = "2.7.6"
memmap2 = "0.9.9"
obkv = "0.3.0"
once_cell = "1.21.3"
ordered-float = "5.0.0"
rayon = "1.10.0"
ordered-float = "5.1.0"
rayon = "1.11.0"
roaring = { version = "0.10.12", features = ["serde"] }
rstar = { version = "0.12.2", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order", "raw_value"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallstr = { version = "0.3.1", features = ["serde"] }
smallvec = "1.15.1"
smartstring = "1.0.1"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
tempfile = "3.23.0"
thiserror = "2.0.17"
time = { version = "0.3.44", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.17.0", features = ["v4"] }
uuid = { version = "1.18.1", features = ["v4"] }

filter-parser = { path = "../filter-parser" }

# documents words self-join
itertools = "0.14.0"

csv = "1.3.1"
csv = "1.4.0"
candle-core = { version = "0.9.1" }
candle-transformers = { version = "0.9.1" }
candle-nn = { version = "0.9.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
tokenizers = { version = "0.22.1", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
tiktoken-rs = "0.7.0"
safetensors = "0.6.2"
tiktoken-rs = "0.9.1"
liquid = "0.26.11"
rhai = { version = "1.22.2", features = [
rhai = { version = "1.23.6", features = [
"serde",
"no_module",
"no_custom_syntax",
@@ -94,15 +95,14 @@ hannoy = { version = "0.0.9-nested-rtxns-2", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
url = "2.5.4"
hashbrown = "0.15.4"
bumpalo = "3.18.1"
url = "2.5.7"
hashbrown = "0.15.5"
bumpalo = "3.19.0"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
enum-iterator = "2.1.0"
enum-iterator = "2.3.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.4.0", features = [
@@ -112,21 +112,21 @@ utoipa = { version = "5.4.0", features = [
"time",
"openapi_extensions",
] }
lru = "0.14.0"
twox-hash = { version = "2.1.1", default-features = false, features = [
lru = "0.16.2"
twox-hash = { version = "2.1.2", default-features = false, features = [
"std",
"xxhash3_64",
"xxhash64",
] }
geo-types = "0.7.16"
geo-types = "0.7.17"
zerometry = "0.3.0"

[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }
mimalloc = { version = "0.1.48", default-features = false }
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
md5 = "0.8.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }

@@ -1173,6 +1173,7 @@ pub fn extract_embeddings_from_fragments<R: io::Read + io::Seek>(
request_threads,
&doc_alloc,
embedder_stats,
false,
on_embed,
);

@@ -35,6 +35,7 @@ pub struct EmbeddingExtractor<'a, 'b> {
possible_embedding_mistakes: PossibleEmbeddingMistakes,
embedder_stats: &'a EmbedderStats,
threads: &'a ThreadPoolNoAbort,
failure_modes: EmbedderFailureModes,
}

impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
@@ -46,7 +47,15 @@ impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
threads: &'a ThreadPoolNoAbort,
) -> Self {
let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
Self { embedders, sender, threads, possible_embedding_mistakes, embedder_stats }
let failure_modes = EmbedderFailureModes::from_env();
Self {
embedders,
sender,
threads,
possible_embedding_mistakes,
embedder_stats,
failure_modes,
}
}
}

@@ -91,6 +100,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
self.threads,
self.sender,
&context.doc_alloc,
self.failure_modes,
))
}

@@ -267,6 +277,7 @@ pub struct SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
failure_modes: EmbedderFailureModes,
}

impl<'a, 'b, SD: SettingsDelta> SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
@@ -279,7 +290,16 @@ impl<'a, 'b, SD: SettingsDelta> SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
threads: &'a ThreadPoolNoAbort,
) -> Self {
let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
Self { settings_delta, embedder_stats, sender, threads, possible_embedding_mistakes }
let failure_modes = EmbedderFailureModes::from_env();

Self {
settings_delta,
embedder_stats,
sender,
threads,
possible_embedding_mistakes,
failure_modes,
}
}
}

@@ -336,6 +356,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
self.threads,
self.sender,
&context.doc_alloc,
self.failure_modes,
),
reindex_action,
));
@@ -539,6 +560,7 @@ struct Chunks<'a, 'b, 'extractor> {
enum ChunkType<'a, 'b> {
DocumentTemplate {
document_template: &'a Prompt,
ignore_document_template_failures: bool,
session: EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, &'a str>,
},
Fragments {
@@ -559,6 +581,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
threads: &'a ThreadPoolNoAbort,
sender: EmbeddingSender<'a, 'b>,
doc_alloc: &'a Bump,
failure_modes: EmbedderFailureModes,
) -> Self {
let embedder = &runtime.embedder;
let dimensions = embedder.dimensions();
@@ -567,12 +590,14 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
let kind = if fragments.is_empty() {
ChunkType::DocumentTemplate {
document_template: &runtime.document_template,
ignore_document_template_failures: failure_modes.ignore_document_template_failures,
session: EmbedSession::new(
&runtime.embedder,
embedder_name,
threads,
doc_alloc,
embedder_stats,
failure_modes.ignore_embedder_failures,
OnEmbeddingDocumentUpdates {
embedder_id: embedder_info.embedder_id,
sender,
@@ -589,6 +614,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
threads,
doc_alloc,
embedder_stats,
failure_modes.ignore_embedder_failures,
OnEmbeddingDocumentUpdates {
embedder_id: embedder_info.embedder_id,
sender,
@@ -693,7 +719,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
},
)?;
}
ChunkType::DocumentTemplate { document_template, session } => {
ChunkType::DocumentTemplate {
document_template,
ignore_document_template_failures,
session,
} => {
let doc_alloc = session.doc_alloc();

let old_embedder = settings_delta.old_embedders().get(session.embedder_name());
@@ -702,6 +732,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
} else {
old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
};

let extractor =
DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
let old_extractor = old_document_template.map(|old_document_template| {
@@ -710,7 +741,15 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
let metadata =
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };

match extractor.diff_settings(document, &external_docid, old_extractor.as_ref())? {
let extractor_diff = if *ignore_document_template_failures {
let extractor = extractor.ignore_errors();
let old_extractor = old_extractor.map(DocumentTemplateExtractor::ignore_errors);
extractor.diff_settings(document, &external_docid, old_extractor.as_ref())?
} else {
extractor.diff_settings(document, &external_docid, old_extractor.as_ref())?
};

match extractor_diff {
ExtractorDiff::Removed => {
if old_is_user_provided || full_reindex {
session.on_embed_mut().clear_vectors(docid);
@@ -758,7 +797,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
new_must_regenerate,
);
match &mut self.kind {
ChunkType::DocumentTemplate { document_template, session } => {
ChunkType::DocumentTemplate {
document_template,
ignore_document_template_failures,
session,
} => {
let doc_alloc = session.doc_alloc();
let ex = DocumentTemplateExtractor::new(
document_template,
@@ -766,18 +809,33 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
new_fields_ids_map,
);

update_autogenerated(
docid,
external_docid,
[ex],
old_document,
new_document,
&external_docid,
old_must_regenerate,
old_is_user_provided,
session,
unused_vectors_distribution,
)?
if *ignore_document_template_failures {
update_autogenerated(
docid,
external_docid,
[ex.ignore_errors()],
old_document,
new_document,
&external_docid,
old_must_regenerate,
old_is_user_provided,
session,
unused_vectors_distribution,
)
} else {
update_autogenerated(
docid,
external_docid,
[ex],
old_document,
new_document,
&external_docid,
old_must_regenerate,
old_is_user_provided,
session,
unused_vectors_distribution,
)
}?
}
ChunkType::Fragments { fragments, session } => {
let doc_alloc = session.doc_alloc();
@@ -844,23 +902,38 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
);

match &mut self.kind {
ChunkType::DocumentTemplate { document_template, session } => {
ChunkType::DocumentTemplate {
document_template,
ignore_document_template_failures,
session,
} => {
let doc_alloc = session.doc_alloc();
let ex = DocumentTemplateExtractor::new(
document_template,
doc_alloc,
new_fields_ids_map,
);

insert_autogenerated(
docid,
external_docid,
[ex],
new_document,
&external_docid,
session,
unused_vectors_distribution,
)?;
if *ignore_document_template_failures {
insert_autogenerated(
docid,
external_docid,
[ex.ignore_errors()],
new_document,
&external_docid,
session,
unused_vectors_distribution,
)?;
} else {
insert_autogenerated(
docid,
external_docid,
[ex],
new_document,
&external_docid,
session,
unused_vectors_distribution,
)?;
}
}
ChunkType::Fragments { fragments, session } => {
let doc_alloc = session.doc_alloc();
@@ -884,7 +957,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {

pub fn drain(self, unused_vectors_distribution: &UnusedVectorsDistributionBump) -> Result<()> {
match self.kind {
ChunkType::DocumentTemplate { document_template: _, session } => {
ChunkType::DocumentTemplate {
document_template: _,
ignore_document_template_failures: _,
session,
} => {
session.drain(unused_vectors_distribution)?;
}
ChunkType::Fragments { fragments: _, session } => {
@@ -896,9 +973,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {

pub fn embedder_name(&self) -> &'a str {
match &self.kind {
ChunkType::DocumentTemplate { document_template: _, session } => {
session.embedder_name()
}
ChunkType::DocumentTemplate {
document_template: _,
ignore_document_template_failures: _,
session,
} => session.embedder_name(),
ChunkType::Fragments { fragments: _, session } => session.embedder_name(),
}
}
@@ -967,7 +1046,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
}
}
match &mut self.kind {
ChunkType::DocumentTemplate { document_template: _, session } => {
ChunkType::DocumentTemplate {
document_template: _,
ignore_document_template_failures: _,
session,
} => {
session.on_embed_mut().process_embeddings(
Metadata { docid, external_docid, extractor_id: 0 },
embeddings,
@@ -1078,3 +1161,41 @@ where

Ok(())
}

#[derive(Clone, Copy, PartialEq, Eq, Default)]
struct EmbedderFailureModes {
pub ignore_document_template_failures: bool,
pub ignore_embedder_failures: bool,
}

impl EmbedderFailureModes {
fn from_env() -> Self {
match std::env::var("MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES") {
Ok(failure_modes) => Self::parse_from_str(
&failure_modes,
"`MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES`",
),
Err(std::env::VarError::NotPresent) => Self::default(),
Err(std::env::VarError::NotUnicode(_)) => panic!(
"`MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES` contains a non-unicode value"
),
}
}

fn parse_from_str(failure_modes: &str, provenance: &'static str) -> Self {
let Self { mut ignore_document_template_failures, mut ignore_embedder_failures } =
Default::default();
for segment in failure_modes.split(',') {
let segment = segment.trim();
match segment {
"ignore_document_template_failures" => {
ignore_document_template_failures = true;
}
"ignore_embedder_failures" => ignore_embedder_failures = true,
"" => continue,
segment => panic!("Unrecognized segment value for {provenance}: {segment}"),
}
}
Self { ignore_document_template_failures, ignore_embedder_failures }
}
}
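
Editorial note, not part of the diff: a minimal sketch of how the new environment variable is interpreted by `parse_from_str` above; the variable value is illustrative:

// MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES="ignore_embedder_failures, ignore_document_template_failures"
let modes = EmbedderFailureModes::parse_from_str(
    "ignore_embedder_failures, ignore_document_template_failures",
    "`MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES`",
);
assert!(modes.ignore_embedder_failures);
assert!(modes.ignore_document_template_failures);
// An unrecognized segment would panic with the provenance message; an unset variable yields the defaults (both false).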

@@ -1631,8 +1631,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {

// Update index settings
let embedding_config_updates = self.update_embedding_configs()?;
self.update_user_defined_searchable_attributes()?;

let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
let mut new_inner_settings =
InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;

let primary_key_id = self
.index

@@ -42,6 +42,8 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&ToTargetNoOp { target: (1, 22, 0) },
&ToTargetNoOp { target: (1, 23, 0) },
&ToTargetNoOp { target: (1, 24, 0) },
&ToTargetNoOp { target: (1, 25, 0) },
&ToTargetNoOp { target: (1, 26, 0) },
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
@@ -77,6 +79,8 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
(1, 22, _) => function_index!(12),
(1, 23, _) => function_index!(13),
(1, 24, _) => function_index!(14),
(1, 25, _) => function_index!(15),
(1, 26, _) => function_index!(16),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,

@@ -1,9 +1,11 @@
use candle_core::Tensor;
use candle_nn::VarBuilder;
use candle_transformers::models::bert::{BertModel, Config, DTYPE};
use candle_transformers::models::bert::{BertModel, Config as BertConfig, DTYPE};
use candle_transformers::models::modernbert::{Config as ModernConfig, ModernBert};
// FIXME: currently we'll be using the hub to retrieve model, in the future we might want to embed it into Meilisearch itself
use hf_hub::api::sync::Api;
use hf_hub::{Repo, RepoType};
use safetensors::SafeTensors;
use tokenizers::{PaddingParams, Tokenizer};

use super::EmbeddingCache;
@@ -84,14 +86,21 @@ impl Default for EmbedderOptions {
}
}

enum ModelKind {
Bert(BertModel),
Modern(ModernBert),
}

/// Perform embedding of documents and queries
pub struct Embedder {
model: BertModel,
model: ModelKind,
tokenizer: Tokenizer,
options: EmbedderOptions,
dimensions: usize,
pooling: Pooling,
cache: EmbeddingCache,
device: candle_core::Device,
max_len: usize,
}

impl std::fmt::Debug for Embedder {
@@ -101,10 +110,60 @@ impl std::fmt::Debug for Embedder {
.field("tokenizer", &self.tokenizer)
.field("options", &self.options)
.field("pooling", &self.pooling)
.field("device", &self.device)
.field("max_len", &self.max_len)
.finish()
}
}

// some models do not have the "model." prefix in their safetensors weights
fn change_tensor_names(
weights_path: &std::path::Path,
) -> Result<std::path::PathBuf, NewEmbedderError> {
let data = std::fs::read(weights_path)
.map_err(|e| NewEmbedderError::safetensor_weight(candle_core::Error::Io(e)))?;

let tensors = SafeTensors::deserialize(&data)
.map_err(|e| NewEmbedderError::safetensor_weight(candle_core::Error::Msg(e.to_string())))?;

let names = tensors.names();
let has_model_prefix = names.iter().any(|n| n.starts_with("model."));

if has_model_prefix {
return Ok(weights_path.to_path_buf());
}

let fixed_path = weights_path.with_extension("fixed.safetensors");

if fixed_path.exists() {
return Ok(fixed_path);
}

let mut new_tensors = vec![];
for name in names {
let tensor_view = tensors.tensor(name).map_err(|e| {
NewEmbedderError::safetensor_weight(candle_core::Error::Msg(e.to_string()))
})?;

let new_name = format!("model.{}", name);
let data_offset = tensor_view.data();
let shape = tensor_view.shape();
let dtype = tensor_view.dtype();

new_tensors.push((new_name, shape.to_vec(), dtype, data_offset));
}

use safetensors::tensor::TensorView;
let views = new_tensors.iter().map(|(name, shape, dtype, data)| {
(name.as_str(), TensorView::new(*dtype, shape.clone(), data).unwrap())
});

safetensors::serialize_to_file(views, None, &fixed_path)
.map_err(|e| NewEmbedderError::safetensor_weight(candle_core::Error::Msg(e.to_string())))?;

Ok(fixed_path)
}

#[derive(Clone, Copy, serde::Deserialize)]
struct PoolingConfig {
#[serde(default)]
@@ -220,19 +279,42 @@ impl Embedder {
(config, tokenizer, weights, source, pooling)
};

let config = std::fs::read_to_string(&config_filename)
let config_str = std::fs::read_to_string(&config_filename)
.map_err(|inner| NewEmbedderError::open_config(config_filename.clone(), inner))?;
let config: Config = serde_json::from_str(&config).map_err(|inner| {
NewEmbedderError::deserialize_config(
options.model.clone(),
config,
config_filename,
inner,
)
})?;

let cfg_val: serde_json::Value = match serde_json::from_str(&config_str) {
Ok(v) => v,
Err(inner) => {
return Err(NewEmbedderError::deserialize_config(
options.model.clone(),
config_str.clone(),
config_filename.clone(),
inner,
));
}
};

let model_type = cfg_val.get("model_type").and_then(|v| v.as_str()).unwrap_or_default();
let arch_arr = cfg_val.get("architectures").and_then(|v| v.as_array());
let has_arch = |needle: &str| {
model_type.eq_ignore_ascii_case(needle)
|| arch_arr.is_some_and(|arr| {
arr.iter().filter_map(|v| v.as_str()).any(|s| s.to_lowercase().contains(needle))
})
};

let is_modern = has_arch("modernbert");
tracing::debug!(is_modern, model_type, "detected HF architecture");

let mut tokenizer = Tokenizer::from_file(&tokenizer_filename)
.map_err(|inner| NewEmbedderError::open_tokenizer(tokenizer_filename, inner))?;

let weights_filename = if is_modern && weight_source == WeightSource::Safetensors {
change_tensor_names(&weights_filename)?
} else {
weights_filename
};

let vb = match weight_source {
WeightSource::Pytorch => VarBuilder::from_pth(&weights_filename, DTYPE, &device)
.map_err(NewEmbedderError::pytorch_weight)?,
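
Editorial note, not part of the diff: a minimal sketch of the architecture detection introduced above, run against a hypothetical config.json (the field values are illustrative, not taken from any real model):

let cfg_val: serde_json::Value = serde_json::json!({
    "model_type": "modernbert",
    "architectures": ["ModernBertModel"],
    "max_position_embeddings": 8192
});
let model_type = cfg_val.get("model_type").and_then(|v| v.as_str()).unwrap_or_default();
let arch_arr = cfg_val.get("architectures").and_then(|v| v.as_array());
let has_arch = |needle: &str| {
    model_type.eq_ignore_ascii_case(needle)
        || arch_arr.is_some_and(|arr| {
            arr.iter().filter_map(|v| v.as_str()).any(|s| s.to_lowercase().contains(needle))
        })
};
// Either the model_type or an entry in architectures matching "modernbert" selects the ModernBert path.
assert!(has_arch("modernbert"));
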
|
||||
@@ -244,7 +326,31 @@ impl Embedder {
|
||||
|
||||
tracing::debug!(model = options.model, weight=?weight_source, pooling=?pooling, "model config");
|
||||
|
||||
let model = BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?;
|
||||
// max length from config, fallback to 512
|
||||
let max_len =
|
||||
cfg_val.get("max_position_embeddings").and_then(|v| v.as_u64()).unwrap_or(512) as usize;
|
||||
|
||||
let model = if is_modern {
|
||||
let config: ModernConfig = serde_json::from_str(&config_str).map_err(|inner| {
|
||||
NewEmbedderError::deserialize_config(
|
||||
options.model.clone(),
|
||||
config_str.clone(),
|
||||
config_filename.clone(),
|
||||
inner,
|
||||
)
|
||||
})?;
|
||||
ModelKind::Modern(ModernBert::load(vb, &config).map_err(NewEmbedderError::load_model)?)
|
||||
} else {
|
||||
let config: BertConfig = serde_json::from_str(&config_str).map_err(|inner| {
|
||||
NewEmbedderError::deserialize_config(
|
||||
options.model.clone(),
|
||||
config_str.clone(),
|
||||
config_filename.clone(),
|
||||
inner,
|
||||
)
|
||||
})?;
|
||||
ModelKind::Bert(BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?)
|
||||
};
|
||||
|
||||
if let Some(pp) = tokenizer.get_padding_mut() {
|
||||
pp.strategy = tokenizers::PaddingStrategy::BatchLongest
|
||||
@@ -263,6 +369,8 @@ impl Embedder {
|
||||
dimensions: 0,
|
||||
pooling,
|
||||
cache: EmbeddingCache::new(cache_cap),
|
||||
device,
|
||||
max_len,
|
||||
};
|
||||
|
||||
let embeddings = this
@@ -321,15 +429,29 @@ impl Embedder {
pub fn embed_one(&self, text: &str) -> std::result::Result<Embedding, EmbedError> {
    let tokens = self.tokenizer.encode(text, true).map_err(EmbedError::tokenize)?;
    let token_ids = tokens.get_ids();
    let token_ids = if token_ids.len() > 512 { &token_ids[..512] } else { token_ids };
    let token_ids =
        Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?;
        if token_ids.len() > self.max_len { &token_ids[..self.max_len] } else { token_ids };
    let token_ids = Tensor::new(token_ids, &self.device).map_err(EmbedError::tensor_shape)?;
    let token_ids = Tensor::stack(&[token_ids], 0).map_err(EmbedError::tensor_shape)?;
    let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
    let embeddings = self
        .model
        .forward(&token_ids, &token_type_ids, None)
        .map_err(EmbedError::model_forward)?;

    let embeddings = match &self.model {
        ModelKind::Bert(model) => {
            let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
            model
                .forward(&token_ids, &token_type_ids, None)
                .map_err(EmbedError::model_forward)?
        }
        ModelKind::Modern(model) => {
            let mut mask_vec = tokens.get_attention_mask().to_vec();
            if mask_vec.len() > self.max_len {
                mask_vec.truncate(self.max_len);
            }
            let mask = Tensor::new(mask_vec.as_slice(), &self.device)
                .map_err(EmbedError::tensor_shape)?;
            let mask = Tensor::stack(&[mask], 0).map_err(EmbedError::tensor_shape)?;
            model.forward(&token_ids, &mask).map_err(EmbedError::model_forward)?
        }
    };

    let embedding = Self::pooling(embeddings, self.pooling)?;
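`Self::pooling` itself is outside this hunk; for orientation only, a minimal mean-pooling step over a `[batch, seq_len, hidden]` output tensor could look like the sketch below (an illustration under that shape assumption, not the function from this file):

// Sketch only: average the hidden states across the sequence dimension.
use candle_core::{Result, Tensor};

fn mean_pool(embeddings: &Tensor) -> Result<Tensor> {
    let (_batch, seq_len, _hidden) = embeddings.dims3()?;
    // sum over dim 1 (tokens), then divide by the token count
    embeddings.sum(1)? / (seq_len as f64)
}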
@@ -91,6 +91,7 @@ struct EmbedderData {
    request: RequestData,
    response: Response,
    configuration_source: ConfigurationSource,
    max_retry_duration: std::time::Duration,
}

#[derive(Debug)]
@@ -182,10 +183,15 @@ impl Embedder {
) -> Result<Self, NewEmbedderError> {
    let bearer = options.api_key.as_deref().map(|api_key| format!("Bearer {api_key}"));

    let timeout = std::env::var("MEILI_EXPERIMENTAL_REST_EMBEDDER_TIMEOUT_SECONDS")
        .ok()
        .map(|p| p.parse().unwrap())
        .unwrap_or(30);

    let client = ureq::AgentBuilder::new()
        .max_idle_connections(REQUEST_PARALLELISM * 2)
        .max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
        .timeout(std::time::Duration::from_secs(30))
        .timeout(std::time::Duration::from_secs(timeout))
        .build();

    let request = RequestData::new(
@@ -196,6 +202,14 @@ impl Embedder {

    let response = Response::new(options.response, &request)?;

    let max_retry_duration =
        std::env::var("MEILI_EXPERIMENTAL_REST_EMBEDDER_MAX_RETRY_DURATION_SECONDS")
            .ok()
            .map(|p| p.parse().unwrap())
            .unwrap_or(60);

    let max_retry_duration = std::time::Duration::from_secs(max_retry_duration);
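Both experimental knobs follow the same read-parse-fallback pattern. A hedged sketch of a shared helper (the name `env_seconds_or` is hypothetical; unlike the code above, it ignores malformed values instead of panicking on `parse().unwrap()`):

// Sketch only, not part of this change.
fn env_seconds_or(var: &str, default: u64) -> std::time::Duration {
    let secs = std::env::var(var)
        .ok()
        .and_then(|raw| raw.parse::<u64>().ok()) // fall back instead of panicking on bad input
        .unwrap_or(default);
    std::time::Duration::from_secs(secs)
}

// e.g. env_seconds_or("MEILI_EXPERIMENTAL_REST_EMBEDDER_TIMEOUT_SECONDS", 30)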

    let data = EmbedderData {
        client,
        bearer,
@@ -204,6 +218,7 @@ impl Embedder {
        response,
        configuration_source,
        headers: options.headers,
        max_retry_duration,
    };

    let dimensions = if let Some(dimensions) = options.dimensions {
@@ -457,7 +472,7 @@ where
    }
}?;

let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
let retry_duration = retry_duration.min(data.max_retry_duration); // don't wait more than the max duration

// randomly up to double the retry duration
let retry_duration = retry_duration
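The jitter computation itself is cut off by the hunk boundary above; the intent stated in the comment (cap at the configured maximum, then wait up to twice as long) can be sketched as follows, assuming the `rand` crate is available:

// Sketch only: the real code continues past the end of the hunk.
fn capped_jittered(retry: std::time::Duration, max: std::time::Duration) -> std::time::Duration {
    let capped = retry.min(max);
    // multiply by a factor in [1.0, 2.0): randomly up to double the retry duration
    capped.mul_f64(1.0 + rand::random::<f64>())
}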

@@ -550,9 +550,9 @@ pub struct DeserializePoolingConfig {
#[derive(Debug, thiserror::Error)]
#[error("model `{model_name}` appears to be unsupported{}\n - inner error: {inner}",
    if architectures.is_empty() {
        "\n - Note: only models with architecture \"BertModel\" are supported.".to_string()
        "\n - Note: only models with architecture \"BertModel\" or \"ModernBert\" are supported.".to_string()
    } else {
        format!("\n - Note: model has declared architectures `{architectures:?}`, only models with architecture `\"BertModel\"` are supported.")
        format!("\n - Note: model has declared architectures `{architectures:?}`, only models with architecture `\"BertModel\"` or `\"ModernBert\"` are supported.")
    })]
pub struct UnsupportedModel {
    pub model_name: String,

@@ -44,6 +44,7 @@ pub struct EmbedSession<'doc, C, I> {
    embedder_name: &'doc str,

    embedder_stats: &'doc EmbedderStats,
    ignore_embedding_failures: bool,

    on_embed: C,
}
@@ -87,6 +88,7 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
    threads: &'doc ThreadPoolNoAbort,
    doc_alloc: &'doc Bump,
    embedder_stats: &'doc EmbedderStats,
    ignore_embedding_failures: bool,
    on_embed: C,
) -> Self {
    let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
@@ -99,6 +101,7 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
    threads,
    embedder_name,
    embedder_stats,
    ignore_embedding_failures,
    on_embed,
}
}
@@ -109,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
    rendered: I,
    unused_vectors_distribution: &C::ErrorMetadata,
) -> Result<()> {
    if self.inputs.len() < self.inputs.capacity() {
        self.inputs.push(rendered);
        self.metadata.push(metadata);
        return Ok(());
    if self.inputs.len() >= self.inputs.capacity() {
        self.embed_chunks(unused_vectors_distribution)?;
    }

    self.embed_chunks(unused_vectors_distribution)
    self.inputs.push(rendered);
    self.metadata.push(metadata);
    Ok(())
}
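With this reordering the buffer is flushed before the new input is queued, so the item that triggers the flush lands in the next chunk instead of being handled past capacity. A hypothetical driver loop for the session (method and variable names are assumed, since the signature's first line is outside the hunk):

// Sketch of the intended call pattern, not code from this file.
for (metadata, rendered) in prompts {
    session.request_embedding(metadata, rendered, &unused_vectors_distribution)?;
}
// flush whatever is left and recover the callback
let on_embed = session.drain(&unused_vectors_distribution)?;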

pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<C> {
@@ -144,24 +146,33 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
            Ok(())
        }
        Err(error) => {
            // reset metadata and inputs, and send metadata to the error processing.
            // send metadata to the error processing.
            let doc_alloc = self.metadata.bump();
            let metadata = std::mem::replace(
                &mut self.metadata,
                BVec::with_capacity_in(self.inputs.capacity(), doc_alloc),
            );
            self.inputs.clear();
            return Err(self.on_embed.process_embedding_error(
            Err(self.on_embed.process_embedding_error(
                error,
                self.embedder_name,
                unused_vectors_distribution,
                metadata,
            ));
            ))
        }
    };
    self.inputs.clear();
    self.metadata.clear();
    res
    if self.ignore_embedding_failures {
        if let Err(err) = res {
            tracing::warn!(
                %err,
                "ignored error embedding batch of documents due to failure policy"
            );
        }
        Ok(())
    } else {
        res
    }
}

pub(crate) fn embedder_name(&self) -> &'doc str {

@@ -7,6 +7,6 @@ publish = false
[dependencies]
meilisearch = { path = "../meilisearch" , default-features = false}
serde_json = "1.0"
clap = { version = "4.5.40", features = ["derive"] }
anyhow = "1.0.98"
clap = { version = "4.5.52", features = ["derive"] }
anyhow = "1.0.100"
utoipa = "5.4.0"

@@ -8,8 +8,8 @@ edition = "2021"
[dependencies]
color-spantrace = "0.3.0"
fxprof-processed-profile = "0.7.0"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
tracing = "0.1.41"
tracing-error = "0.2.1"
tracing-subscriber = "0.3.20"
@@ -18,7 +18,7 @@ byte-unit = { version = "5.1.6", default-features = false, features = [
    "byte",
    "serde",
] }
tokio = { version = "1.45.1", features = ["sync"] }
tokio = { version = "1.48.0", features = ["sync"] }

[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.10"
libproc = "0.14.11"

@@ -11,27 +11,27 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.98"
anyhow = "1.0.100"
build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.20.0"
clap = { version = "4.5.40", features = ["derive"] }
cargo_metadata = "0.23.1"
clap = { version = "4.5.52", features = ["derive"] }
futures-core = "0.3.31"
futures-util = "0.3.31"
reqwest = { version = "0.12.20", features = [
reqwest = { version = "0.12.24", features = [
    "stream",
    "json",
    "rustls-tls",
], default-features = false }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
sha2 = "0.10.9"
sysinfo = "0.35.2"
time = { version = "0.3.41", features = [
sysinfo = "0.37.2"
time = { version = "0.3.44", features = [
    "serde",
    "serde-human-readable",
    "macros",
] }
tokio = { version = "1.45.1", features = [
tokio = { version = "1.48.0", features = [
    "rt",
    "net",
    "time",
@@ -41,4 +41,4 @@ tokio = { version = "1.45.1", features = [
tracing = "0.1.41"
tracing-subscriber = "0.3.20"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.17.0", features = ["v7", "serde"] }
uuid = { version = "1.18.1", features = ["v7", "serde"] }