Compare commits

...

23 Commits

Author SHA1 Message Date
Clément Renault
cf62af13e8 Merge pull request #6005 from meilisearch/clamp-max-batch-size
Clamp max batch size to 10 GiB
2025-11-20 10:45:23 +00:00
Many the fish
91cf94c196 Merge pull request #5999 from meilisearch/fix-document-fetch-sort
Fix the Document Fetch pagination bug when Sort is applied
2025-11-20 10:15:04 +00:00
Clément Renault
753ba39199 Update the documentation of the batch size 2025-11-20 10:33:02 +01:00
Clément Renault
3944c25853 Clamp the maximum batch size to maximum 10GiB 2025-11-20 10:29:50 +01:00
ManyTheFish
925bce5fbd Modify the test to test all the sort branches and fix the untested branch 2025-11-20 10:27:24 +01:00
ManyTheFish
62065ed30d Fix the pagination bug
where the last document of the previous page was duplicated as the first
document of the current page. This was due to a bug on the custom nth
function of the sort ranking rule skipping `n-1` documents instead of `n`
2025-11-20 10:27:24 +01:00
Clément Renault
97e6ae1957 Merge pull request #5994 from meilisearch/improve-s3-error-messages
Improve S3 upload by showing errors in the task queue
2025-11-19 16:58:02 +00:00
Clément Renault
5ed9be0789 Merge pull request #5990 from meilisearch/default-max-batch-size
Make the limit batched tasks total size defaults to half of the max indexing memory
2025-11-19 16:56:34 +00:00
Clément Renault
7597b1049f Merge pull request #6001 from meilisearch/update-windows-macos-ci
Update the macOS platform version in the CI
2025-11-19 16:12:52 +00:00
Clément Renault
d99150f21b Improve error message extraction
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-19 17:09:15 +01:00
Kerollmops
c9726674a0 Make the limit batched tasks total size default to half of max indexing
memory
2025-11-19 17:04:45 +01:00
Clément Renault
205f40b3b8 Update the macOS platform version to use version 14 2025-11-19 16:10:41 +01:00
Clément Renault
3d013cdebe Merge pull request #5995 from meilisearch/fix-embedding-skip
Fix embedding skip
2025-11-18 10:02:53 +00:00
Louis Dureuil
ddeff5678f Clippy happy 2025-11-17 14:48:40 +01:00
Louis Dureuil
a235434910 Add test 2025-11-17 13:52:23 +01:00
Louis Dureuil
a376525348 Do not skip embedding request for the document that exceeds capacity 2025-11-17 13:18:58 +01:00
Kerollmops
361580f451 Display the error message on failure 2025-11-17 09:21:18 +01:00
Clément Renault
ea70a7d1c9 Merge pull request #5969 from xuhongxu96/main
Remove unused dependency `allocator-api2`
2025-11-15 10:03:15 +00:00
Clément Renault
9304f8e586 Merge pull request #5991 from meilisearch/release-v1.26.0
Release v1.26.0
2025-11-13 17:54:01 +00:00
Louis Dureuil
495db080ec Upgrade snap 2025-11-13 17:52:34 +01:00
Louis Dureuil
d71341fa48 Suport upgrade to v1.26.0 2025-11-13 17:52:02 +01:00
Louis Dureuil
5b3070d8c3 Update version in toml and lock 2025-11-13 17:35:26 +01:00
Hongxu Xu
08bc982748 Remove unused dependency allocator-api2 2025-11-04 03:29:24 +00:00
32 changed files with 303 additions and 94 deletions

View File

@@ -65,9 +65,9 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
os: [macos-14, windows-2022]
include:
- os: macos-13
- os: macos-14
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-2022
@@ -90,7 +90,7 @@ jobs:
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: macos-13
runs-on: macos-14
needs: check-version
strategy:
matrix:

View File

@@ -47,7 +47,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
os: [macos-14, windows-2022]
steps:
- uses: actions/checkout@v5
- name: Cache dependencies

49
Cargo.lock generated
View File

@@ -345,12 +345,6 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "allocator-api2"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c583acf993cf4245c4acb0a2cc2ab1f9cc097de73411bb6d3647ff6af2b1013d"
[[package]]
name = "anes"
version = "0.1.6"
@@ -590,7 +584,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
[[package]]
name = "benchmarks"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"anyhow",
"bumpalo",
@@ -800,7 +794,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"anyhow",
"time",
@@ -813,7 +807,7 @@ version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
dependencies = [
"allocator-api2 0.2.21",
"allocator-api2",
"serde",
]
@@ -823,7 +817,7 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
dependencies = [
"allocator-api2 0.2.21",
"allocator-api2",
"bitpacking",
"bumpalo",
"hashbrown 0.15.5",
@@ -1790,7 +1784,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"anyhow",
"big_s",
@@ -2033,7 +2027,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"tempfile",
"thiserror 2.0.16",
@@ -2055,7 +2049,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"insta",
"levenshtein_automata",
@@ -2083,7 +2077,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"criterion",
"serde_json",
@@ -2240,7 +2234,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"arbitrary",
"bumpalo",
@@ -2766,7 +2760,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash 0.8.12",
"allocator-api2 0.2.21",
"allocator-api2",
]
[[package]]
@@ -2775,7 +2769,7 @@ version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2 0.2.21",
"allocator-api2",
"equivalent",
"foldhash",
"serde",
@@ -3194,7 +3188,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"anyhow",
"backoff",
@@ -3467,7 +3461,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"criterion",
"serde_json",
@@ -3986,7 +3980,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"insta",
"md5",
@@ -3997,7 +3991,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -4094,7 +4088,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -4113,7 +4107,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"actix-web",
"anyhow",
@@ -4148,7 +4142,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"anyhow",
"clap",
@@ -4182,9 +4176,8 @@ dependencies = [
[[package]]
name = "milli"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"allocator-api2 0.3.1",
"arroy",
"bbqueue",
"big_s",
@@ -4764,7 +4757,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "permissive-json-pointer"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"big_s",
"serde_json",
@@ -7886,7 +7879,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.25.0"
version = "1.26.0"
dependencies = [
"anyhow",
"build-info",

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.25.0"
version = "1.26.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -438,12 +438,15 @@ async fn multipart_stream_to_s3(
db_name: String,
reader: std::io::PipeReader,
) -> Result<(), Error> {
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
use std::collections::VecDeque;
use std::io;
use std::os::fd::OwnedFd;
use std::path::PathBuf;
use bytes::{Bytes, BytesMut};
use reqwest::{Client, Response};
use rusty_s3::S3Action as _;
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
use rusty_s3::actions::CreateMultipartUpload;
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
use tokio::task::JoinHandle;
let reader = OwnedFd::from(reader);
@@ -517,7 +520,6 @@ async fn multipart_stream_to_s3(
while buffer.len() < (s3_multipart_part_size as usize / 2) {
// Wait for the pipe to be readable
use std::io;
reader.readable().await?;
match reader.try_read_buf(&mut buffer) {
@@ -581,15 +583,17 @@ async fn multipart_stream_to_s3(
async move {
match client.post(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
Err(backoff::Error::Permanent(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}))
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
}
}
})
.await
.map_err(Error::S3HttpError)?;
.await?;
let status = resp.status();
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -48,6 +48,8 @@ pub fn upgrade_index_scheduler(
(1, 22, _) => 0,
(1, 23, _) => 0,
(1, 24, _) => 0,
(1, 25, _) => 0,
(1, 26, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)

View File

@@ -195,7 +195,7 @@ struct Infos {
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
experimental_limit_batched_tasks_total_size: u64,
experimental_limit_batched_tasks_total_size: Option<u64>,
experimental_network: bool,
experimental_multimodal: bool,
experimental_chat_completions: bool,
@@ -359,7 +359,7 @@ impl Infos {
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size:
experimental_limit_batched_tasks_total_size.into(),
experimental_limit_batched_tasks_total_size.map(|size| size.as_u64()),
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),

View File

@@ -230,7 +230,17 @@ pub fn setup_meilisearch(
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
|| {
opt.indexer_options
.max_indexing_memory
// By default, we use half of the available memory to determine the size of batched tasks
.map_or(u64::MAX, |mem| mem.as_u64() / 2)
// And never exceed 10 GiB when we infer the limit
.min(10 * 1024 * 1024 * 1024)
},
|size| size.as_u64(),
),
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(),

View File

@@ -473,11 +473,14 @@ pub struct Opt {
#[serde(default = "default_limit_batched_tasks")]
pub experimental_max_number_of_batched_tasks: usize,
/// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once,
/// see: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
#[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: Byte,
/// Experimentally controls the maximum total size, in bytes, of tasks that will be processed
/// simultaneously. When unspecified, defaults to half of the maximum indexing memory and
/// clamped to 10 GiB.
///
/// See: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE)]
#[serde(default)]
pub experimental_limit_batched_tasks_total_size: Option<Byte>,
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
/// distinct embedder.
@@ -701,10 +704,12 @@ impl Opt {
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
experimental_max_number_of_batched_tasks.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
experimental_limit_batched_tasks_total_size.to_string(),
);
if let Some(limit) = experimental_limit_batched_tasks_total_size {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
limit.to_string(),
);
}
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
experimental_embedding_cache_entries.to_string(),
@@ -1273,10 +1278,6 @@ fn default_limit_batched_tasks() -> usize {
usize::MAX
}
fn default_limit_batched_tasks_total_size() -> Byte {
Byte::from_u64(u64::MAX)
}
/// Returns `0`, the default number of embedding cache entries.
// NOTE(review): presumably used as the default for `experimental_embedding_cache_entries`,
// in which case 0 would mean no entries are cached; confirm at the option definition.
fn default_embedding_cache_entries() -> usize {
0
}

View File

@@ -1,14 +1,14 @@
use crate::search::{Personalize, SearchResult};
use meilisearch_types::{
error::{Code, ErrorCode, ResponseError},
milli::TimeBudget,
};
use std::time::Duration;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::milli::TimeBudget;
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::{debug, info, warn};
use crate::search::{Personalize, SearchResult};
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
const MAX_RETRIES: u32 = 10;

View File

@@ -18,10 +18,9 @@ use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use crate::search::SearchMetadata;
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
use crate::milli::vector::Embedding;
use crate::search::SearchMetadata;
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;

View File

@@ -1339,3 +1339,117 @@ async fn get_document_with_vectors() {
}
"###);
}
/// Regression test for paginated document fetching when a sort is applied.
///
/// Walks four consecutive pages (`limit: 2`, offsets 0, 2, 4 and 6) sorted by `name:asc`
/// and snapshots each page, so that a document repeated across a page boundary
/// (the last document of one page showing up again as the first of the next) is caught.
#[actix_rt::test]
async fn test_fetch_documents_pagination_with_sorting() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
// Set name as sortable attribute
let (task, code) = index.update_settings_sortable_attributes(json!(["name"])).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.succeeded();
// Build 50 documents with ids 0..50. Ids 0 to 4 get distinct names, while every id >= 5
// shares the name `doc_00005`, so the sorted listing ends with a long run of tied names.
let documents = json!((0..50)
.map(|i| json!({"id": i, "name": format!("doc_{:05}", std::cmp::min(i, 5))}))
.collect::<Vec<_>>());
// Add documents as described in the bug report
let (task, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.succeeded();
// Request 1 (first page): offset 0, limit 2
let (response, code) = index
.fetch_documents(json!({
"offset": 0,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 0,
"name": "doc_00000"
},
{
"id": 1,
"name": "doc_00001"
}
]
"###);
// Request 2 (second page): offset 2, limit 2
// Must start at id 2, not repeat id 1 from the previous page.
let (response, code) = index
.fetch_documents(json!({
"offset": 2,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 2,
"name": "doc_00002"
},
{
"id": 3,
"name": "doc_00003"
}
]
"###);
// Request 3 (third page): offset 4, limit 2
// This page crosses from the distinct names into the first document named `doc_00005`.
let (response, code) = index
.fetch_documents(json!({
"offset": 4,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 4,
"name": "doc_00004"
},
{
"id": 5,
"name": "doc_00005"
}
]
"###);
// Request 4 (fourth page): offset 6, limit 2
// This page lies entirely inside the run of documents sharing the name `doc_00005`.
let (response, code) = index
.fetch_documents(json!({
"offset": 6,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 6,
"name": "doc_00005"
},
{
"id": 7,
"name": "doc_00005"
}
]
"###);
}

View File

@@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}])
});
/// Fixture of ten documents with string ids `"1"` through `"10"`, used by
/// `issue_5976_missing_docs_hf`.
///
/// Documents `"1"` and `"2"` have their own `title`/`desc`; documents `"3"` through `"10"`
/// all carry the same `title` ("Captain Marvel") and `desc` ("a Shazam ersatz") and differ
/// only by `id`.
static MANY_DOCS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "4",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "5",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "6",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "7",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "8",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "9",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "10",
}])
});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new_shared();
@@ -449,6 +503,38 @@ async fn simple_search_hf() {
snapshot!(response["semanticHitCount"], @"3");
}
/// Indexes `MANY_DOCS` through `index_with_documents_hf`, runs a hybrid search with
/// `semanticRatio` set to 1.0 and `retrieveVectors` enabled, and snapshots, for every hit,
/// whether its `default` embeddings array is empty. All ten entries are expected to be
/// `false`, i.e. every returned document carries an embedding.
#[actix_rt::test]
async fn issue_5976_missing_docs_hf() {
    let server = Server::new_shared();
    let index = index_with_documents_hf(server, &MANY_DOCS).await;

    // Search request relying exclusively on the `default` embedder.
    let query = json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
    let (response, code) = index.search_post(query).await;
    snapshot!(code, @"200 OK");

    // Record, hit by hit, whether the returned embeddings array is empty.
    let hits = response["hits"].as_array().unwrap();
    let mut are_empty = Vec::with_capacity(hits.len());
    for hit in hits {
        let embeddings = hit["_vectors"]["default"]["embeddings"].as_array().unwrap();
        are_empty.push(embeddings.is_empty());
    }
    snapshot!(json!(are_empty), @r###"
[
false,
false,
false,
false,
false,
false,
false,
false,
false,
false
]
"###);
}
#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new_shared();

View File

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.25.0");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.26.0");
}
#[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.25.1 is higher than the Meilisearch version 1.25.0. Downgrade is not supported");
snapshot!(err, @"Database version 1.26.1 is higher than the Meilisearch version 1.26.0. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.25.0"
"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -101,7 +101,6 @@ bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }

View File

@@ -87,7 +87,7 @@ impl Iterator for SortedDocumentsIterator<'_> {
};
// Otherwise don't directly iterate over children, skip them if we know we will go further
let mut to_skip = n - 1;
let mut to_skip = n;
while to_skip > 0 {
if let Err(e) = SortedDocumentsIterator::update_current(
current_child,
@@ -108,7 +108,7 @@ impl Iterator for SortedDocumentsIterator<'_> {
continue;
} else {
// The current iterator is large enough, so we can forward the call to it.
return inner.nth(to_skip + 1);
return inner.nth(to_skip);
}
}

View File

@@ -43,6 +43,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&ToTargetNoOp { target: (1, 23, 0) },
&ToTargetNoOp { target: (1, 24, 0) },
&ToTargetNoOp { target: (1, 25, 0) },
&ToTargetNoOp { target: (1, 26, 0) },
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
@@ -79,6 +80,7 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
(1, 23, _) => function_index!(13),
(1, 24, _) => function_index!(14),
(1, 25, _) => function_index!(15),
(1, 26, _) => function_index!(16),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,

View File

@@ -112,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
rendered: I,
unused_vectors_distribution: &C::ErrorMetadata,
) -> Result<()> {
if self.inputs.len() < self.inputs.capacity() {
self.inputs.push(rendered);
self.metadata.push(metadata);
return Ok(());
if self.inputs.len() >= self.inputs.capacity() {
self.embed_chunks(unused_vectors_distribution)?;
}
self.embed_chunks(unused_vectors_distribution)
self.inputs.push(rendered);
self.metadata.push(metadata);
Ok(())
}
pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<C> {