Compare commits

...

67 Commits

Author SHA1 Message Date
Mubelotix
47fa91c39f Add documentation 2025-08-26 17:04:19 +02:00
Mubelotix
2ac623f1e4 Api key tests 2025-08-26 16:17:13 +02:00
Mubelotix
5b31960967 Add variable registration mechanism 2025-08-26 15:30:17 +02:00
Mubelotix
b99410ee6c Update movies workload 2025-08-26 14:53:46 +02:00
Mubelotix
36ff335a4d Test for upgrade 2025-08-26 14:33:43 +02:00
Mubelotix
0558d41930 Fix processing time ms 2025-08-26 14:33:24 +02:00
Mubelotix
a61d9f8584 Update issue template 2025-08-26 14:05:01 +02:00
Mubelotix
fdb716c818 Update movies 2025-08-26 13:52:38 +02:00
Mubelotix
997e7d4bfd Fix line feed at the end of files 2025-08-26 13:52:34 +02:00
Mubelotix
1ed2a654c9 Update emojis 2025-08-26 13:45:18 +02:00
Mubelotix
39e796ee03 Add redaction system 2025-08-26 13:39:08 +02:00
Mubelotix
8e3a0c339f Fix compilation 2025-08-26 12:17:50 +02:00
Mubelotix
40b49543f1 Improve error detection 2025-08-26 12:11:03 +02:00
Mubelotix
1528cfe683 Improve diffing 2025-08-26 12:08:38 +02:00
Mubelotix
c839b804fb Add upgrade system 2025-08-26 12:02:18 +02:00
Mubelotix
baa4c75af8 Refactor around meili_path 2025-08-26 11:13:40 +02:00
Mubelotix
a1424e1cb4 Move file to common 2025-08-26 11:09:03 +02:00
Mubelotix
ab91ea8b47 Remove useless data 2025-08-26 11:06:23 +02:00
Mubelotix
857bdffb1a Remove useless parameter 2025-08-26 11:02:47 +02:00
Mubelotix
4290901dea Add response updating logic 2025-08-26 10:59:12 +02:00
Mubelotix
b2a72b0363 Fix asset version issues 2025-08-25 15:39:05 +02:00
Mubelotix
d649732acd Do so that meilisearch versions get downloaded 2025-08-25 15:16:26 +02:00
Mubelotix
c98efe18c9 Implement test workload running logic 2025-08-25 13:32:34 +02:00
Mubelotix
0d8b2edfb0 Continue integrating commands to tests 2025-08-25 12:24:21 +02:00
Mubelotix
0e25398d3e Remove dead code 2025-08-25 12:13:27 +02:00
Mubelotix
72b6b73a91 Make commands common 2025-08-25 12:07:27 +02:00
Mubelotix
3a2ec5f576 Tag workloads 2025-08-25 11:50:27 +02:00
Mubelotix
8240b76267 Create test workload 2025-08-25 11:43:11 +02:00
Mubelotix
78e98a4e6c Create the test function 2025-08-25 11:37:23 +02:00
Mubelotix
d9177d4727 Create the test xtask command and args 2025-08-25 11:23:46 +02:00
Louis Dureuil
a94a13c9b0 Merge pull request #5849 from meilisearch/tmp-v1.19
Prepare for v1.19 release
2025-08-25 07:03:27 +00:00
Clément Renault
9dcdde592c Merge pull request #5729 from martin-g/5616-max-memory-in-container
Take into account the allowed max memory of the container
2025-08-21 14:43:32 +00:00
Louis Dureuil
7de44ad2b7 Add v1.19 in index-scheduler and index upgrades 2025-08-21 16:37:35 +02:00
Louis Dureuil
820854ba5c Update snapshots 2025-08-21 16:37:23 +02:00
Louis Dureuil
496de5563a Update version in Cargo.toml 2025-08-21 16:36:56 +02:00
Clément Renault
795045c03a Merge pull request #5784 from meilisearch/sharding-split-docs
Sharding and EE license
2025-08-19 14:17:37 +00:00
Louis Dureuil
b541b7bed3 Change license text to clarify that EE files are in EE modules 2025-08-19 14:50:42 +02:00
Louis Dureuil
6fb3cf95e4 Move EE files into EE modules 2025-08-19 14:50:42 +02:00
Louis Dureuil
cbd2bdf0fa Fix snapshots 2025-08-19 14:50:42 +02:00
Louis Dureuil
601785692f Remove erroneous untagged annotation 2025-08-19 14:50:42 +02:00
Louis Dureuil
65c212d1fd camel case the fields in "origin" 2025-08-19 14:50:42 +02:00
Louis Dureuil
85feb3a26c Rename Body::with_file 2025-08-19 14:50:42 +02:00
Louis Dureuil
d550b90c60 Adjust timeouts 2025-08-19 14:50:42 +02:00
Louis Dureuil
385acbbcd2 Don't always hardcode Content-Type in proxy 2025-08-19 14:50:41 +02:00
Louis Dureuil
484dbf8c06 Update snap 2025-08-19 14:50:41 +02:00
Louis Dureuil
9c6c0af076 Misc churn 2025-08-19 14:50:41 +02:00
Louis Dureuil
e33fbcf7b2 Move meilisearch_types::Network to its own module 2025-08-19 14:50:41 +02:00
Louis Dureuil
d352f33d16 Make types Serialize and Deserialize for proxying 2025-08-19 14:50:41 +02:00
Louis Dureuil
3682b92ee8 New errors 2025-08-19 14:50:41 +02:00
Louis Dureuil
ef10c1fb23 Dependency changes 2025-08-19 14:50:41 +02:00
Louis Dureuil
bd97a7cc19 IndexScheduler::update_task now merges the task.network and accepts &mut Task 2025-08-19 14:50:41 +02:00
Louis Dureuil
56c7f54804 IndexScheduler::set_task_network 2025-08-19 14:50:41 +02:00
Louis Dureuil
15d34c33e8 file-store: persist returns the persisted File object 2025-08-19 14:50:40 +02:00
Louis Dureuil
42ac869c5c Dump support for network 2025-08-19 14:50:40 +02:00
Louis Dureuil
6e0152921f Proxy all document tasks to the network when sharding is enabled 2025-08-19 14:50:40 +02:00
Louis Dureuil
069d25dce6 Shard documents 2025-08-19 14:50:40 +02:00
Louis Dureuil
9929f798d3 network: add sharding to Network and writeApiKey to Remotes 2025-08-19 14:50:40 +02:00
Louis Dureuil
80ff438402 Add proxy module to proxy requests to members of a network 2025-08-19 14:50:40 +02:00
Louis Dureuil
e62a807b60 Add new milli::update::new::indexer::sharding module 2025-08-19 14:50:40 +02:00
Louis Dureuil
907055ed08 Add network to Task and TaskView 2025-08-19 14:50:39 +02:00
Louis Dureuil
8b18adee95 Add EE license 2025-08-19 14:50:39 +02:00
Clément Renault
53223ace47 Merge pull request #5844 from meilisearch/prepare-v1.18
Prepare v1.18.0
2025-08-18 11:34:53 +00:00
Mubelotix
a579ea2596 Remove useless code 2025-08-18 10:30:29 +02:00
Mubelotix
e13541818a Update upgrade tests 2025-08-18 09:48:44 +02:00
Mubelotix
c974f0ab0a Update dumpless upgrades 2025-08-18 09:44:55 +02:00
Mubelotix
36cac8acf7 Update package version 2025-08-18 09:44:40 +02:00
Martin Tzvetanov Grigorov
45da2257ec Take into account the allowed max memory of the container
When Meilisearch runs inside a container (e.g. Docker or Kubernetes) it
may run with less max memory than the available on the host, e.g.
`docker run --memory 1G ...`

Fixes #5616

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-02 14:23:11 +03:00
116 changed files with 3103 additions and 638 deletions

View File

@@ -24,6 +24,11 @@ TBD
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining what to do.
### Reminders when adding features
- [ ] Write unit tests using insta
- [ ] Write declarative integration tests in [workloads/tests](https://github.com/meilisearch/meilisearch/tree/main/workloads/tests). Specify the routes to call and then call `cargo xtask test workloads/tests/YOUR_TEST.json --update-responses` so that responses are automatically filled.
### Reminders when modifying the API
- [ ] Update the openAPI file with utoipa:

View File

@@ -124,6 +124,7 @@ They are JSON files with the following structure (comments are not actually supp
{
// Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
"name": "hackernews.ndjson_1M,no-threads",
"type": "bench",
// Number of consecutive runs of the commands that should be performed.
// Each run uses a fresh instance of Meilisearch and a fresh database.
// Each run produces its own report file.

111
Cargo.lock generated
View File

@@ -350,6 +350,21 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78200ac3468a57d333cd0ea5dd398e25111194dcacd49208afca95c629a6311d"
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
@@ -580,7 +595,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
[[package]]
name = "benchmarks"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"anyhow",
"bumpalo",
@@ -770,7 +785,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"anyhow",
"time",
@@ -1106,6 +1121,20 @@ dependencies = [
"whatlang",
]
[[package]]
name = "chrono"
version = "0.4.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "ciborium"
version = "0.2.2"
@@ -1774,7 +1803,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"anyhow",
"big_s",
@@ -2006,7 +2035,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"tempfile",
"thiserror 2.0.12",
@@ -2028,7 +2057,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"insta",
"levenshtein_automata",
@@ -2050,7 +2079,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"criterion",
"serde_json",
@@ -2195,7 +2224,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"arbitrary",
"bumpalo",
@@ -2851,6 +2880,30 @@ dependencies = [
"tracing",
]
[[package]]
name = "iana-time-zone"
version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "icu_collections"
version = "2.0.0"
@@ -2995,7 +3048,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"anyhow",
"backoff",
@@ -3231,7 +3284,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"criterion",
"serde_json",
@@ -3725,7 +3778,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"insta",
"md5",
@@ -3736,7 +3789,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -3746,6 +3799,7 @@ dependencies = [
"actix-web-lab",
"anyhow",
"async-openai",
"backoff",
"brotli",
"bstr",
"build-info",
@@ -3832,7 +3886,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -3851,7 +3905,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"actix-web",
"anyhow",
@@ -3886,7 +3940,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"anyhow",
"clap",
@@ -3920,7 +3974,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"allocator-api2 0.3.0",
"arroy",
@@ -3989,6 +4043,7 @@ dependencies = [
"time",
"tokenizers",
"tracing",
"twox-hash",
"ureq",
"url",
"utoipa",
@@ -4483,7 +4538,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"big_s",
"serde_json",
@@ -5680,6 +5735,20 @@ name = "similar"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
dependencies = [
"bstr",
"unicode-segmentation",
]
[[package]]
name = "similar-asserts"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b441962c817e33508847a22bd82f03a30cff43642dc2fae8b050566121eb9a"
dependencies = [
"console",
"similar",
]
[[package]]
name = "simple_asn1"
@@ -6442,6 +6511,12 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "twox-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
[[package]]
name = "typeid"
version = "1.0.3"
@@ -7271,11 +7346,12 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.17.1"
version = "1.19.0"
dependencies = [
"anyhow",
"build-info",
"cargo_metadata",
"chrono",
"clap",
"futures-core",
"futures-util",
@@ -7283,6 +7359,7 @@ dependencies = [
"serde",
"serde_json",
"sha2",
"similar-asserts",
"sysinfo",
"time",
"tokio",

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.17.1"
version = "1.19.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -19,3 +19,11 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---
🔒 Meilisearch Enterprise Edition (EE)
Certain parts of this codebase are not licensed under the MIT license and are governed by the Business Source License 1.1.
See the LICENSE-EE file for details.

67
LICENSE-EE Normal file
View File

@@ -0,0 +1,67 @@
Business Source License 1.1 Adapted for Meili SAS
This license is based on the Business Source License version 1.1, as published by MariaDB Corporation Ab.
Parameters
Licensor: Meili SAS
Licensed Work: Any file explicitly marked as “Enterprise Edition (EE)” or “governed by the Business Source License” residing in enterprise_editions modules/folders.
Additional Use Grant:
You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.
Production use of the Licensed Work requires a commercial license agreement with Meilisearch. Contact bonjour@meilisearch.com for licensing.
Change License: MIT
Change Date: Four years from the date the Licensed Work is published.
This License does not apply to any code outside of the Licensed Work, which remains under the MIT license.
For information about alternative licensing arrangements for the Licensed Work,
please contact bonjour@meilisearch.com or sales@meilisearch.com.
Notice
Business Source License 1.1
Terms
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited production use.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.

View File

@@ -89,6 +89,26 @@ We also offer a wide range of dedicated guides to all Meilisearch features, such
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
## 🧾 Editions & Licensing
Meilisearch is available in two editions:
### 🧪 Community Edition (CE)
- Fully open source under the [MIT license](./LICENSE)
- Core search engine with fast and relevant full-text, semantic or hybrid search
- Free to use for anyone, including commercial usage
### 🏢 Enterprise Edition (EE)
- Includes advanced features such as:
- Sharding
- Governed by a [commercial license](./LICENSE-EE) or the [Business Source License 1.1](https://mariadb.com/bsl11)
- Not allowed in production without a commercial agreement with Meilisearch.
- You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.
Want access to Enterprise features? → Contact us at [sales@meilisearch.com](mailto:sales@meilisearch.com).
## 📊 Telemetry
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

View File

@@ -154,6 +154,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -221,6 +222,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -266,6 +268,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -335,6 +338,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -412,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -457,6 +462,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -498,6 +504,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -566,6 +573,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -633,6 +641,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -700,6 +709,7 @@ fn indexing_wiki(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -766,6 +776,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -811,6 +822,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -879,6 +891,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -956,6 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1002,6 +1016,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1044,6 +1059,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1111,6 +1127,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1177,6 +1194,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1222,6 +1240,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1290,6 +1309,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1404,6 +1424,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1449,6 +1470,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1490,6 +1512,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1580,6 +1603,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1671,6 +1695,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1754,6 +1779,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1821,6 +1847,7 @@ fn indexing_geo(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1887,6 +1914,7 @@ fn reindexing_geo(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1932,6 +1960,7 @@ fn reindexing_geo(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2000,6 +2029,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -123,6 +123,7 @@ pub fn base_setup(conf: &Conf) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -10,7 +10,7 @@ use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
@@ -94,6 +94,8 @@ pub struct TaskDump {
default
)]
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
}
// A `Kind` specific version made for the dump. If modified you may break the dump.
@@ -172,6 +174,7 @@ impl From<Task> for TaskDump {
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
network: task.network,
}
}
}
@@ -250,8 +253,9 @@ pub(crate) mod test {
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
@@ -384,6 +388,7 @@ pub(crate) mod test {
enqueued_at: datetime!(2022-11-11 0:00 UTC),
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
network: None,
},
None,
),
@@ -408,6 +413,7 @@ pub(crate) mod test {
enqueued_at: datetime!(2022-11-11 0:00 UTC),
started_at: None,
finished_at: None,
network: None,
},
Some(vec![
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
@@ -427,6 +433,7 @@ pub(crate) mod test {
enqueued_at: datetime!(2022-11-15 0:00 UTC),
started_at: None,
finished_at: None,
network: None,
},
None,
),
@@ -539,7 +546,8 @@ pub(crate) mod test {
fn create_test_network() -> Network {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
sharding: false,
}
}

View File

@@ -163,6 +163,7 @@ impl CompatV5ToV6 {
enqueued_at: task_view.enqueued_at,
started_at: task_view.started_at,
finished_at: task_view.finished_at,
network: None,
};
(task, content_file)

View File

@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
pub type Network = meilisearch_types::enterprise_edition::network::Network;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
// ===== Other types to clarify the code of the compat module

View File

@@ -5,7 +5,8 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::WebhooksDumpView;

View File

@@ -148,11 +148,10 @@ impl File {
Ok(Self { path: PathBuf::new(), file: None })
}
pub fn persist(self) -> Result<()> {
if let Some(file) = self.file {
file.persist(&self.path)?;
}
Ok(())
pub fn persist(self) -> Result<Option<StdFile>> {
let Some(file) = self.file else { return Ok(None) };
Ok(Some(file.persist(&self.path)?))
}
}

View File

@@ -129,6 +129,7 @@ fn main() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -147,6 +147,7 @@ impl<'a> Dump<'a> {
canceled_by: task.canceled_by,
details: task.details,
status: task.status,
network: task.network,
kind: match task.kind {
KindDump::DocumentImport {
primary_key,

View File

@@ -1,6 +1,7 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};

View File

@@ -230,6 +230,7 @@ pub fn snapshot_task(task: &Task) -> String {
details,
status,
kind,
network,
} = task;
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
@@ -247,6 +248,9 @@ pub fn snapshot_task(task: &Task) -> String {
snap.push_str(&format!("details: {}, ", &snapshot_details(details)));
}
snap.push_str(&format!("kind: {kind:?}"));
if let Some(network) = network {
snap.push_str(&format!("network: {network:?}, "))
}
snap.push('}');
snap

View File

@@ -51,8 +51,9 @@ pub use features::RoFeatures;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{
ChatCompletionSettings, InstanceTogglableFeatures, Network, RuntimeTogglableFeatures,
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
@@ -64,7 +65,7 @@ use meilisearch_types::milli::vector::{
};
use meilisearch_types::milli::{self, Index};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
@@ -666,6 +667,16 @@ impl IndexScheduler {
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
}
pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
let mut wtxn = self.env.write_txn()?;
let mut task =
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
task.network = Some(network);
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
wtxn.commit()?;
Ok(())
}
/// Return the batches matching the query from the user's point of view along
/// with the total number of batches matching the query, ignoring from and limit.
///

View File

@@ -279,6 +279,7 @@ impl Queue {
details: kind.default_details(),
status: Status::Enqueued,
kind: kind.clone(),
network: None,
};
// For deletion and cancelation tasks, we want to make extra sure that they
// don't attempt to delete/cancel tasks that are newer than themselves.

View File

@@ -97,7 +97,22 @@ impl TaskQueue {
Ok(self.all_tasks.get(rtxn, &task_id)?)
}
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
/// Update the inverted task indexes and write the new value of the task.
///
/// The passed `task` object typically comes from a previous transaction, so two kinds of modification might have occurred:
/// 1. Modification to the `task` object after loading it from the DB (the purpose of this method is to persist these changes)
/// 2. Modification to the task committed by another transaction in the DB (an annoying consequence of having lost the original
/// transaction from which the `task` instance was deserialized)
///
/// When calling this function, this `task` is modified to take into account any existing `network`
/// that may have been added since the task was loaded into memory.
///
/// Any other modification to the task that was committed to the DB since the parameter was pulled from the DB will be overwritten.
///
/// # Errors
///
/// - CorruptedTaskQueue: The task doesn't exist in the database
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
let reprocessing = old_task.status != Status::Enqueued;
@@ -157,6 +172,12 @@ impl TaskQueue {
}
}
task.network = match (old_task.network, task.network.take()) {
(None, None) => None,
(None, Some(network)) | (Some(network), None) => Some(network),
(Some(_), Some(network)) => Some(network),
};
self.all_tasks.put(wtxn, &task.uid, task)?;
Ok(())
}

View File

@@ -268,7 +268,7 @@ impl IndexScheduler {
self.queue
.tasks
.update_task(&mut wtxn, &task)
.update_task(&mut wtxn, &mut task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
}
if let Some(canceled_by) = canceled_by {
@@ -349,7 +349,7 @@ impl IndexScheduler {
self.queue
.tasks
.update_task(&mut wtxn, &task)
.update_task(&mut wtxn, &mut task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
}
}

View File

@@ -66,6 +66,11 @@ impl IndexScheduler {
}
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
let network = self.network();
let shards = network.shards();
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
// to a fresh thread.
@@ -130,6 +135,7 @@ impl IndexScheduler {
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
progress.clone(),
shards.as_ref(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 17, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 19, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.19.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 17, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 19, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 17, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 19, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 17, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 19, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.19.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 17, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 19, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.19.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 17, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 19, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.19.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -40,6 +40,9 @@ pub fn upgrade_index_scheduler(
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(1, 17, _) => 0,
(1, 18, _) => 0,
(1, 19, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
@@ -89,6 +92,7 @@ pub fn upgrade_index_scheduler(
details: Some(Details::UpgradeDatabase { from, to }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
network: None,
},
)?;
wtxn.commit()?;

View File

@@ -1,6 +1,5 @@
//! Utility functions on the DBs. Mainly getter and setters.
use crate::milli::progress::EmbedderStats;
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use std::sync::Arc;
@@ -15,6 +14,7 @@ use meilisearch_types::tasks::{
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::milli::progress::EmbedderStats;
use crate::{Error, Result, Task, TaskId, BEI128};
/// This structure contains all the information required to write a batch in the database without reading the tasks.
@@ -377,6 +377,7 @@ impl crate::IndexScheduler {
details,
status,
kind,
network: _,
} = task;
assert_eq!(uid, task.uid);
if task.status != Status::Enqueued {

View File

@@ -0,0 +1,6 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
pub mod network;

View File

@@ -0,0 +1,47 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use milli::update::new::indexer::enterprise_edition::sharding::Shards;
use serde::{Deserialize, Serialize};
/// Network configuration: the local instance name, the known remotes, and the sharding flag.
///
/// Every field is marked `#[serde(default)]`, so a field missing from the serialized form
/// deserializes to its `Default` value (`None`, an empty map, `false`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
/// Name of the local instance; (de)serialized under the key `self`.
#[serde(default, rename = "self")]
pub local: Option<String>,
/// Known remotes, keyed by remote name.
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
/// Whether sharding is enabled. `Network::shards` panics if this is `true` while `local` is `None`.
#[serde(default)]
pub sharding: bool,
}
impl Network {
    /// Computes the shard layout described by this network configuration.
    ///
    /// Returns `None` when `sharding` is disabled. Otherwise the returned [`Shards`] has
    /// `own` set to the local name and `others` set to every key of `remotes` that
    /// differs from the local name.
    ///
    /// # Panics
    ///
    /// Panics if `sharding` is `true` while `local` is `None`.
    pub fn shards(&self) -> Option<Shards> {
        if !self.sharding {
            return None;
        }

        // Sharding requires a local name; its absence is treated as a broken invariant.
        let local_name = self.local.as_deref().expect("Inconsistent `sharding` and `self`");

        // The local instance may also be listed among the remotes: leave it out of `others`.
        let others =
            self.remotes.keys().filter(|remote| remote.as_str() != local_name).cloned().collect();

        Some(Shards { own: vec![local_name.to_owned()], others })
    }
}
/// Description of one remote in the [`Network`] `remotes` map.
///
/// Field names are (de)serialized in camelCase (`url`, `searchApiKey`, `writeApiKey`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
/// URL of the remote (required: no serde default).
pub url: String,
/// Optional API key; presumably used for search requests sent to this remote — TODO confirm against callers.
#[serde(default)]
pub search_api_key: Option<String>,
/// Optional API key; presumably used for write (document change) requests sent to this remote — TODO confirm against callers.
#[serde(default)]
pub write_api_key: Option<String>,
}

View File

@@ -235,9 +235,11 @@ InvalidDocumentFields , InvalidRequest , BAD_REQU
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
@@ -266,7 +268,9 @@ InvalidMultiSearchRemote , InvalidRequest , BAD_REQU
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;

View File

@@ -1,5 +1,3 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError};
@@ -32,23 +30,6 @@ pub struct InstanceTogglableFeatures {
pub contains_filter: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct ChatCompletionSettings {

View File

@@ -3,6 +3,7 @@ pub mod batches;
pub mod compression;
pub mod deserr;
pub mod document_formats;
pub mod enterprise_edition;
pub mod error;
pub mod facet_values_sort;
pub mod features;

View File

@@ -11,6 +11,7 @@ use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
TaskNetwork,
};
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
@@ -51,6 +52,9 @@ pub struct TaskView {
#[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))]
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
}
impl TaskView {
@@ -68,6 +72,7 @@ impl TaskView {
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
network: task.network.clone(),
}
}
}

View File

@@ -42,6 +42,9 @@ pub struct Task {
pub status: Status,
pub kind: KindWithContent,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
}
impl Task {
@@ -737,6 +740,36 @@ pub enum Details {
},
}
/// Network-related information attached to a task.
///
/// The enum is `untagged`: the variant is not written out, and deserialization picks the first
/// variant whose fields match (`origin` or `remote_tasks`).
///
/// NOTE(review): per the serde documentation, a container-level `rename_all` on an enum renames
/// the *variants*, not the fields of struct variants (that is `rename_all_fields`). With
/// `untagged` the variant names never appear, so `remote_tasks` presumably (de)serializes in
/// snake_case rather than as `remoteTasks` — confirm this is the intended wire format.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
pub enum TaskNetwork {
// Information about where the task came from.
Origin { origin: Origin },
// One `RemoteTask` entry per remote, keyed by a string (presumably the remote name — TODO confirm).
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
}
/// Identifies a task on a named remote; used by the `TaskNetwork::Origin` variant.
///
/// Fields are (de)serialized in camelCase (`remoteName`, `taskUid`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Origin {
/// Name of the remote.
pub remote_name: String,
/// Uid of the task on that remote.
/// NOTE(review): typed `usize` here while `RemoteTask::task_uid` uses `TaskId` — confirm the difference is intended.
pub task_uid: usize,
}
/// Outcome recorded for one remote: either a task uid or an error.
///
/// The `From<Result<TaskId, ResponseError>>` impl sets exactly one of the two fields.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
/// Task uid returned on success; omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
task_uid: Option<TaskId>,
/// Error returned on failure. There is no `skip_serializing_if` on this field, so `None` is
/// serialized explicitly (e.g. as `null` in JSON).
error: Option<ResponseError>,
}
impl From<Result<TaskId, ResponseError>> for RemoteTask {
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
match res {
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
}
}
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[schema(rename_all = "camelCase")]
pub struct DetailsExportIndexSettings {

View File

@@ -115,6 +115,9 @@ utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
secrecy = "0.10.3"
actix-web-lab = { version = "0.24.1", default-features = false }
urlencoding = "2.1.3"
backoff = { version = "0.4.0", features = ["tokio"] }
[dev-dependencies]
actix-rt = "2.10.0"
@@ -125,7 +128,6 @@ manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
wiremock = "0.6.3"
yaup = "0.3.1"

View File

@@ -9,6 +9,8 @@ use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
@@ -80,6 +82,16 @@ pub enum MeilisearchHttpError {
MissingSearchHybrid,
#[error("Invalid request: both `media` and `vector` parameters are present.")]
MediaAndVector,
#[error("Inconsistent `Origin` headers: {} was provided but {} is missing.\n - Hint: Either both headers should be provided, or none of them", if *is_remote_missing {
PROXY_ORIGIN_TASK_UID_HEADER
} else { PROXY_ORIGIN_REMOTE_HEADER },
if *is_remote_missing {
PROXY_ORIGIN_REMOTE_HEADER
} else { PROXY_ORIGIN_TASK_UID_HEADER }
)]
InconsistentOriginHeaders { is_remote_missing: bool },
#[error("Invalid value for header {header_name}: {msg}")]
InvalidHeaderValue { header_name: &'static str, msg: String },
}
impl MeilisearchHttpError {
@@ -124,6 +136,10 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
Code::InconsistentDocumentChangeHeaders
}
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
}
}
}

View File

@@ -628,6 +628,7 @@ fn import_dump(
&mut new_fields_ids_map,
&|| false, // never stop processing a dump
progress.clone(),
None,
)?;
let operation_stats = operation_stats.pop().unwrap();

View File

@@ -886,7 +886,10 @@ fn total_memory_bytes() -> Option<u64> {
let mem_kind = RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram());
let mut system = System::new_with_specifics(mem_kind);
system.refresh_memory();
Some(system.total_memory())
system
.cgroup_limits()
.map(|limits| limits.total_memory)
.or_else(|| Some(system.total_memory()))
} else {
None
}

View File

@@ -45,6 +45,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::enterprise_edition::proxy::{proxy, Body};
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
@@ -338,6 +339,7 @@ pub async fn delete_document(
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
@@ -355,10 +357,16 @@ pub async fn delete_document(
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -804,7 +812,6 @@ pub async fn replace_documents(
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
@@ -814,8 +821,10 @@ pub async fn replace_documents(
uid,
dry_run,
allow_index_creation,
&req,
)
.await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task))
@@ -905,7 +914,6 @@ pub async fn update_documents(
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
@@ -915,6 +923,7 @@ pub async fn update_documents(
uid,
dry_run,
allow_index_creation,
&req,
)
.await?;
debug!(returns = ?task, "Update documents");
@@ -924,7 +933,6 @@ pub async fn update_documents(
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
@@ -934,7 +942,11 @@ async fn document_addition(
task_id: Option<TaskId>,
dry_run: bool,
allow_index_creation: bool,
req: &HttpRequest,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let mime_type = extract_mime_type(req)?;
let network = index_scheduler.network();
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
@@ -966,7 +978,7 @@ async fn document_addition(
};
let (uuid, mut update_file) = index_scheduler.queue.create_update_file(dry_run)?;
let documents_count = match format {
let res = match format {
PayloadType::Ndjson => {
let (path, file) = update_file.into_parts();
let file = match file {
@@ -981,19 +993,19 @@ async fn document_addition(
None => None,
};
let documents_count = tokio::task::spawn_blocking(move || {
let res = tokio::task::spawn_blocking(move || {
let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
})?;
let update_file = file_store::File::from_parts(path, file);
update_file.persist()?;
let update_file = update_file.persist()?;
Ok(documents_count)
Ok((documents_count, update_file))
})
.await?;
Ok(documents_count)
Ok(res)
}
PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
let temp_file = match tempfile() {
@@ -1012,16 +1024,16 @@ async fn document_addition(
unreachable!("We already wrote the user content into the update file")
}
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
// we NEED to persist the file here because we moved the `update_file` in another task.
let file = update_file.persist()?;
Ok((documents_count, file))
})
.await
}
};
let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count,
let (documents_count, file) = match res {
Ok(Ok((documents_count, file))) => (documents_count, file),
// in this case the file cannot possibly have been persisted.
Ok(Err(e)) => return Err(e),
Err(e) => {
@@ -1063,6 +1075,20 @@ async fn document_addition(
}
};
if network.sharding {
if let Some(file) = file {
proxy(
&index_scheduler,
&index_uid,
req,
network,
Body::with_ndjson_payload(file),
&task,
)
.await?;
}
}
Ok(task.into())
}
@@ -1141,6 +1167,7 @@ pub async fn delete_documents_batch(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
@@ -1161,16 +1188,22 @@ pub async fn delete_documents_batch(
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Delete documents by batch");
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr, ToSchema)]
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct DocumentDeletionByFilter {
@@ -1219,7 +1252,8 @@ pub async fn delete_documents_by_filter(
debug!(parameters = ?body, "Delete documents by filter");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
let filter = body.into_inner();
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
@@ -1232,23 +1266,36 @@ pub async fn delete_documents_by_filter(
);
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
crate::search::parse_filter(
&filter.filter,
Code::InvalidDocumentFilter,
index_scheduler.features(),
)?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task = KindWithContent::DocumentDeletionByFilter {
index_uid: index_uid.clone(),
filter_expr: filter.filter.clone(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr, ToSchema)]
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentEditionByFunction {
/// A string containing a RHAI function.
@@ -1336,6 +1383,8 @@ pub async fn edit_documents_by_function(
.features()
.check_edit_documents_by_function("Using the documents edit route")?;
let network = index_scheduler.network();
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let params = params.into_inner();
@@ -1349,13 +1398,12 @@ pub async fn edit_documents_by_function(
&req,
);
let DocumentEditionByFunction { filter, context, function } = params;
let engine = milli::rhai::Engine::new();
if let Err(e) = engine.compile(&function) {
if let Err(e) = engine.compile(&params.function) {
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
}
if let Some(ref filter) = filter {
if let Some(ref filter) = params.filter {
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(
filter,
@@ -1365,9 +1413,9 @@ pub async fn edit_documents_by_function(
.ok_or(MeilisearchHttpError::EmptyFilter)?;
}
let task = KindWithContent::DocumentEdition {
index_uid,
filter_expr: filter,
context: match context {
index_uid: index_uid.clone(),
filter_expr: params.filter.clone(),
context: match params.context.clone() {
Some(Value::Object(m)) => Some(m),
None => None,
_ => {
@@ -1377,15 +1425,21 @@ pub async fn edit_documents_by_function(
))
}
},
function,
function: params.function.clone(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Edit documents by function");
Ok(HttpResponse::Accepted().json(task))
@@ -1428,6 +1482,8 @@ pub async fn clear_all_documents(
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
clear_all: true,
@@ -1441,10 +1497,18 @@ pub async fn clear_all_documents(
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Delete all documents");
Ok(HttpResponse::Accepted().json(task))

View File

@@ -0,0 +1,6 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
pub mod proxy;

View File

@@ -0,0 +1,426 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use std::fs::File;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::HttpRequest;
use bytes::Bytes;
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork};
use reqwest::StatusCode;
use serde::de::DeserializeOwned;
use serde_json::Value;
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::enterprise_edition::proxy::error::{
ProxyDocumentChangeError, ReqwestErrorWithoutUrl,
};
use crate::routes::SummarizedTaskView;
/// Body of a request that `proxy` forwards to the remotes of the network.
pub enum Body<T: serde::Serialize> {
/// A payload file; `proxy` memory-maps it and forces the `application/x-ndjson` content type.
NdJsonPayload(File),
/// A value that `proxy` serializes to JSON before sending it.
Inline(T),
/// No body is sent with the proxied request.
None,
}
impl Body<()> {
pub fn with_ndjson_payload(file: File) -> Self {
Self::NdJsonPayload(file)
}
pub fn none() -> Self {
Self::None
}
}
/// If necessary, proxies the passed request to the network and update the task description.
///
/// This function reads the custom headers from the request to determine if must proxy the request or if the request
/// has already been proxied.
///
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
/// sent to all remotes of the `network` (except `self`). The response from the remotes are collected to update the passed `task`
/// with the task ids from the task queues of the remotes.
/// - when the request has already been proxied, the custom headers contains information about the remote that created the initial task.
/// This information is copied to the passed task.
pub async fn proxy<T: serde::Serialize>(
index_scheduler: &IndexScheduler,
index_uid: &str,
req: &HttpRequest,
network: meilisearch_types::enterprise_edition::network::Network,
body: Body<T>,
task: &meilisearch_types::tasks::Task,
) -> Result<(), MeilisearchHttpError> {
match origin_from_req(req)? {
Some(origin) => {
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
}
None => {
let this = network
.local
.as_deref()
.expect("inconsistent `network.sharding` and `network.self`")
.to_owned();
let content_type = match &body {
// for file bodies, force x-ndjson
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
// otherwise get content type from request
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
};
let body = match body {
Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe {
memmap2::Mmap::map(&file).map_err(|err| {
MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned()))
})?
})),
Body::Inline(payload) => {
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
}
Body::None => None,
};
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_secs(3))
.build()
.unwrap();
let method = from_old_http_method(req.method());
// send payload to all remotes
for (node_name, node) in
network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)
{
let body = body.clone();
let client = client.clone();
let api_key = node.write_api_key;
let this = this.clone();
let method = method.clone();
let path_and_query =
req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
in_flight_remote_queries.insert(
node_name,
tokio::spawn({
let url = format!("{}{}", node.url, path_and_query);
let url_encoded_this = urlencoding::encode(&this).into_owned();
let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize
let content_type = content_type.map(|b| b.to_owned());
let backoff = backoff::ExponentialBackoffBuilder::new()
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
.build();
backoff::future::retry(backoff, move || {
let url = url.clone();
let client = client.clone();
let url_encoded_this = url_encoded_this.clone();
let url_encoded_task_uid = url_encoded_task_uid.clone();
let content_type = content_type.clone();
let body = body.clone();
let api_key = api_key.clone();
let method = method.clone();
async move {
try_proxy(
method,
&url,
content_type.as_deref(),
api_key.as_deref(),
&client,
&url_encoded_this,
&url_encoded_task_uid,
body,
)
.await
}
})
}),
);
}
// wait for all in-flight queries to finish and collect their results
let mut remote_tasks: BTreeMap<String, RemoteTask> = BTreeMap::new();
for (node_name, handle) in in_flight_remote_queries {
match handle.await {
Ok(Ok(res)) => {
let task_uid = res.task_uid;
remote_tasks.insert(node_name, Ok(task_uid).into());
}
Ok(Err(error)) => {
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
}
Err(panic) => match panic.try_into_panic() {
Ok(panic) => {
let msg = match panic.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match panic.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
remote_tasks.insert(
node_name,
Err(ResponseError::from_msg(
msg.to_string(),
meilisearch_types::error::Code::Internal,
))
.into(),
);
}
Err(_) => {
tracing::error!("proxy task was unexpectedly cancelled")
}
},
}
}
// edit details to contain the return values from the remotes
index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?;
}
}
Ok(())
}
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
match method {
&actix_http::Method::CONNECT => reqwest::Method::CONNECT,
&actix_http::Method::DELETE => reqwest::Method::DELETE,
&actix_http::Method::GET => reqwest::Method::GET,
&actix_http::Method::HEAD => reqwest::Method::HEAD,
&actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
&actix_http::Method::PATCH => reqwest::Method::PATCH,
&actix_http::Method::POST => reqwest::Method::POST,
&actix_http::Method::PUT => reqwest::Method::PUT,
&actix_http::Method::TRACE => reqwest::Method::TRACE,
method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
}
}
#[allow(clippy::too_many_arguments)]
async fn try_proxy(
method: reqwest::Method,
url: &str,
content_type: Option<&[u8]>,
api_key: Option<&str>,
client: &reqwest::Client,
url_encoded_this: &str,
url_encoded_task_uid: &str,
body: Option<Bytes>,
) -> Result<SummarizedTaskView, backoff::Error<ProxyDocumentChangeError>> {
let request = client.request(method, url).timeout(std::time::Duration::from_secs(30));
let request = if let Some(body) = body { request.body(body) } else { request };
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
let request = request.header(PROXY_ORIGIN_TASK_UID_HEADER, url_encoded_task_uid);
let request = request.header(PROXY_ORIGIN_REMOTE_HEADER, url_encoded_this);
let request = if let Some(content_type) = content_type {
request.header(CONTENT_TYPE.as_str(), content_type)
} else {
request
};
let response = request.send().await;
let response = match response {
Ok(response) => response,
Err(error) if error.is_timeout() => {
return Err(backoff::Error::transient(ProxyDocumentChangeError::Timeout))
}
Err(error) => {
return Err(backoff::Error::transient(ProxyDocumentChangeError::CouldNotSendRequest(
ReqwestErrorWithoutUrl::new(error),
)))
}
};
match response.status() {
status_code if status_code.is_success() => (),
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::AuthenticationError))
}
status_code if status_code.is_client_error() => {
let response = parse_error(response).await;
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::BadRequest {
status_code,
response,
}));
}
status_code if status_code.is_server_error() => {
let response = parse_error(response).await;
return Err(backoff::Error::transient(ProxyDocumentChangeError::RemoteError {
status_code,
response,
}));
}
status_code => {
tracing::warn!(
status_code = status_code.as_u16(),
"remote replied with unexpected status code"
);
}
}
let response = match parse_response(response).await {
Ok(response) => response,
Err(response) => {
return Err(backoff::Error::transient(
ProxyDocumentChangeError::CouldNotParseResponse { response },
))
}
};
Ok(response)
}
/// Reads the whole body of an error response and renders it as text.
///
/// # Errors
///
/// Returns the underlying `reqwest` error, stripped of its URL, when the body cannot be read.
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
    let body = response.bytes().await.map_err(ReqwestErrorWithoutUrl::new)?;
    Ok(parse_bytes_as_error(&body))
}
/// Renders raw response bytes as text.
///
/// Bytes that are valid JSON are re-serialized through `serde_json::Value`; anything else is
/// decoded as UTF-8, with invalid sequences replaced.
fn parse_bytes_as_error(bytes: &[u8]) -> String {
    serde_json::from_slice::<Value>(bytes)
        .map(|json| json.to_string())
        .unwrap_or_else(|_| String::from_utf8_lossy(bytes).into_owned())
}
async fn parse_response<T: DeserializeOwned>(
response: reqwest::Response,
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
let bytes = match response.bytes().await {
Ok(bytes) => bytes,
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
};
match serde_json::from_slice::<T>(&bytes) {
Ok(value) => Ok(value),
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
}
}
mod error {
use meilisearch_types::error::ResponseError;
use reqwest::StatusCode;
#[derive(Debug, thiserror::Error)]
pub enum ProxyDocumentChangeError {
#[error("{0}")]
CouldNotSendRequest(ReqwestErrorWithoutUrl),
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `documents.add` action")]
AuthenticationError,
#[error(
"could not parse response from the remote host as a document addition response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
response_from_remote(response)
)]
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote host did not answer before the deadline")]
Timeout,
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
}
impl ProxyDocumentChangeError {
pub fn as_response_error(&self) -> ResponseError {
use meilisearch_types::error::Code;
let message = self.to_string();
let code = match self {
ProxyDocumentChangeError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
ProxyDocumentChangeError::AuthenticationError => Code::RemoteInvalidApiKey,
ProxyDocumentChangeError::BadRequest { .. } => Code::RemoteBadRequest,
ProxyDocumentChangeError::Timeout => Code::RemoteTimeout,
ProxyDocumentChangeError::RemoteError { .. } => Code::RemoteRemoteError,
ProxyDocumentChangeError::CouldNotParseResponse { .. } => Code::RemoteBadResponse,
};
ResponseError::from_msg(message, code)
}
}
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
impl ReqwestErrorWithoutUrl {
pub fn new(inner: reqwest::Error) -> Self {
Self(inner.without_url())
}
}
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
match response {
Ok(response) => {
format!(":\n - response from remote: {}", response)
}
Err(error) => {
format!(":\n - additionally, could not retrieve response from remote: {error}")
}
}
}
}
/// Name of the header carrying the URL-encoded name of the remote that proxied the request.
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
/// Name of the header carrying the uid of the task registered on the remote that proxied the request.
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
/// Extracts the origin of a proxied request from its custom headers.
///
/// Returns `Ok(None)` when neither origin header is present, and the decoded origin when
/// both are.
///
/// # Errors
///
/// - `InconsistentOriginHeaders` when only one of the two headers is present;
/// - `InvalidHeaderValue` when a header cannot be read as a string, cannot be URL-decoded,
///   or when the task uid is not an integer.
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
    let headers = req.headers();
    let remote_header = headers.get(PROXY_ORIGIN_REMOTE_HEADER);
    let task_uid_header = headers.get(PROXY_ORIGIN_TASK_UID_HEADER);

    let (remote_header, task_uid_header) = match (remote_header, task_uid_header) {
        (Some(remote), Some(task_uid)) => (remote, task_uid),
        (None, None) => return Ok(None),
        // exactly one of the two headers is present
        (remote, _) => {
            return Err(MeilisearchHttpError::InconsistentOriginHeaders {
                is_remote_missing: remote.is_none(),
            })
        }
    };

    // remote name: header value -> string -> URL-decoded
    let raw_remote_name =
        remote_header.to_str().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
            header_name: PROXY_ORIGIN_REMOTE_HEADER,
            msg: format!("while parsing remote name as UTF-8: {err}"),
        })?;
    let remote_name = urlencoding::decode(raw_remote_name).map_err(|err| {
        MeilisearchHttpError::InvalidHeaderValue {
            header_name: PROXY_ORIGIN_REMOTE_HEADER,
            msg: format!("while URL-decoding remote name: {err}"),
        }
    })?;

    // task uid: header value -> string -> URL-decoded -> integer
    let raw_task_uid =
        task_uid_header.to_str().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
            header_name: PROXY_ORIGIN_TASK_UID_HEADER,
            msg: format!("while parsing task UID as UTF-8: {err}"),
        })?;
    let decoded_task_uid = urlencoding::decode(raw_task_uid).map_err(|err| {
        MeilisearchHttpError::InvalidHeaderValue {
            header_name: PROXY_ORIGIN_TASK_UID_HEADER,
            msg: format!("while URL-decoding task UID: {err}"),
        }
    })?;
    let task_uid = decoded_task_uid.parse::<usize>().map_err(|err| {
        MeilisearchHttpError::InvalidHeaderValue {
            header_name: PROXY_ORIGIN_TASK_UID_HEADER,
            msg: format!("while parsing the task UID as an integer: {err}"),
        }
    })?;

    Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid }))
}

View File

@@ -29,6 +29,7 @@ use crate::routes::is_dry_run;
use crate::Opt;
pub mod documents;
mod enterprise_edition;
pub mod facet_search;
pub mod search;
mod search_analytics;
@@ -39,6 +40,8 @@ mod settings_analytics;
pub mod similar;
mod similar_analytics;
pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
#[derive(OpenApi)]
#[openapi(
nest(

View File

@@ -184,7 +184,7 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
.is_some_and(|s| s.to_lowercase() == "true"))
}
#[derive(Debug, Serialize, ToSchema)]
#[derive(Debug, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
/// The task unique identifier.
@@ -198,7 +198,10 @@ pub struct SummarizedTaskView {
#[serde(rename = "type")]
kind: Kind,
/// The date on which the task was enqueued.
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
#[serde(
serialize_with = "time::serde::rfc3339::serialize",
deserialize_with = "time::serde::rfc3339::deserialize"
)]
enqueued_at: OffsetDateTime,
}

View File

@@ -7,11 +7,12 @@ use deserr::Deserr;
use index_scheduler::IndexScheduler;
use itertools::{EitherOrBoth, Itertools};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::enterprise_edition::network::{Network as DbNetwork, Remote as DbRemote};
use meilisearch_types::error::deserr_codes::{
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl,
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
use serde::Serialize;
@@ -57,9 +58,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
{
"self": "ms-0",
"remotes": {
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
@@ -88,9 +89,9 @@ async fn get_network(
#[schema(rename_all = "camelCase")]
pub struct Remote {
#[schema(value_type = Option<String>, example = json!({
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
#[serde(default)]
@@ -99,6 +100,10 @@ pub struct Remote {
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
#[serde(default)]
pub search_api_key: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkWriteApiKey>)]
#[serde(default)]
pub write_api_key: Setting<String>,
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
@@ -114,6 +119,10 @@ pub struct Network {
#[serde(default, rename = "self")]
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
pub local: Setting<String>,
#[schema(value_type = Option<bool>, example = json!(true))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
pub sharding: Setting<bool>,
}
impl Remote {
@@ -136,6 +145,7 @@ impl Remote {
Ok(url)
})?,
search_api_key: self.search_api_key.set(),
write_api_key: self.write_api_key.set(),
})
}
}
@@ -174,9 +184,9 @@ impl Aggregate for PatchNetworkAnalytics {
{
"self": "ms-0",
"remotes": {
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
@@ -207,6 +217,19 @@ async fn patch_network(
Setting::NotSet => old_network.local,
};
let merged_sharding = match new_network.sharding {
Setting::Set(new_sharding) => new_sharding,
Setting::Reset => false,
Setting::NotSet => old_network.sharding,
};
if merged_sharding && merged_self.is_none() {
return Err(ResponseError::from_msg(
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
meilisearch_types::error::Code::InvalidNetworkSharding,
));
}
let merged_remotes = match new_network.remotes {
Setting::Set(new_remotes) => {
let mut merged_remotes = BTreeMap::new();
@@ -217,9 +240,17 @@ async fn patch_network(
{
match either_or_both {
EitherOrBoth::Both((key, old), (_, Some(new))) => {
let DbRemote { url: old_url, search_api_key: old_search_api_key } = old;
let DbRemote {
url: old_url,
search_api_key: old_search_api_key,
write_api_key: old_write_api_key,
} = old;
let Remote { url: new_url, search_api_key: new_search_api_key } = new;
let Remote {
url: new_url,
search_api_key: new_search_api_key,
write_api_key: new_write_api_key,
} = new;
let merged = DbRemote {
url: match new_url {
@@ -247,6 +278,11 @@ async fn patch_network(
Setting::Reset => None,
Setting::NotSet => old_search_api_key,
},
write_api_key: match new_write_api_key {
Setting::Set(new_write_api_key) => Some(new_write_api_key),
Setting::Reset => None,
Setting::NotSet => old_write_api_key,
},
};
merged_remotes.insert(key, merged);
}
@@ -274,7 +310,8 @@ async fn patch_network(
&req,
);
let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes };
let merged_network =
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
index_scheduler.put_network(merged_network.clone())?;
debug!(returns = ?merged_network, "Patch network");
Ok(HttpResponse::Ok().json(merged_network))

View File

@@ -9,8 +9,8 @@ use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use itertools::Itertools;
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{Network, Remote};
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
use meilisearch_types::milli::vector::Embedding;

View File

@@ -1,6 +1,6 @@
pub use error::ProxySearchError;
use error::ReqwestErrorWithoutUrl;
use meilisearch_types::features::Remote;
use meilisearch_types::enterprise_edition::network::Remote;
use rand::Rng as _;
use reqwest::{Client, Response, StatusCode};
use serde::de::DeserializeOwned;

View File

@@ -46,7 +46,7 @@ async fn errors_on_param() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `selfie`: expected one of `remotes`, `self`",
"message": "Unknown field `selfie`: expected one of `remotes`, `self`, `sharding`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
@@ -149,7 +149,7 @@ async fn errors_on_param() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`",
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`, `writeApiKey`",
"code": "invalid_network_remotes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
@@ -192,9 +192,11 @@ async fn errors_on_param() {
"remotes": {
"kefir": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
let (response, code) = server
@@ -266,7 +268,8 @@ async fn auth() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -274,11 +277,12 @@ async fn auth() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
{
"self": "master",
"remotes": {},
"sharding": false
}
"###);
// try get with get permission
server.use_api_key(get_network_key.as_str().unwrap());
@@ -286,11 +290,12 @@ async fn auth() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
{
"self": "master",
"remotes": {},
"sharding": false
}
"###);
// try update with update permission
server.use_api_key(update_network_key.as_str().unwrap());
@@ -303,11 +308,12 @@ async fn auth() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "api_key",
"remotes": {}
}
"###);
{
"self": "api_key",
"remotes": {},
"sharding": false
}
"###);
// try with the other's permission
let (response, code) = server.get_network().await;
@@ -383,7 +389,8 @@ async fn get_and_set_network() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": null,
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -393,7 +400,8 @@ async fn get_and_set_network() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -417,13 +425,16 @@ async fn get_and_set_network() {
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "foo"
"searchApiKey": "foo",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -443,13 +454,16 @@ async fn get_and_set_network() {
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -470,17 +484,21 @@ async fn get_and_set_network() {
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
},
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -498,13 +516,16 @@ async fn get_and_set_network() {
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -518,13 +539,16 @@ async fn get_and_set_network() {
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -538,13 +562,16 @@ async fn get_and_set_network() {
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -553,60 +580,69 @@ async fn get_and_set_network() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
"###);
},
"sharding": false
}
"###);
// still doing nothing
let (response, code) = server.set_network(json!({"remotes": {}})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
"###);
},
"sharding": false
}
"###);
// good time to check GET
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
"###);
},
"sharding": false
}
"###);
// deleting everything
let (response, code) = server
@@ -619,7 +655,8 @@ async fn get_and_set_network() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
}

View File

@@ -132,7 +132,8 @@ async fn remote_sharding() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -140,7 +141,8 @@ async fn remote_sharding() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
@@ -148,7 +150,8 @@ async fn remote_sharding() {
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -444,7 +447,8 @@ async fn remote_sharding_retrieve_vectors() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -452,7 +456,8 @@ async fn remote_sharding_retrieve_vectors() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
@@ -460,7 +465,8 @@ async fn remote_sharding_retrieve_vectors() {
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -934,7 +940,8 @@ async fn error_unregistered_remote() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -942,7 +949,8 @@ async fn error_unregistered_remote() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1052,7 +1060,8 @@ async fn error_no_weighted_score() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1060,7 +1069,8 @@ async fn error_no_weighted_score() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1185,7 +1195,8 @@ async fn error_bad_response() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1193,7 +1204,8 @@ async fn error_bad_response() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1322,7 +1334,8 @@ async fn error_bad_request() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1330,7 +1343,8 @@ async fn error_bad_request() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1452,7 +1466,8 @@ async fn error_bad_request_facets_by_index() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1460,7 +1475,8 @@ async fn error_bad_request_facets_by_index() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1593,7 +1609,8 @@ async fn error_bad_request_facets_by_index_facet() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1601,7 +1618,8 @@ async fn error_bad_request_facets_by_index_facet() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1743,7 +1761,8 @@ async fn error_remote_does_not_answer() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1751,7 +1770,8 @@ async fn error_remote_does_not_answer() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1944,7 +1964,8 @@ async fn error_remote_404() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1952,7 +1973,8 @@ async fn error_remote_404() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -2139,7 +2161,8 @@ async fn error_remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -2147,7 +2170,8 @@ async fn error_remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -2299,7 +2323,8 @@ async fn remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -2307,7 +2332,8 @@ async fn remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -2454,7 +2480,8 @@ async fn error_remote_500() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -2462,7 +2489,8 @@ async fn error_remote_500() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -2633,7 +2661,8 @@ async fn error_remote_500_once() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -2641,7 +2670,8 @@ async fn error_remote_500_once() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);

View File

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.17.1");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.19.0");
}
#[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.17.2 is higher than the Meilisearch version 1.17.1. Downgrade is not supported");
snapshot!(err, @"Database version 1.19.1 is higher than the Meilisearch version 1.19.0. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.17.1"
"upgradeTo": "v1.19.0"
},
"error": null,
"duration": "[duration]",

View File

@@ -109,6 +109,7 @@ utoipa = { version = "5.4.0", features = [
"openapi_extensions",
] }
lru = "0.14.0"
twox-hash = { version = "2.1.1", default-features = false, features = ["std", "xxhash3_64", "xxhash64"] }
[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }

View File

@@ -76,6 +76,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -84,6 +84,7 @@ impl TempIndex {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -167,6 +168,7 @@ impl TempIndex {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -242,6 +244,7 @@ fn aborting_indexation() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -1977,6 +1977,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2029,6 +2030,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2117,6 +2119,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2306,6 +2309,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2369,6 +2373,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2423,6 +2428,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2476,6 +2482,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2531,6 +2538,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2591,6 +2599,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2644,6 +2653,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2697,6 +2707,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2908,6 +2919,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2968,6 +2980,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -3025,6 +3038,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -17,6 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
use crate::documents::PrimaryKey;
use crate::progress::{AtomicPayloadStep, Progress};
use crate::update::new::document::{DocumentContext, Versions};
use crate::update::new::indexer::enterprise_edition::sharding::Shards;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::{DocumentIdentifiers, Insertion, Update};
@@ -71,6 +72,7 @@ impl<'pl> DocumentOperation<'pl> {
new_fields_ids_map: &mut FieldsIdsMap,
must_stop_processing: &MSP,
progress: Progress,
shards: Option<&Shards>,
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
where
MSP: Fn() -> bool,
@@ -107,6 +109,7 @@ impl<'pl> DocumentOperation<'pl> {
&mut bytes,
&docids_version_offsets,
IndexDocumentsMethod::ReplaceDocuments,
shards,
payload,
),
Payload::Update(payload) => extract_addition_payload_changes(
@@ -120,6 +123,7 @@ impl<'pl> DocumentOperation<'pl> {
&mut bytes,
&docids_version_offsets,
IndexDocumentsMethod::UpdateDocuments,
shards,
payload,
),
Payload::Deletion(to_delete) => extract_deletion_payload_changes(
@@ -127,6 +131,7 @@ impl<'pl> DocumentOperation<'pl> {
rtxn,
&mut available_docids,
&docids_version_offsets,
shards,
to_delete,
),
};
@@ -173,6 +178,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
bytes: &mut u64,
main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
method: IndexDocumentsMethod,
shards: Option<&Shards>,
payload: &'pl [u8],
) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};
@@ -210,12 +216,20 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
primary_key.as_ref().unwrap()
};
let current_offset = iter.byte_offset();
let content = &payload[previous_offset..current_offset];
previous_offset = current_offset;
let external_id =
retrieved_primary_key.extract_fields_and_docid(doc, new_fields_ids_map, indexer)?;
let external_id = external_id.to_de();
let current_offset = iter.byte_offset();
let document_offset = DocumentOffset { content: &payload[previous_offset..current_offset] };
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
continue;
}
let document_offset = DocumentOffset { content };
match main_docids_version_offsets.get(external_id) {
None => {
@@ -299,8 +313,6 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
},
},
}
previous_offset = iter.byte_offset();
}
if payload.is_empty() {
@@ -329,11 +341,16 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>(
rtxn: &RoTxn,
available_docids: &mut AvailableIds,
main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
shards: Option<&Shards>,
to_delete: &'pl [&'pl str],
) -> Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>> {
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
for external_id in to_delete {
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
continue;
}
match main_docids_version_offsets.get(external_id) {
None => {
match index.external_documents_ids().get(rtxn, external_id) {

View File

@@ -0,0 +1,6 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
pub mod sharding;

View File

@@ -0,0 +1,22 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::hash::{BuildHasher as _, BuildHasherDefault};
/// Describes how documents are split between this instance and the other ones.
///
/// `own` lists the shard names handled locally, `others` lists every shard name
/// handled elsewhere.
pub struct Shards {
    pub own: Vec<String>,
    pub others: Vec<String>,
}

impl Shards {
    /// Tells whether the document identified by `docid` belongs to one of our own shards.
    ///
    /// Every shard name is hashed together with `docid`; the document is ours when at
    /// least one of the `own` shards scores strictly higher than the best score among
    /// `others` (highest-hash-wins, in the manner of rendezvous hashing).
    ///
    /// - With no `others`, the score to beat is `0`.
    /// - With no `own` shards, the answer is always `false`.
    /// - A tie with the best foreign score is not a win, as the comparison is strict.
    pub fn must_process(&self, docid: &str) -> bool {
        let build_hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
        let score = |shard: &String| build_hasher.hash_one((shard, docid));

        // Best score reached by any shard we do not own.
        let best_foreign = self.others.iter().map(score).max().unwrap_or_default();

        self.own.iter().any(|shard| score(shard) > best_foreign)
    }
}

View File

@@ -31,6 +31,7 @@ pub(crate) mod de;
pub mod document_changes;
mod document_deletion;
mod document_operation;
pub mod enterprise_edition;
mod extract;
mod guess_primary_key;
mod partial_dump;

View File

@@ -8,7 +8,6 @@ use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use v1_15::Latest_V1_14_To_Latest_V1_15;
use v1_16::Latest_V1_16_To_V1_17_0;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
@@ -35,7 +34,6 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
&Latest_V1_15_To_V1_16_0 {},
&Latest_V1_16_To_V1_17_0 {},
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
@@ -64,7 +62,9 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
(1, 15, _) => function_index!(6),
(1, 16, _) => function_index!(7),
(1, 17, _) => function_index!(8),
(1, 17, _) => function_index!(7),
(1, 18, _) => function_index!(7),
(1, 19, _) => function_index!(7),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,

View File

@@ -46,22 +46,3 @@ impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
(1, 16, 0)
}
}
/// Upgrade step whose target version is v1.17.0.
///
/// The step does not touch the index: every argument of `upgrade` is ignored.
// The name deliberately embeds version numbers separated by underscores, hence the lint exemption.
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_16_To_V1_17_0();
impl UpgradeIndex for Latest_V1_16_To_V1_17_0 {
/// Does nothing and always returns `Ok(false)`.
// NOTE(review): the meaning of the returned boolean is defined by the `UpgradeIndex` trait,
// which is not visible here; confirm against the trait documentation.
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
/// Returns the `(major, minor, patch)` triple `(1, 17, 0)`.
fn target_version(&self) -> (u32, u32, u32) {
(1, 17, 0)
}
}

View File

@@ -59,6 +59,7 @@ fn test_facet_distribution_with_no_facet_values() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -97,6 +97,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -329,6 +329,7 @@ fn criteria_ascdesc() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -138,6 +138,7 @@ fn test_typo_disabled_on_word() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -42,3 +42,5 @@ tracing = "0.1.41"
tracing-subscriber = "0.3.19"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.17.0", features = ["v7", "serde"] }
similar-asserts = "1.7.0"
chrono = "0.4"

View File

@@ -1,194 +0,0 @@
use std::collections::BTreeMap;
use std::fmt::Display;
use std::io::Read as _;
use anyhow::{bail, Context as _};
use serde::Deserialize;
use super::assets::{fetch_asset, Asset};
use super::client::{Client, Method};
/// A single HTTP request described in a workload file.
///
/// `body` and `synchronous` are optional when deserializing and fall back to their
/// `Default` values.
#[derive(Clone, Deserialize)]
pub struct Command {
/// Route of the request, handed as-is to the client when the command is run.
pub route: String,
/// HTTP method of the request.
pub method: Method,
/// Payload of the request; defaults to `Body::Empty`.
#[serde(default)]
pub body: Body,
/// What to wait for once the command has been sent; defaults to `SyncMode::WaitForResponse`.
#[serde(default)]
pub synchronous: SyncMode,
}
/// Payload of a [`Command`].
///
/// The enum is `untagged`: the variant is selected from the shape of the deserialized value
/// (an `inline` key, an `asset` key, or neither).
#[derive(Default, Clone, Deserialize)]
#[serde(untagged)]
pub enum Body {
/// JSON value written directly in the workload; sent as `application/json`.
Inline {
inline: serde_json::Value,
},
/// Name of an asset whose content is read and sent as the body.
Asset {
asset: String,
},
/// No body at all.
#[default]
Empty,
}
impl Body {
    /// Resolves the body into the raw bytes to send and their content type.
    ///
    /// Returns `Ok(None)` for [`Body::Empty`]. An inline body is serialized to JSON bytes,
    /// an asset body is fetched through `fetch_asset` and read entirely into memory.
    ///
    /// # Errors
    ///
    /// Fails when the inline value cannot be serialized, or when the asset cannot be
    /// fetched or read.
    pub fn get(
        self,
        assets: &BTreeMap<String, Asset>,
        asset_folder: &str,
    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
        match self {
            Body::Empty => Ok(None),
            Body::Inline { inline } => {
                let bytes = serde_json::to_vec(&inline)
                    .context("serializing to bytes")
                    .context("while getting inline body")?;
                Ok(Some((bytes, "application/json")))
            }
            Body::Asset { asset: name } => {
                // Same context message for both the fetch and the read failure.
                let describe = || format!("while getting body from asset '{name}'");
                let (mut reader, format) =
                    fetch_asset(&name, assets, asset_folder).with_context(describe)?;
                let mut bytes = Vec::new();
                reader.read_to_end(&mut bytes).with_context(describe)?;
                Ok(Some((bytes, format.to_content_type(&name))))
            }
        }
    }
}
impl Display for Command {
    /// Renders the command as `<method> <route> (<sync mode>)`, the method and the
    /// synchronization mode being printed with their `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { method, route, synchronous, .. } = self;
        write!(f, "{:?} {} ({:?})", method, route, synchronous)
    }
}
/// What to wait for after a command has been sent.
///
/// `run_batch` only looks at the mode of the last command of a batch.
#[derive(Default, Debug, Clone, Copy, Deserialize)]
pub enum SyncMode {
/// `run_batch` performs no additional waiting once the batch has been executed.
DontWait,
/// Default mode. `run_batch` performs no additional waiting once the batch has been executed.
// NOTE(review): inside `run_batch` this behaves like `DontWait`; the distinction presumably
// comes from how callers split commands into batches; confirm against the callers.
#[default]
WaitForResponse,
/// `run_batch` polls the task queue until no task is enqueued or processing anymore.
WaitForTask,
}
/// Runs every command of `batch` concurrently, then honours the synchronization mode of
/// the last command of the batch.
///
/// An empty batch is a no-op. Each command is executed by [`run`] on its own spawned task;
/// all tasks are joined before the synchronization mode is applied, and the first failure
/// (panic or command error) is returned.
///
/// # Errors
///
/// Fails when a spawned task panics, when a command fails, or when waiting for the task
/// queue fails (`SyncMode::WaitForTask` only).
pub async fn run_batch(
    client: &Client,
    batch: &[Command],
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<()> {
    let [.., last] = batch else { return Ok(()) };
    let sync = last.synchronous;

    // Spawned tasks must be `'static` and therefore cannot borrow `assets` or `asset_folder`.
    // Copy them once per batch and share that copy through an `Arc`, rather than
    // deep-copying the whole map and the folder string for every single command.
    let assets = std::sync::Arc::new(assets.clone());
    let asset_folder: std::sync::Arc<str> = std::sync::Arc::from(asset_folder);

    let mut tasks = tokio::task::JoinSet::new();
    for command in batch {
        tasks.spawn({
            let client = client.clone();
            let command = command.clone();
            let assets = std::sync::Arc::clone(&assets);
            let asset_folder = std::sync::Arc::clone(&asset_folder);
            async move { run(client, command, &assets, &asset_folder).await }
        });
    }

    while let Some(result) = tasks.join_next().await {
        result
            .context("panicked while executing command")?
            .context("error while executing command")?;
    }

    match sync {
        SyncMode::DontWait => {}
        SyncMode::WaitForResponse => {}
        SyncMode::WaitForTask => wait_for_tasks(client).await?,
    }

    Ok(())
}
/// Blocks until the server reports no enqueued or processing task.
///
/// The `tasks?statuses=enqueued,processing` route is polled, with a one-second pause
/// between two polls, until the `total` field of the response is `0`.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the response is not JSON, or when `total`
/// is missing or is not an unsigned integer.
async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
    loop {
        let response: serde_json::Value = client
            .get("tasks?statuses=enqueued,processing")
            .send()
            .await
            .context("could not wait for tasks")?
            .json()
            .await
            .context("could not deserialize response to JSON")
            .context("could not wait for tasks")?;

        let pending = match response.get("total") {
            Some(serde_json::Value::Number(number)) => number.as_u64().with_context(|| {
                format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
            })?,
            Some(thing_else) => {
                bail!("waiting for tasks: could not parse 'total' as a number, got '{thing_else}'")
            }
            None => {
                bail!("waiting for tasks: expected response to contain 'total', got '{response}'")
            }
        };

        if pending == 0 {
            return Ok(());
        }

        // Some tasks are still pending: give the server some time before asking again.
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }
}
/// Sends a single command to the server and checks the status of the response.
///
/// The body of the command, if any, is resolved with [`Body::get`] and sent along with its
/// content type.
///
/// # Errors
///
/// Fails when the body cannot be resolved, when the request cannot be sent, or when the
/// server answers with a 4xx or 5xx status (the JSON response is embedded in the error).
#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
pub async fn run(
    client: Client,
    mut command: Command,
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<()> {
    // Swap the body out for an empty one so that `command` stays whole and can still be
    // displayed in the error contexts below.
    let body = std::mem::take(&mut command.body)
        .get(assets, asset_folder)
        .with_context(|| format!("while getting body for command {command}"))?;

    let mut request = client.request(command.method.into(), &command.route);
    if let Some((body, content_type)) = body {
        request = request.body(body).header(reqwest::header::CONTENT_TYPE, content_type);
    }

    let response =
        request.send().await.with_context(|| format!("error sending command: {}", command))?;

    let code = response.status();

    if code.is_client_error() {
        tracing::error!(%command, %code, "error in workload file");
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("parsing error in workload file when sending command")?;
        bail!("error in workload file: server responded with error code {code} and '{response}'")
    }

    if code.is_server_error() {
        tracing::error!(%command, %code, "server error");
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("parsing server error when sending command")?;
        bail!("server error: server responded with error code {code} and '{response}'")
    }

    Ok(())
}

View File

@@ -7,9 +7,9 @@ use tokio::task::AbortHandle;
use tracing_trace::processor::span_stats::CallStats;
use uuid::Uuid;
use super::client::Client;
use super::env_info;
use super::workload::Workload;
use super::workload::BenchWorkload;
use crate::common::client::Client;
#[derive(Debug, Clone)]
pub enum DashboardClient {
@@ -89,7 +89,7 @@ impl DashboardClient {
pub async fn create_workload(
&self,
invocation_uuid: Uuid,
workload: &Workload,
workload: &BenchWorkload,
) -> anyhow::Result<Uuid> {
let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) };

View File

@@ -1,38 +1,22 @@
mod assets;
mod client;
mod command;
mod dashboard;
mod env_info;
mod meili_process;
mod workload;
use std::io::LineWriter;
use std::path::PathBuf;
use crate::common::args::CommonArgs;
use crate::common::logs::setup_logs;
use crate::common::workload::Workload;
use std::{path::PathBuf, sync::Arc};
use anyhow::Context;
use anyhow::{bail, Context};
use clap::Parser;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use self::client::Client;
use self::workload::Workload;
use crate::common::client::Client;
pub use workload::BenchWorkload;
/// Returns the default HTTP address, `127.0.0.1:7700`.
pub fn default_http_addr() -> String {
    String::from("127.0.0.1:7700")
}
/// Returns the default value of the `report_folder` argument, `./bench/reports/`.
pub fn default_report_folder() -> String {
    String::from("./bench/reports/")
}
/// Returns the default value of the `asset_folder` argument, `./bench/assets/`.
pub fn default_asset_folder() -> String {
    String::from("./bench/assets/")
}
/// Returns the default value of the `log_filter` argument, `info`.
pub fn default_log_filter() -> String {
    String::from("info")
}
/// Returns the default URL of the dashboard, `http://localhost:9001`.
pub fn default_dashboard_url() -> String {
    String::from("http://localhost:9001")
}
@@ -40,12 +24,13 @@ pub fn default_dashboard_url() -> String {
/// Run benchmarks from a workload
#[derive(Parser, Debug)]
pub struct BenchDeriveArgs {
/// Filename of the workload file, pass multiple filenames
/// to run multiple workloads in the specified order.
///
/// Each workload run will get its own report file.
#[arg(value_name = "WORKLOAD_FILE", last = false)]
workload_file: Vec<PathBuf>,
/// Common arguments shared with other commands
#[command(flatten)]
common: CommonArgs,
/// Meilisearch master keys
#[arg(long)]
pub master_key: Option<String>,
/// URL of the dashboard.
#[arg(long, default_value_t = default_dashboard_url())]
@@ -59,34 +44,14 @@ pub struct BenchDeriveArgs {
#[arg(long, default_value_t = default_report_folder())]
report_folder: String,
/// Directory to store the remote assets.
#[arg(long, default_value_t = default_asset_folder())]
asset_folder: String,
/// Log directives
#[arg(short, long, default_value_t = default_log_filter())]
log_filter: String,
/// Benchmark dashboard API key
#[arg(long)]
api_key: Option<String>,
/// Meilisearch master keys
#[arg(long)]
master_key: Option<String>,
/// Authentication bearer for fetching assets
#[arg(long)]
assets_key: Option<String>,
/// Reason for the benchmark invocation
#[arg(short, long)]
reason: Option<String>,
/// The maximum time in seconds we allow for fetching the task queue before timing out.
#[arg(long, default_value_t = 60)]
tasks_queue_timeout_secs: u64,
/// The path to the binary to run.
///
/// If unspecified, runs `cargo run` after building Meilisearch with `cargo build`.
@@ -95,17 +60,7 @@ pub struct BenchDeriveArgs {
}
pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
// setup logs
let filter: tracing_subscriber::filter::Targets =
args.log_filter.parse().context("invalid --log-filter")?;
let subscriber = tracing_subscriber::registry().with(
tracing_subscriber::fmt::layer()
.with_writer(|| LineWriter::new(std::io::stderr()))
.with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
.with_filter(filter),
);
tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
setup_logs(&args.common.log_filter)?;
// fetch environment and build info
let env = env_info::Environment::generate_from_current_config();
@@ -116,8 +71,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
let _scope = rt.enter();
// setup clients
let assets_client =
Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
let assets_client = Client::new(
None,
args.common.assets_key.as_deref(),
Some(std::time::Duration::from_secs(3600)), // 1h
)?;
let dashboard_client = if args.no_dashboard {
dashboard::DashboardClient::new_dry()
@@ -134,11 +92,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
None,
)?;
let meili_client = Client::new(
let meili_client = Arc::new(Client::new(
Some("http://127.0.0.1:7700".into()),
args.master_key.as_deref(),
Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)),
)?;
Some(std::time::Duration::from_secs(args.common.tasks_queue_timeout_secs)),
)?);
// enter runtime
@@ -146,11 +104,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
dashboard_client.send_machine_info(&env).await?;
let commit_message = build_info.commit_msg.unwrap_or_default().split('\n').next().unwrap();
let max_workloads = args.workload_file.len();
let max_workloads = args.common.workload_file.len();
let reason: Option<&str> = args.reason.as_deref();
let invocation_uuid = dashboard_client.create_invocation(build_info.clone(), commit_message, env, max_workloads, reason).await?;
tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
tracing::info!(workload_count = args.common.workload_file.len(), "handling workload files");
// main task
let workload_runs = tokio::spawn(
@@ -158,13 +116,17 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
let dashboard_client = dashboard_client.clone();
let mut dashboard_urls = Vec::new();
async move {
for workload_file in args.workload_file.iter() {
for workload_file in args.common.workload_file.iter() {
let workload: Workload = serde_json::from_reader(
std::fs::File::open(workload_file)
.with_context(|| format!("error opening {}", workload_file.display()))?,
)
.with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;
let Workload::Bench(workload) = workload else {
bail!("workload file {} is not a bench workload", workload_file.display());
};
let workload_name = workload.name.clone();
workload::execute(

View File

@@ -1,24 +1,27 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{Seek as _, Write as _};
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context as _};
use futures_util::TryStreamExt as _;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::task::JoinHandle;
use uuid::Uuid;
use super::assets::Asset;
use super::client::Client;
use super::command::SyncMode;
use super::dashboard::DashboardClient;
use super::BenchDeriveArgs;
use crate::bench::{assets, meili_process};
use crate::common::assets::{self, Asset};
use crate::common::client::Client;
use crate::common::command::{run_commands, Command};
use crate::common::process::{self, delete_db, start_meili};
#[derive(Deserialize)]
pub struct Workload {
/// A bench workload.
/// Not to be confused with [a test workload](crate::test::workload::Workload).
#[derive(Serialize, Deserialize, Debug)]
pub struct BenchWorkload {
pub name: String,
pub run_count: u16,
pub extra_cli_args: Vec<String>,
@@ -26,30 +29,33 @@ pub struct Workload {
#[serde(default)]
pub target: String,
#[serde(default)]
pub precommands: Vec<super::command::Command>,
pub commands: Vec<super::command::Command>,
pub precommands: Vec<Command>,
pub commands: Vec<Command>,
}
async fn run_commands(
async fn run_workload_commands(
dashboard_client: &DashboardClient,
logs_client: &Client,
meili_client: &Client,
meili_client: &Arc<Client>,
workload_uuid: Uuid,
workload: &Workload,
workload: &BenchWorkload,
args: &BenchDeriveArgs,
run_number: u16,
) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
let report_folder = &args.report_folder;
let workload_name = &workload.name;
let assets = Arc::new(workload.assets.clone());
let asset_folder = args.common.asset_folder.clone().leak();
for batch in workload
.precommands
.as_slice()
.split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
{
super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
.await?;
}
run_commands(
meili_client,
&workload.precommands,
&assets,
asset_folder,
&mut HashMap::new(),
false,
)
.await?;
std::fs::create_dir_all(report_folder)
.with_context(|| format!("could not create report directory at {report_folder}"))?;
@@ -59,14 +65,15 @@ async fn run_commands(
let report_handle = start_report(logs_client, trace_filename, &workload.target).await?;
for batch in workload
.commands
.as_slice()
.split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
{
super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
.await?;
}
run_commands(
meili_client,
&workload.commands,
&assets,
asset_folder,
&mut HashMap::new(),
false,
)
.await?;
let processor =
stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
@@ -81,14 +88,14 @@ pub async fn execute(
assets_client: &Client,
dashboard_client: &DashboardClient,
logs_client: &Client,
meili_client: &Client,
meili_client: &Arc<Client>,
invocation_uuid: Uuid,
master_key: Option<&str>,
workload: Workload,
workload: BenchWorkload,
args: &BenchDeriveArgs,
binary_path: Option<&Path>,
) -> anyhow::Result<()> {
assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
assets::fetch_assets(assets_client, &workload.assets, &args.common.asset_folder).await?;
let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?;
@@ -129,38 +136,26 @@ pub async fn execute(
async fn execute_run(
dashboard_client: &DashboardClient,
logs_client: &Client,
meili_client: &Client,
meili_client: &Arc<Client>,
workload_uuid: Uuid,
master_key: Option<&str>,
workload: &Workload,
workload: &BenchWorkload,
args: &BenchDeriveArgs,
binary_path: Option<&Path>,
run_number: u16,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
meili_process::delete_db();
delete_db().await;
let run_command = match binary_path {
Some(binary_path) => tokio::process::Command::new(binary_path),
None => {
meili_process::build().await?;
let mut command = tokio::process::Command::new("cargo");
command
.arg("run")
.arg("--release")
.arg("-p")
.arg("meilisearch")
.arg("--bin")
.arg("meilisearch")
.arg("--");
command
}
};
let meilisearch = start_meili(
meili_client,
master_key,
&workload.extra_cli_args,
&workload.name,
binary_path,
)
.await?;
let meilisearch =
meili_process::start(meili_client, master_key, workload, &args.asset_folder, run_command)
.await?;
let processor = run_commands(
let processor = run_workload_commands(
dashboard_client,
logs_client,
meili_client,
@@ -171,7 +166,7 @@ async fn execute_run(
)
.await?;
meili_process::kill(meilisearch).await;
process::kill_meili(meilisearch).await;
tracing::info!(run_number, "Successful run");

View File

@@ -0,0 +1,36 @@
use clap::Parser;
use std::path::PathBuf;
/// Returns the default value of [`CommonArgs::asset_folder`], `./bench/assets/`.
pub fn default_asset_folder() -> String {
    String::from("./bench/assets/")
}
/// Returns the default value of [`CommonArgs::log_filter`], `info`.
pub fn default_log_filter() -> String {
    String::from("info")
}
// Command-line arguments shared by several commands: the workload files to run, where
// remote assets are stored, the log filter, the key used to fetch assets, and the timeout
// applied when fetching the task queue.
//
// This description is a plain `//` comment on purpose: clap's derive macros turn `///`
// comments into user-visible help text, so a doc comment here could alter the CLI output.
#[derive(Parser, Debug, Clone)]
pub struct CommonArgs {
/// Filename of the workload file, pass multiple filenames
/// to run multiple workloads in the specified order.
///
/// For benches, each workload run will get its own report file.
#[arg(value_name = "WORKLOAD_FILE", last = false)]
pub workload_file: Vec<PathBuf>,
/// Directory to store the remote assets.
#[arg(long, default_value_t = default_asset_folder())]
pub asset_folder: String,
/// Log directives
#[arg(short, long, default_value_t = default_log_filter())]
pub log_filter: String,
/// Authentication bearer for fetching assets
#[arg(long)]
pub assets_key: Option<String>,
/// The maximum time in seconds we allow for fetching the task queue before timing out.
#[arg(long, default_value_t = 60)]
pub tasks_queue_timeout_secs: u64,
}

View File

@@ -3,21 +3,22 @@ use std::io::{Read as _, Seek as _, Write as _};
use anyhow::{bail, Context};
use futures_util::TryStreamExt as _;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use sha2::Digest;
use super::client::Client;
#[derive(Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Asset {
pub local_location: Option<String>,
pub remote_location: Option<String>,
#[serde(default)]
#[serde(default, skip_serializing_if = "AssetFormat::is_default")]
pub format: AssetFormat,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sha256: Option<String>,
}
#[derive(Deserialize, Default, Copy, Clone)]
#[derive(Serialize, Deserialize, Default, Copy, Clone, Debug)]
pub enum AssetFormat {
#[default]
Auto,
@@ -27,6 +28,10 @@ pub enum AssetFormat {
}
impl AssetFormat {
fn is_default(&self) -> bool {
matches!(self, AssetFormat::Auto)
}
pub fn to_content_type(self, filename: &str) -> &'static str {
match self {
AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename),
@@ -166,7 +171,14 @@ fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::R
}
}
None => {
tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. Please add it to workload file");
let msg = match name.starts_with("meilisearch-v") {
true => "Please add it to xtask/src/test/versions.rs",
false => "Please add it to workload file",
};
tracing::warn!(
sha256 = file_hash,
"Skipping hash for asset {name} that doesn't have one. {msg}"
);
true
}
})

View File

@@ -1,5 +1,5 @@
use anyhow::Context;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone)]
pub struct Client {
@@ -61,7 +61,7 @@ impl Client {
}
}
#[derive(Debug, Clone, Copy, Deserialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum Method {
Get,

View File

@@ -0,0 +1,398 @@
use std::collections::{BTreeMap, HashMap};
use std::fmt::Display;
use std::io::Read as _;
use std::sync::Arc;
use anyhow::{bail, Context as _};
use reqwest::StatusCode;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use similar_asserts::SimpleDiff;
use crate::common::assets::{fetch_asset, Asset};
use crate::common::client::{Client, Method};
/// A single HTTP request described in a workload file.
///
/// Field names are (de)serialized in camelCase and unknown fields are rejected.
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Command {
/// Route of the request. `run` replaces a `{{name}}` placeholder with the registered
/// string value of that name before sending.
pub route: String,
/// HTTP method of the request.
pub method: Method,
/// Body of the request; defaults to [`Body::Empty`] when the field is absent.
#[serde(default)]
pub body: Body,
/// When set, `run` fails if the response status code differs from it.
/// Only checked when `run` is not asked to return the response.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub expected_status: Option<u16>,
/// When set, `run` compares the JSON response against it with `json_eq_ignore`,
/// unless it was asked to return the response instead.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub expected_response: Option<serde_json::Value>,
/// Values to extract from the response: maps a variable name to the path that
/// `run_batch` passes to `serde_json::Value::pointer` on the response.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub register: HashMap<String, String>,
/// Name of a registered variable whose string value is sent as
/// `Authorization: Bearer <value>`; `run` fails if no such string value is registered.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub api_key_variable: Option<String>,
/// How the runner synchronizes after this command; see [`SyncMode`].
#[serde(default)]
pub synchronous: SyncMode,
}
/// Body of a [`Command`].
///
/// The enum is untagged: the variant is recognized from the shape of the value
/// (an object with an `inline` key, an object with an `asset` key, or nothing).
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(untagged)]
pub enum Body {
/// A JSON value written directly in the workload; sent as `application/json`.
Inline {
inline: serde_json::Value,
},
/// The name of an asset whose file content is sent as the body.
Asset {
asset: String,
},
/// No body at all (the default).
#[default]
Empty,
}
impl Body {
    /// Turns the body into the bytes to send and their content type.
    ///
    /// - [`Body::Empty`] yields `None`.
    /// - [`Body::Inline`] is serialized as JSON after every string value of the form
    ///   `{{name}}` has been replaced by the value registered under `name` (if any).
    /// - [`Body::Asset`] reads the whole asset file fetched through `fetch_asset`; the
    ///   content type is derived from the asset format and its name.
    ///
    /// # Errors
    ///
    /// Fails when the inline value cannot be serialized or when the asset cannot be
    /// fetched or read.
    pub fn get(
        self,
        assets: &BTreeMap<String, Asset>,
        registered: &HashMap<String, Value>,
        asset_folder: &str,
    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
        /// Walks `value` and swaps every `{{name}}` string for its registered value.
        fn insert_variables(value: &mut Value, registered: &HashMap<String, Value>) {
            match value {
                Value::Object(map) => {
                    for (_key, entry) in map.iter_mut() {
                        insert_variables(entry, registered);
                    }
                }
                Value::Array(items) => {
                    for item in items {
                        insert_variables(item, registered);
                    }
                }
                Value::String(text) => {
                    // Only strings that are entirely a placeholder are substituted.
                    let replacement = text
                        .strip_prefix("{{")
                        .and_then(|rest| rest.strip_suffix("}}"))
                        .and_then(|name| registered.get(name.trim()));
                    if let Some(replacement) = replacement {
                        *value = replacement.clone();
                    }
                }
                Value::Null | Value::Bool(_) | Value::Number(_) => (),
            }
        }

        let payload = match self {
            Body::Empty => None,
            Body::Asset { asset: name } => {
                let context = || format!("while getting body from asset '{name}'");
                let (mut file, format) =
                    fetch_asset(&name, assets, asset_folder).with_context(context)?;
                let mut bytes = Vec::new();
                file.read_to_end(&mut bytes).with_context(context)?;
                Some((bytes, format.to_content_type(&name)))
            }
            Body::Inline { inline: mut json } => {
                if !registered.is_empty() {
                    insert_variables(&mut json, registered);
                }
                let bytes = serde_json::to_vec(&json)
                    .context("serializing to bytes")
                    .context("while getting inline body")?;
                Some((bytes, "application/json"))
            }
        };
        Ok(payload)
    }
}
impl Display for Command {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
}
}
/// How the runner synchronizes after a [`Command`].
///
/// `run_commands` cuts the command list into batches that end at the first command
/// that is not `DontWait`; `run_batch` then acts on the mode of the batch's last command.
#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
pub enum SyncMode {
/// Does not end a batch: the following commands are spawned alongside this one.
DontWait,
/// Ends the batch; nothing more is done once the batch's responses are in (the default).
#[default]
WaitForResponse,
/// Ends the batch, then `wait_for_tasks` polls until no task is enqueued or processing.
WaitForTask,
}
/// Spawns every command of `batch` as its own task, then awaits them in order.
///
/// For each command that produced a response, the values listed in its `register`
/// map are extracted from the response and stored into `registered`. Responses are
/// collected (in batch order) only when `return_response` is set. Once all tasks are
/// done, the sync mode of the last command decides whether to wait for the task queue.
///
/// An empty batch returns an empty list.
async fn run_batch(
    client: &Arc<Client>,
    batch: &[Command],
    assets: &Arc<BTreeMap<String, Asset>>,
    asset_folder: &'static str,
    registered: &mut HashMap<String, Value>,
    return_response: bool,
) -> anyhow::Result<Vec<(Value, StatusCode)>> {
    let Some(last) = batch.last() else { return Ok(Vec::new()) };
    let sync = last.synchronous;

    let handles: Vec<_> = batch
        .iter()
        .map(|command| {
            let command = command.clone();
            let client = Arc::clone(client);
            let assets = Arc::clone(assets);
            let needs_response = return_response || !command.register.is_empty();
            // FIXME: every task gets its own copy of the whole map, which is wasteful.
            let snapshot = registered.clone();
            tokio::spawn(async move {
                run(&client, &command, &assets, snapshot, asset_folder, needs_response).await
            })
        })
        .collect();

    let capacity = if return_response { batch.len() } else { 0 };
    let mut outputs = Vec::with_capacity(capacity);
    for (handle, command) in handles.into_iter().zip(batch) {
        let Some(output) = handle.await.context("task panicked")?? else { continue };
        for (name, path) in &command.register {
            let value = output
                .0
                .pointer(path)
                .with_context(|| format!("could not find path '{path}' in response (required to register '{name}')"))?
                .clone();
            registered.insert(name.clone(), value);
        }
        if return_response {
            outputs.push(output);
        }
    }

    if sync == SyncMode::WaitForTask {
        wait_for_tasks(client).await?;
    }
    Ok(outputs)
}
/// Polls `tasks?statuses=enqueued,processing` once per second until the reported
/// `total` is zero.
///
/// # Errors
///
/// Fails when the request or JSON decoding fails, or when the response has no
/// `total` field or one that is not an unsigned integer.
async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
    loop {
        let response: serde_json::Value = client
            .get("tasks?statuses=enqueued,processing")
            .send()
            .await
            .context("could not wait for tasks")?
            .json()
            .await
            .context("could not deserialize response to JSON")
            .context("could not wait for tasks")?;

        let pending = match response.get("total") {
            None => {
                bail!("waiting for tasks: expected response to contain 'total', got '{response}'")
            }
            Some(serde_json::Value::Number(number)) => number.as_u64().with_context(|| {
                format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
            })?,
            Some(thing_else) => {
                bail!("waiting for tasks: could not parse 'total' as a number, got '{thing_else}'")
            }
        };

        if pending == 0 {
            return Ok(());
        }
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }
}
/// Compares `value` against `reference`, treating bracketed reference strings as wildcards.
///
/// A reference string that starts with `[` and ends with `]` (such as `"[timestamp]"`)
/// matches any value. Arrays must have the same length and match element by element;
/// objects must have the same number of keys and every reference key must be present
/// with a matching value. Everything else is compared for plain equality.
fn json_eq_ignore(reference: &Value, value: &Value) -> bool {
    match (reference, value) {
        (Value::String(text), _) => {
            (text.starts_with('[') && text.ends_with(']')) || reference == value
        }
        (Value::Array(expected), Value::Array(actual)) => {
            expected.len() == actual.len()
                && expected.iter().zip(actual).all(|(exp, act)| json_eq_ignore(exp, act))
        }
        (Value::Object(expected), Value::Object(actual)) => {
            expected.len() == actual.len()
                && expected
                    .iter()
                    .all(|(key, exp)| actual.get(key).is_some_and(|act| json_eq_ignore(exp, act)))
        }
        // A container on the reference side never matches a different kind of value.
        (Value::Array(_) | Value::Object(_), _) => false,
        (Value::Null | Value::Bool(_) | Value::Number(_), _) => reference == value,
    }
}
#[tracing::instrument(skip(client, command, assets, registered, asset_folder), fields(command = %command))]
/// Sends a single command to the server and checks or returns its response.
///
/// `{{name}}` placeholders in the route are replaced by registered string values, the
/// body is built with [`Body::get`], and `api_key_variable` (when set) overrides the
/// `Authorization` header.
///
/// When `return_value` is `false`, the status code and the expected response (if any)
/// are verified and `Ok(None)` is returned. When `return_value` is `true`, no check is
/// performed and the parsed JSON response is returned together with its status code.
///
/// # Errors
///
/// Fails when the body cannot be built, the API key variable is not a registered string,
/// the request cannot be sent, the response cannot be decoded, or a check does not pass.
pub async fn run(
    client: &Client,
    command: &Command,
    assets: &BTreeMap<String, Asset>,
    registered: HashMap<String, Value>,
    asset_folder: &str,
    return_value: bool,
) -> anyhow::Result<Option<(Value, StatusCode)>> {
    // Try to replace variables in the route
    let route = substitute_route_variables(&command.route, &registered);
    let route: &str = &route;

    // `Body::get` consumes the body, so it is called on a clone to leave `command` intact
    let body = command
        .body
        .clone()
        .get(assets, &registered, asset_folder)
        .with_context(|| format!("while getting body for command {command}"))?;

    let mut request = client.request(command.method.into(), route);

    // Replace the api key
    if let Some(var_name) = &command.api_key_variable {
        if let Some(api_key) = registered.get(var_name).and_then(|v| v.as_str()) {
            request = request.header("Authorization", format!("Bearer {api_key}"));
        } else {
            bail!("could not find API key variable '{var_name}' in registered values");
        }
    }

    let request = if let Some((body, content_type)) = body {
        request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
    } else {
        request
    };

    let response =
        request.send().await.with_context(|| format!("error sending command: {}", command))?;

    let code = response.status();

    // Status checks are skipped when the caller only wants to record the response
    if !return_value {
        if let Some(expected_status) = command.expected_status {
            if code.as_u16() != expected_status {
                let response = response
                    .text()
                    .await
                    .context("could not read response body as text")
                    .context("reading response body when checking expected status")?;
                bail!("unexpected status code: got {}, expected {expected_status}, response body: '{response}'", code.as_u16());
            }
        } else if code.is_client_error() {
            tracing::error!(%command, %code, "error in workload file");
            let response: serde_json::Value = response
                .json()
                .await
                .context("could not deserialize response as JSON")
                .context("parsing error in workload file when sending command")?;
            bail!(
                "error in workload file: server responded with error code {code} and '{response}'"
            )
        } else if code.is_server_error() {
            tracing::error!(%command, %code, "server error");
            let response: serde_json::Value = response
                .json()
                .await
                .context("could not deserialize response as JSON")
                .context("parsing server error when sending command")?;
            bail!("server error: server responded with error code {code} and '{response}'")
        }
    }

    if let Some(expected_response) = &command.expected_response {
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("parsing response when checking expected response")?;
        if return_value {
            return Ok(Some((response, code)));
        }
        if !json_eq_ignore(expected_response, &response) {
            let expected_pretty = serde_json::to_string_pretty(expected_response)
                .context("serializing expected response as pretty JSON")?;
            let response_pretty = serde_json::to_string_pretty(&response)
                .context("serializing response as pretty JSON")?;
            let diff = SimpleDiff::from_str(&expected_pretty, &response_pretty, "expected", "got");
            bail!("unexpected response:\n{diff}");
        }
    } else if return_value {
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("parsing response when recording expected response")?;
        return Ok(Some((response, code)));
    }

    Ok(None)
}

/// Replaces every `{{name}}` placeholder of `route` whose name is registered as a string.
///
/// Each `{{` is paired with the closest following `}}`, so a route may contain several
/// placeholders. Placeholders whose name is unknown (or not a string) are kept verbatim.
/// The route is returned borrowed when there is nothing to substitute.
fn substitute_route_variables<'a>(
    route: &'a str,
    registered: &HashMap<String, Value>,
) -> std::borrow::Cow<'a, str> {
    if registered.is_empty() || !route.contains("{{") {
        return std::borrow::Cow::Borrowed(route);
    }

    let mut output = String::with_capacity(route.len());
    let mut rest = route;
    while let Some(open) = rest.find("{{") {
        let after_open = &rest[open + 2..];
        let Some(close) = after_open.find("}}") else { break };
        let name = after_open[..close].trim();
        match registered.get(name).and_then(|value| value.as_str()) {
            Some(replacement) => {
                output.push_str(&rest[..open]);
                output.push_str(replacement);
            }
            // Unknown variable: copy the text up to and including the closing braces
            None => output.push_str(&rest[..open + 2 + close + 2]),
        }
        rest = &after_open[close + 2..];
    }
    output.push_str(rest);
    std::borrow::Cow::Owned(output)
}
/// Runs `commands` batch by batch and returns the collected responses.
///
/// A batch ends at (and includes) the first command whose sync mode is not
/// [`SyncMode::DontWait`]. Batches run one after the other through `run_batch`, and
/// `registered` is updated with the values they register along the way.
pub async fn run_commands(
    client: &Arc<Client>,
    commands: &[Command],
    assets: &Arc<BTreeMap<String, Asset>>,
    asset_folder: &'static str,
    registered: &mut HashMap<String, Value>,
    return_response: bool,
) -> anyhow::Result<Vec<(Value, StatusCode)>> {
    let batches =
        commands.split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait));

    let mut responses = Vec::new();
    for batch in batches {
        let batch_responses =
            run_batch(client, batch, assets, asset_folder, registered, return_response).await?;
        responses.extend(batch_responses);
    }
    Ok(responses)
}
pub fn health_command() -> Command {
Command {
route: "/health".into(),
method: crate::common::client::Method::Get,
body: Default::default(),
register: HashMap::new(),
synchronous: SyncMode::WaitForResponse,
expected_status: None,
expected_response: None,
api_key_variable: None,
}
}

View File

@@ -0,0 +1,18 @@
use anyhow::Context;
use std::io::LineWriter;
use tracing_subscriber::{fmt::format::FmtSpan, layer::SubscriberExt, Layer};
pub fn setup_logs(log_filter: &str) -> anyhow::Result<()> {
let filter: tracing_subscriber::filter::Targets =
log_filter.parse().context("invalid --log-filter")?;
let subscriber = tracing_subscriber::registry().with(
tracing_subscriber::fmt::layer()
.with_writer(|| LineWriter::new(std::io::stderr()))
.with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
.with_filter(filter),
);
tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
Ok(())
}

View File

@@ -0,0 +1,7 @@
pub mod args;
pub mod assets;
pub mod client;
pub mod command;
pub mod logs;
pub mod process;
pub mod workload;

View File

@@ -1,18 +1,18 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::path::Path;
use std::time::Duration;
use anyhow::{bail, Context as _};
use tokio::process::Command;
use tokio::process::Command as TokioCommand;
use tokio::time;
use super::assets::Asset;
use super::client::Client;
use super::workload::Workload;
use crate::common::client::Client;
use crate::common::command::{health_command, run as run_command};
pub async fn kill(mut meilisearch: tokio::process::Child) {
pub async fn kill_meili(mut meilisearch: tokio::process::Child) {
let Some(id) = meilisearch.id() else { return };
match Command::new("kill").args(["--signal=TERM", &id.to_string()]).spawn() {
match TokioCommand::new("kill").args(["--signal=TERM", &id.to_string()]).spawn() {
Ok(mut cmd) => {
let Err(error) = cmd.wait().await else { return };
tracing::warn!(
@@ -49,8 +49,8 @@ pub async fn kill(mut meilisearch: tokio::process::Child) {
}
#[tracing::instrument]
pub async fn build() -> anyhow::Result<()> {
let mut command = Command::new("cargo");
async fn build() -> anyhow::Result<()> {
let mut command = TokioCommand::new("cargo");
command.arg("build").arg("--release").arg("-p").arg("meilisearch");
command.kill_on_drop(true);
@@ -64,29 +64,61 @@ pub async fn build() -> anyhow::Result<()> {
Ok(())
}
#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
pub async fn start(
#[tracing::instrument(skip(client, master_key), fields(workload = _workload))]
pub async fn start_meili(
client: &Client,
master_key: Option<&str>,
workload: &Workload,
asset_folder: &str,
mut command: Command,
extra_cli_args: &[String],
_workload: &str,
binary_path: Option<&Path>,
) -> anyhow::Result<tokio::process::Child> {
let mut command = match binary_path {
Some(binary_path) => tokio::process::Command::new(binary_path),
None => {
build().await?;
let mut command = tokio::process::Command::new("cargo");
command
.arg("run")
.arg("--release")
.arg("-p")
.arg("meilisearch")
.arg("--bin")
.arg("meilisearch")
.arg("--");
command
}
};
command.arg("--db-path").arg("./_xtask_benchmark.ms");
if let Some(master_key) = master_key {
command.arg("--master-key").arg(master_key);
}
command.arg("--experimental-enable-logs-route");
for extra_arg in workload.extra_cli_args.iter() {
for extra_arg in extra_cli_args.iter() {
command.arg(extra_arg);
}
command.kill_on_drop(true);
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Some(binary_path) = binary_path {
let mut perms = tokio::fs::metadata(binary_path)
.await
.with_context(|| format!("could not get metadata for {binary_path:?}"))?
.permissions();
perms.set_mode(perms.mode() | 0o111);
tokio::fs::set_permissions(binary_path, perms)
.await
.with_context(|| format!("could not set permissions for {binary_path:?}"))?;
}
}
let mut meilisearch = command.spawn().context("Error starting Meilisearch")?;
wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;
wait_for_health(client, &mut meilisearch).await?;
Ok(meilisearch)
}
@@ -94,11 +126,11 @@ pub async fn start(
async fn wait_for_health(
client: &Client,
meilisearch: &mut tokio::process::Child,
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<()> {
for i in 0..100 {
let res = super::command::run(client.clone(), health_command(), assets, asset_folder).await;
let res =
run_command(client, &health_command(), &BTreeMap::new(), HashMap::new(), "", false)
.await;
if res.is_ok() {
// check that this is actually the current Meilisearch instance that answered us
if let Some(exit_code) =
@@ -122,15 +154,6 @@ async fn wait_for_health(
bail!("meilisearch is not responding")
}
fn health_command() -> super::command::Command {
super::command::Command {
route: "/health".into(),
method: super::client::Method::Get,
body: Default::default(),
synchronous: super::command::SyncMode::WaitForResponse,
}
}
pub fn delete_db() {
let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
pub async fn delete_db() {
let _ = tokio::fs::remove_dir_all("./_xtask_benchmark.ms").await;
}

View File

@@ -0,0 +1,11 @@
use serde::{Deserialize, Serialize};
use crate::{bench::BenchWorkload, test::TestWorkload};
/// A workload file, which is either a bench workload or a test workload.
///
/// The variant is selected by the `type` field of the JSON object; variant names are
/// written in camelCase (`"bench"` or `"test"`).
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
#[serde(rename_all = "camelCase")]
pub enum Workload {
/// A benchmark workload.
Bench(BenchWorkload),
/// A test workload.
Test(TestWorkload),
}

View File

@@ -1 +1,3 @@
pub mod bench;
pub mod common;
pub mod test;

View File

@@ -1,7 +1,7 @@
use std::collections::HashSet;
use clap::Parser;
use xtask::bench::BenchDeriveArgs;
use xtask::{bench::BenchDeriveArgs, test::TestDeriveArgs};
/// List features available in the workspace
#[derive(Parser, Debug)]
@@ -20,6 +20,7 @@ struct ListFeaturesDeriveArgs {
enum Command {
ListFeatures(ListFeaturesDeriveArgs),
Bench(BenchDeriveArgs),
Test(TestDeriveArgs),
}
fn main() -> anyhow::Result<()> {
@@ -27,6 +28,7 @@ fn main() -> anyhow::Result<()> {
match args {
Command::ListFeatures(args) => list_features(args),
Command::Bench(args) => xtask::bench::run(args)?,
Command::Test(args) => xtask::test::run(args)?,
}
Ok(())
}

View File

@@ -0,0 +1,100 @@
use std::{sync::Arc, time::Duration};
use crate::{
common::{
args::CommonArgs, client::Client, command::SyncMode, logs::setup_logs, workload::Workload,
},
test::workload::CommandOrUpgrade,
};
use anyhow::{bail, Context};
use clap::Parser;
mod versions;
mod workload;
pub use workload::TestWorkload;
// NOTE(review): the `///` comments of this struct are presumably used by clap's derive
// as help text, so they are left untouched; maintainer notes use `//` instead.
/// Run tests from a workload
#[derive(Parser, Debug)]
pub struct TestDeriveArgs {
/// Common arguments shared with other commands
#[command(flatten)]
common: CommonArgs,
/// Enables workloads to be rewritten in place to update expected responses.
// When set, `TestWorkload::run` overwrites the expected response and status of every command.
#[arg(short, long, default_value_t = false)]
pub update_responses: bool,
/// Enables workloads to be rewritten in place to add missing expected responses.
// When set, `TestWorkload::run` only fills commands that have no expected response yet.
#[arg(short, long, default_value_t = false)]
pub add_missing_responses: bool,
}
/// Entry point of the `test` subcommand: builds a current-thread Tokio runtime with
/// I/O and time enabled, and drives `run_inner` to completion on it.
pub fn run(args: TestDeriveArgs) -> anyhow::Result<()> {
    let runtime =
        tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
    let _guard = runtime.enter();
    runtime.block_on(run_inner(args))
}
async fn run_inner(args: TestDeriveArgs) -> anyhow::Result<()> {
setup_logs(&args.common.log_filter)?;
// setup clients
let assets_client = Arc::new(Client::new(
None,
args.common.assets_key.as_deref(),
Some(Duration::from_secs(3600)), // 1h
)?);
let meili_client = Arc::new(Client::new(
Some("http://127.0.0.1:7700".into()),
Some("masterKey"),
Some(Duration::from_secs(args.common.tasks_queue_timeout_secs)),
)?);
let asset_folder = args.common.asset_folder.clone().leak();
for workload_file in &args.common.workload_file {
let string = tokio::fs::read_to_string(workload_file)
.await
.with_context(|| format!("error reading {}", workload_file.display()))?;
let workload: Workload = serde_json::from_str(string.trim())
.with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;
let Workload::Test(workload) = workload else {
bail!("workload file {} is not a test workload", workload_file.display());
};
let has_upgrade =
workload.commands.iter().any(|c| matches!(c, CommandOrUpgrade::Upgrade { .. }));
let has_faulty_register = workload.commands.iter().any(|c| {
matches!(c, CommandOrUpgrade::Command(cmd) if cmd.synchronous == SyncMode::DontWait && !cmd.register.is_empty())
});
if has_faulty_register {
bail!("workload {} contains commands that register values but are marked as --dont-wait. This is not supported because we cannot guarantee the value will be registered before the next command runs.", workload.name);
}
let name = workload.name.clone();
match workload.run(&args, &assets_client, &meili_client, asset_folder).await {
Ok(_) => {
match args.update_responses {
true => println!("🛠️ Workload {name} was updated"),
false => println!("✅ Workload {name} passed"),
}
if !has_upgrade {
println!("⚠️ Warning: this workload doesn't contain an upgrade. The whole point of these tests is to test upgrades! Please add one.");
}
}
Err(error) => {
println!("❌ Workload {name} failed: {error}");
println!("💡 Is this intentional? If so, rerun with --update-responses to update the workload files.");
return Err(error);
}
}
}
Ok(())
}

View File

@@ -0,0 +1,197 @@
use std::{collections::BTreeMap, fmt::Display, path::PathBuf};
use crate::common::assets::{Asset, AssetFormat};
use anyhow::Context;
use cargo_metadata::semver::Version;
use serde::{Deserialize, Serialize};
/// Either a specific version or the special value `latest`.
///
/// It is (de)serialized as a plain string: a version string, or `"latest"`
/// (matched case-insensitively when deserializing).
#[derive(Clone, Debug)]
pub enum VersionOrLatest {
/// A specific version; `binary_path` points to its binary inside the asset folder.
Version(Version),
/// No specific version; `binary_path` returns `None` for it.
Latest,
}
impl<'a> Deserialize<'a> for VersionOrLatest {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'a>,
{
let s: &str = Deserialize::deserialize(deserializer)?;
if s.eq_ignore_ascii_case("latest") {
Ok(VersionOrLatest::Latest)
} else {
let version = Version::parse(s).map_err(serde::de::Error::custom)?;
Ok(VersionOrLatest::Version(version))
}
}
}
impl Serialize for VersionOrLatest {
    /// Writes the value as a string: the version's textual form, or `latest`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = match self {
            VersionOrLatest::Latest => String::from("latest"),
            VersionOrLatest::Version(version) => version.to_string(),
        };
        serializer.serialize_str(&text)
    }
}
impl VersionOrLatest {
    /// Returns where the binary of this version is expected inside `asset_folder`.
    ///
    /// For a specific version this is `<asset_folder>/meilisearch-<version>-<arch>`,
    /// with `<arch>` coming from `get_arch`. For `Latest` there is no binary path and
    /// `None` is returned.
    pub fn binary_path(&self, asset_folder: &str) -> anyhow::Result<Option<PathBuf>> {
        let VersionOrLatest::Version(version) = self else {
            return Ok(None);
        };
        let folder: PathBuf = asset_folder.parse().context("parsing asset folder")?;
        let arch = get_arch()?;
        Ok(Some(folder.join(format!("meilisearch-{version}-{arch}"))))
    }
}
impl Display for VersionOrLatest {
    /// Formats a specific version like the version itself, and `Latest` as `latest`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            VersionOrLatest::Latest => f.write_str("latest"),
            VersionOrLatest::Version(version) => Display::fmt(version, f),
        }
    }
}
/// Looks up the SHA-256 digest of the release asset `asset_name` of `version` through
/// the GitHub releases API.
///
/// # Errors
///
/// Fails when the version is older than 1.15.0, when the request fails or GitHub answers
/// with an error status (for instance when rate limited), when the release cannot be
/// parsed, or when the asset or a `sha256:` digest for it is missing.
async fn get_sha256(version: &Version, asset_name: &str) -> anyhow::Result<String> {
    // If version is lower than 1.15 there is no point in trying to get the sha256, GitHub didn't support it
    if *version < Version::new(1, 15, 0) {
        anyhow::bail!("version is lower than 1.15, sha256 not available");
    }

    #[derive(Deserialize)]
    struct GithubReleaseAsset {
        name: String,
        digest: Option<String>,
    }

    #[derive(Deserialize)]
    struct GithubRelease {
        assets: Vec<GithubReleaseAsset>,
    }

    let url =
        format!("https://api.github.com/repos/meilisearch/meilisearch/releases/tags/v{version}");
    let client = reqwest::Client::builder()
        .user_agent("Meilisearch bench xtask")
        .build()
        .context("failed to build reqwest client")?;
    let body = client
        .get(&url)
        .send()
        .await
        .with_context(|| format!("failed to request {url}"))?
        // Turn 4xx/5xx answers into an explicit error instead of an obscure JSON parse failure
        .error_for_status()
        .with_context(|| format!("GitHub answered with an error for {url}"))?
        .text()
        .await
        .with_context(|| format!("failed to read the response of {url}"))?;
    let data: GithubRelease = serde_json::from_str(&body)
        .with_context(|| format!("failed to parse release v{version} as JSON"))?;

    let digest = data
        .assets
        .into_iter()
        .find(|asset| asset.name.as_str() == asset_name)
        .with_context(|| format!("asset {asset_name} not found in release v{version}"))?
        .digest
        .with_context(|| format!("asset {asset_name} has no digest"))?;

    let sha256 =
        digest.strip_prefix("sha256:").map(|s| s.to_string()).context("invalid sha256 format")?;
    Ok(sha256)
}
/// Returns the platform suffix used in the names of the release binaries.
///
/// The suffix is chosen from the operating system and CPU architecture this program
/// was compiled for.
///
/// # Errors
///
/// Fails on any platform other than linux (aarch64, x86_64), macos (x86_64, aarch64)
/// and windows (x86_64). The check happens at run time, so the crate still compiles
/// on unsupported platforms.
pub fn get_arch() -> anyhow::Result<&'static str> {
    match (std::env::consts::OS, std::env::consts::ARCH) {
        ("linux", "aarch64") => Ok("linux-aarch64"),
        ("linux", "x86_64") => Ok("linux-amd64"),
        ("macos", "x86_64") => Ok("macos-amd64"),
        ("macos", "aarch64") => Ok("macos-apple-silicon"),
        ("windows", "x86_64") => Ok("windows-amd64"),
        (os, arch) => anyhow::bail!("unsupported platform: {os}-{arch}"),
    }
}
/// Registers the release binary of `version` as an asset, unless it is already present.
///
/// The asset is keyed and stored locally as `meilisearch-<version>-<arch>` and is
/// downloaded from the GitHub release of that version. Its SHA-256 is either a
/// hard-coded value or looked up with `get_sha256`; when the lookup fails a warning is
/// logged and the asset is registered without a hash.
async fn add_asset(assets: &mut BTreeMap<String, Asset>, version: &Version) -> anyhow::Result<()> {
    let arch = get_arch()?;
    let local_filename = format!("meilisearch-{version}-{arch}");
    if assets.contains_key(&local_filename) {
        return Ok(());
    }

    let filename = format!("meilisearch-{arch}");

    // Try to get the sha256 but it may fail if Github is rate limiting us
    // We hardcode some values to speed up tests and avoid hitting Github
    // Also, versions prior to 1.15 don't have sha256 available anyway
    let sha256 = if local_filename == "meilisearch-1.12.0-macos-apple-silicon" {
        Some(String::from("3b384707a5df9edf66f9157f0ddb70dcd3ac84d4887149169cf93067d06717b7"))
    } else {
        get_sha256(version, &filename)
            .await
            .inspect_err(|err| tracing::warn!("failed to get sha256 for version {version}: {err}"))
            .ok()
    };

    let remote_location = format!(
        "https://github.com/meilisearch/meilisearch/releases/download/v{version}/{filename}"
    );

    assets.insert(
        local_filename.clone(),
        Asset {
            local_location: Some(local_filename),
            remote_location: Some(remote_location),
            format: AssetFormat::Raw,
            sha256,
        },
    );

    Ok(())
}
/// Makes sure `assets` contains the release binary of every version in `versions`,
/// adding them one after the other and stopping at the first error.
pub async fn expand_assets_with_versions(
    assets: &mut BTreeMap<String, Asset>,
    versions: &[Version],
) -> anyhow::Result<()> {
    let mut remaining = versions.iter();
    while let Some(version) = remaining.next() {
        add_asset(assets, version).await?;
    }
    Ok(())
}

View File

@@ -0,0 +1,201 @@
use anyhow::Context;
use cargo_metadata::semver::Version;
use chrono::DateTime;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::{
collections::{BTreeMap, HashMap},
io::Write,
sync::Arc,
};
use crate::{
common::{
assets::{fetch_assets, Asset},
client::Client,
command::{run_commands, Command},
process::{self, delete_db, kill_meili},
workload::Workload,
},
test::{
versions::{expand_assets_with_versions, VersionOrLatest},
TestDeriveArgs,
},
};
/// One step of a test workload: either a command to send or an upgrade of the server.
///
/// The enum is untagged: an object with an `upgrade` key is an upgrade, anything that
/// parses as a [`Command`] is a command.
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
#[allow(clippy::large_enum_variant)]
pub enum CommandOrUpgrade {
/// A command to send to the running server.
Command(Command),
/// Restart the server with the given version.
Upgrade { upgrade: VersionOrLatest },
}
/// Steps of a workload once consecutive commands have been grouped together.
enum CommandOrUpgradeVec<'a> {
/// Consecutive commands, borrowed mutably so their expectations can be updated in place.
Commands(Vec<&'a mut Command>),
/// An upgrade to the given version.
Upgrade(VersionOrLatest),
}
/// Redacts the parts of a response that change from one run to the next, so that it
/// can be stored as an expected response.
///
/// Recursively, strings that parse as RFC 3339 timestamps become `[timestamp]`, strings
/// that parse as UUIDs become `[uuid]`, and the value of any `processingTimeMs` key
/// becomes `[duration]`.
fn produce_reference_value(value: &mut Value) {
    match value {
        Value::Object(map) => {
            for (key, entry) in map.iter_mut() {
                if key.as_str() == "processingTimeMs" {
                    *entry = Value::String(String::from("[duration]"));
                } else {
                    produce_reference_value(entry);
                }
            }
        }
        Value::Array(items) => items.iter_mut().for_each(produce_reference_value),
        Value::String(text) => {
            // The timestamp check comes first, as in a plain if / else-if chain.
            let placeholder = if DateTime::parse_from_rfc3339(text.as_str()).is_ok() {
                Some("[timestamp]")
            } else if uuid::Uuid::parse_str(text.as_str()).is_ok() {
                Some("[uuid]")
            } else {
                None
            };
            if let Some(placeholder) = placeholder {
                *text = String::from(placeholder);
            }
        }
        Value::Null | Value::Bool(_) | Value::Number(_) => (),
    }
}
/// A test workload.
/// Not to be confused with [a bench workload](crate::bench::workload::Workload).
///
/// Field names are (de)serialized in camelCase.
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct TestWorkload {
/// Name of the workload, used in logs and reports.
pub name: String,
/// Version of the server that is started before the first command runs.
pub initial_version: Version,
/// NOTE(review): not read by `run` in this file, which always starts the server with
/// the key `masterKey` — confirm whether this field is still needed.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub master_key: Option<String>,
/// Assets the commands may reference by name; omitted from the file when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub assets: BTreeMap<String, Asset>,
/// Commands and upgrades, in the order they are executed; omitted from the file when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub commands: Vec<CommandOrUpgrade>,
}
impl TestWorkload {
pub async fn run(
mut self,
args: &TestDeriveArgs,
assets_client: &Client,
meili_client: &Arc<Client>,
asset_folder: &'static str,
) -> anyhow::Result<()> {
// Group commands between upgrades
let mut commands_or_upgrade = Vec::new();
let mut current_commands = Vec::new();
let mut all_versions = vec![self.initial_version.clone()];
for command_or_upgrade in &mut self.commands {
match command_or_upgrade {
CommandOrUpgrade::Command(command) => current_commands.push(command),
CommandOrUpgrade::Upgrade { upgrade } => {
if !current_commands.is_empty() {
commands_or_upgrade.push(CommandOrUpgradeVec::Commands(current_commands));
current_commands = Vec::new();
}
commands_or_upgrade.push(CommandOrUpgradeVec::Upgrade(upgrade.clone()));
if let VersionOrLatest::Version(upgrade) = upgrade {
all_versions.push(upgrade.clone());
}
}
}
}
if !current_commands.is_empty() {
commands_or_upgrade.push(CommandOrUpgradeVec::Commands(current_commands));
}
// Fetch assets
expand_assets_with_versions(&mut self.assets, &all_versions).await?;
fetch_assets(assets_client, &self.assets, &args.common.asset_folder).await?;
// Run server
delete_db().await;
let binary_path = VersionOrLatest::Version(self.initial_version.clone())
.binary_path(&args.common.asset_folder)?;
let mut process = process::start_meili(
meili_client,
Some("masterKey"),
&[],
&self.name,
binary_path.as_deref(),
)
.await?;
let assets = Arc::new(self.assets.clone());
let return_responses = dbg!(args.add_missing_responses || args.update_responses);
let mut registered = HashMap::new();
for command_or_upgrade in commands_or_upgrade {
match command_or_upgrade {
CommandOrUpgradeVec::Commands(commands) => {
let cloned: Vec<_> = commands.iter().map(|c| (*c).clone()).collect();
let responses = run_commands(
meili_client,
&cloned,
&assets,
asset_folder,
&mut registered,
return_responses,
)
.await?;
if return_responses {
assert_eq!(responses.len(), cloned.len());
for (command, (mut response, status)) in commands.into_iter().zip(responses)
{
if args.update_responses
|| (args.add_missing_responses
&& command.expected_response.is_none())
{
produce_reference_value(&mut response);
command.expected_response = Some(response);
command.expected_status = Some(status.as_u16());
}
}
}
}
CommandOrUpgradeVec::Upgrade(version) => {
kill_meili(process).await;
let binary_path = version.binary_path(&args.common.asset_folder)?;
process = process::start_meili(
meili_client,
Some("masterKey"),
&[String::from("--experimental-dumpless-upgrade")],
&self.name,
binary_path.as_deref(),
)
.await?;
tracing::info!("Upgraded to {version}");
}
}
}
// Write back the workload if needed
if return_responses {
// Filter out the assets we added for the versions
self.assets.retain(|_, asset| {
asset.local_location.as_ref().is_none_or(|a| !a.starts_with("meilisearch-"))
});
let workload = Workload::Test(self);
let mut file =
std::fs::File::create(&args.common.workload_file[0]).with_context(|| {
format!("could not open {}", args.common.workload_file[0].display())
})?;
serde_json::to_writer_pretty(&file, &workload).with_context(|| {
format!("could not write to {}", args.common.workload_file[0].display())
})?;
file.write_all(b"\n").with_context(|| {
format!("could not write to {}", args.common.workload_file[0].display())
})?;
tracing::info!("Updated workload file {}", args.common.workload_file[0].display());
}
Ok(())
}
}

View File

@@ -1,5 +1,6 @@
{
"name": "movies-subset-hf-embeddings",
"type": "bench",
"run_count": 5,
"extra_cli_args": [
"--max-indexing-threads=4"

View File

@@ -1,5 +1,6 @@
{
"name": "settings-add-embeddings-hf",
"type": "bench",
"run_count": 5,
"extra_cli_args": [
"--max-indexing-threads=4"

View File

@@ -1,5 +1,6 @@
{
"name": "hackernews.add_new_documents",
"type": "bench",
"run_count": 3,
"extra_cli_args": [],
"assets": {

View File

@@ -1,5 +1,6 @@
{
"name": "hackernews.ndjson_1M_ignore_first_100k",
"type": "bench",
"run_count": 3,
"extra_cli_args": [],
"assets": {

View File

@@ -1,5 +1,6 @@
{
"name": "hackernews.modify_facet_numbers",
"type": "bench",
"run_count": 3,
"extra_cli_args": [],
"assets": {

View File

@@ -1,5 +1,6 @@
{
"name": "hackernews.modify_facet_strings",
"type": "bench",
"run_count": 3,
"extra_cli_args": [],
"assets": {

View File

@@ -1,5 +1,6 @@
{
"name": "hackernews.modify_searchables",
"type": "bench",
"run_count": 3,
"extra_cli_args": [],
"assets": {

Some files were not shown because too many files have changed in this diff Show More