Compare commits

...

274 Commits

Author SHA1 Message Date
Kerollmops
ac428b5d7c Disallow todos from the Clippy CI 2025-12-15 13:36:47 +01:00
Kerollmops
1291990f7d Fix actix payload error handling 2025-12-15 13:36:46 +01:00
Clément Renault
2b6b4284bb Merge pull request #6000 from meilisearch/change-network-topology-2
Allow changing network topology
2025-12-15 11:09:56 +00:00
Louis Dureuil
018cad1781 add batch reason 2025-12-15 11:06:25 +01:00
Louis Dureuil
65944df325 Address issue where old tasks where handled with new network 2025-12-11 17:47:51 +01:00
Louis Dureuil
ed3cb36dca Update migration test to check that the migrated network version is nil UUID 2025-12-11 15:51:55 +01:00
Louis Dureuil
316998ce97 upgrade the network to a nil version to make sure that all upgradees have the same version 2025-12-11 15:46:09 +01:00
Louis Dureuil
2ad094e95d Include remote name in export logs 2025-12-11 14:21:36 +01:00
Louis Dureuil
f1c0ebab5b add declarative tests for network 2025-12-11 14:06:08 +01:00
Louis Dureuil
59fe64adec Fix dump tests following breaking change 2025-12-11 09:34:21 +01:00
Louis Dureuil
7d22a6eb3a cargo formattoclippy 2025-12-11 09:10:17 +01:00
Louis Dureuil
9cf91f3ffe drop local and leader while importing dumps 2025-12-10 18:14:01 +01:00
Louis Dureuil
666b16e1d1 Add dumpless upgrade network 2025-12-10 18:02:37 +01:00
Louis Dureuil
5b467ed4ce Update version in Cargo.toml 2025-12-10 18:02:18 +01:00
Louis Dureuil
6e98fe5f2d cargo fmt 2025-12-10 17:24:21 +01:00
Louis Dureuil
1fcd330751 Duplicate index-swaps tasks 2025-12-10 17:23:55 +01:00
Louis Dureuil
d5583ba1e9 Ensure that the leader is present in the remotes 2025-12-10 17:23:43 +01:00
Louis Dureuil
50532ccccc add new nodes in the list of node to export from 2025-12-10 17:23:18 +01:00
Louis Dureuil
dacb711ea7 Fix headers 2025-12-10 17:22:54 +01:00
Louis Dureuil
a90d467163 Ignore prioritary tasks when batching unprioritary tasks in network task 2025-12-10 17:22:13 +01:00
Louis Dureuil
c1dcb618f1 Support both legacy and normative headers 2025-12-10 15:48:55 +01:00
Louis Dureuil
c71add854d cargo-formattoclippy 2025-12-09 18:59:15 +01:00
Louis Dureuil
e484bfc514 skip settings validation when we have an origin 2025-12-09 18:32:18 +01:00
Clément Renault
26e368b116 Merge pull request #6041 from meilisearch/fix-workflow-injection
Remove risk of command injection
2025-12-09 17:04:58 +00:00
Louis Dureuil
ccc54b1d23 Use b64 encoded roaring bitmaps for task keys 2025-12-09 18:01:37 +01:00
Louis Dureuil
bf33ca0c38 Add env variables for proxy 2025-12-09 17:06:54 +01:00
curquiza
ba95ac0915 Remove risk of command injection 2025-12-09 17:06:41 +01:00
Louis Dureuil
532684981d export of documents use a limit that is by default close to the http_payload_size_limit 2025-12-09 16:32:12 +01:00
Louis Dureuil
ce2dd8e2f9 breaking: headers prefixed by X- 2025-12-09 16:00:15 +01:00
Louis Dureuil
d90febdc82 happy rustc 2025-12-09 15:51:30 +01:00
Louis Dureuil
f0e73333af Respond to various PR comments 2025-12-09 15:24:23 +01:00
Louis Dureuil
a682f79487 clippy happy 2025-12-09 15:24:23 +01:00
Louis Dureuil
9214a9b641 Refactor the skip_if lambdas 2025-12-09 15:24:23 +01:00
Louis Dureuil
51d57c1076 factor out to function 2025-12-09 15:24:23 +01:00
Louis Dureuil
3954af9fe8 Compute total moved docs with a variable rather than a vec 2025-12-09 15:24:23 +01:00
Louis Dureuil
d8880a93b7 move output of balance documents to a var 2025-12-09 15:24:22 +01:00
Louis Dureuil
27bd557396 Use turbofish 2025-12-09 15:24:22 +01:00
Louis Dureuil
c322b307bc Replace match with if let Err 2025-12-09 15:24:22 +01:00
Louis Dureuil
7aad304224 use expect 2025-12-09 15:24:22 +01:00
Louis Dureuil
61a7f68113 Adjust comment 2025-12-09 15:24:22 +01:00
Louis Dureuil
8d3af3dea2 Add missing default 2025-12-09 15:24:22 +01:00
Louis Dureuil
b82530e4d5 Remove unwarranted BUSL header 2025-12-09 15:24:22 +01:00
Louis Dureuil
eaa249ca94 Use same version of hashbrown in index-scheduler as in milli 2025-12-09 15:24:21 +01:00
Louis Dureuil
a3def29f11 Tests 2025-12-09 15:24:21 +01:00
Louis Dureuil
dd5db5257d Duplicate settings tasks 2025-12-09 15:24:21 +01:00
Louis Dureuil
4e5a3fee5d Duplicate index creation, update, delete tasks 2025-12-09 15:24:21 +01:00
Louis Dureuil
22027c782a Apply new sharding interface to document route 2025-12-09 15:24:21 +01:00
Louis Dureuil
44e7377240 Move network to module and add separate editions 2025-12-09 15:24:21 +01:00
Louis Dureuil
71f359b10b Move and add to proxy 2025-12-09 15:24:20 +01:00
Louis Dureuil
771d1e8282 process export changes
- extract reusable export function
- support extra balance headers
2025-12-09 15:24:20 +01:00
Louis Dureuil
87b2f8f7c2 misc NetworkTopologyChange support 2025-12-09 15:24:20 +01:00
Louis Dureuil
aed03f1473 Process network topology change batches 2025-12-09 15:24:20 +01:00
Louis Dureuil
7ff517bf3a Update tick 2025-12-09 15:24:20 +01:00
Louis Dureuil
961a960fff Create batch 2025-12-09 15:24:20 +01:00
Louis Dureuil
093b358864 Network topology change batch types 2025-12-09 15:24:20 +01:00
Louis Dureuil
71ea943386 Fix update task to support updating multiple times the same task 2025-12-09 15:24:19 +01:00
Louis Dureuil
a878875aca Propagate task network in task queue 2025-12-09 15:24:19 +01:00
Louis Dureuil
6aa93e3e93 Stop processing no longer destroys the progress 2025-12-09 15:24:19 +01:00
Louis Dureuil
2be35e9c5c Handle receiving import tasks in the scheduler 2025-12-09 15:24:19 +01:00
Louis Dureuil
bea64ecc5c Set task network returns the updated task 2025-12-09 15:24:19 +01:00
Louis Dureuil
fb96e8496e Support network topology change tasks in dumps 2025-12-09 15:24:19 +01:00
Louis Dureuil
0dd9d173c6 Add network topology change task 2025-12-09 15:24:18 +01:00
Louis Dureuil
ff9439b5ac Move tasks.rs to tasks/mod.rs 2025-12-09 15:24:18 +01:00
Louis Dureuil
355950939a Add leader and version to Network object 2025-12-09 15:24:18 +01:00
Louis Dureuil
7c502794d5 New errors 2025-12-09 15:24:18 +01:00
Louis Dureuil
60669dfa35 Make error name public 2025-12-09 15:24:18 +01:00
Louis Dureuil
d6cd954e4b Update shard definition 2025-12-09 15:24:18 +01:00
Louis Dureuil
7429faf046 Add method to return the number of indexes to IndexMapper 2025-12-09 15:24:18 +01:00
Louis Dureuil
edbe32e53e AllTasks public and deserializable 2025-12-09 15:24:17 +01:00
Louis Dureuil
74fe44e18e Make task view fields public 2025-12-09 15:24:17 +01:00
Louis Dureuil
ccbcacec22 Create RoFeatures from RuntimeFeatures 2025-12-09 15:24:17 +01:00
Louis Dureuil
43a11d2f66 Update dependencies 2025-12-09 15:24:17 +01:00
Clément Renault
75fcbfc2fe Merge pull request #6039 from meilisearch/bump-rust-to-1-19-1
Move to Rust v1.91.1
2025-12-09 13:55:08 +00:00
Kerollmops
8c19b6d55e Make the new Clippy happy 2025-12-09 14:33:04 +01:00
Kerollmops
08d0f05ece Remove a warning 2025-12-09 13:58:37 +01:00
Kerollmops
4762e9afa0 Move to Rust v1.91.1 2025-12-09 13:52:46 +01:00
Clément Renault
12fcab91c5 Merge pull request #6037 from meilisearch/fix-intel-mac
Fix macos-amd64 compilation
2025-12-08 13:21:51 +00:00
Louis Dureuil
792a72a23f Add missing cfg 2025-12-08 13:22:01 +01:00
Louis Dureuil
2dd7f29edf Merge pull request #6034 from meilisearch/update-version-v1.29.0
Update version for the next release (v1.29.0) in Cargo.toml
2025-12-08 08:01:41 +00:00
dureuill
ff680d29a8 Update version for the next release (v1.29.0) in Cargo.toml 2025-12-04 16:24:56 +00:00
Clément Renault
00420dfca0 Merge pull request #6018 from qdequele/add-support-xlmrobertamodels
Add support for XLM Roberta models
2025-12-04 15:46:53 +00:00
Quentin de Quelen
a3a86ac629 chore: cargo fmt 2025-12-04 16:27:19 +01:00
Quentin de Quelen
f6210b8e5e add tests for the support of the models XLMRoberta 2025-12-04 16:27:19 +01:00
Quentin de Quelen
fe46af7ded add support of models XLMRoberta 2025-12-04 16:27:19 +01:00
Clément Renault
57b94b411f Merge pull request #6030 from meilisearch/require-git
Require git
2025-12-04 14:29:33 +00:00
Clément Renault
a7b6f65851 Merge pull request #6022 from meilisearch/xtask-generate-proto-name
Introduce xtask sub-command to generate prototypes
2025-12-04 13:53:20 +00:00
Louis Dureuil
1ec6646d8c Merge pull request #6029 from meilisearch/dumpless-upgrade-migrations
Switch to migration-oriented dumpless upgrade
2025-12-04 13:35:26 +00:00
Kerollmops
2dccacf273 Hide git fetch output 2025-12-04 14:35:03 +01:00
Kerollmops
ce0f04e9ee Improve the prototype guide 2025-12-04 14:35:03 +01:00
Kerollmops
9ba5c6d371 Update the prototype format 2025-12-04 14:35:03 +01:00
Kerollmops
56673fee56 Introduce the first working version of the tool 2025-12-04 14:35:03 +01:00
Clément Renault
b30bcbb931 Merge pull request #6032 from meilisearch/bump-hannoy
Bump hannoy to v0.1.0-nested-rtxns
2025-12-04 13:30:43 +00:00
Kerollmops
5fbe4436c8 Bump hannoy to v0.1.0-nested-rtxns 2025-12-04 14:06:45 +01:00
Louis Dureuil
8fa253c293 fmt 2025-12-04 13:55:28 +01:00
Louis Dureuil
4833da9edb Chore: remove some duplicated lambdas to ease compile time 2025-12-04 13:55:28 +01:00
Louis Dureuil
c0e31a4f01 Switch to migration-oriented dumpless upgrade 2025-12-04 13:55:28 +01:00
Louis Dureuil
c06ffb31d1 Update snapshots 2025-12-04 13:55:28 +01:00
Louis Dureuil
3097314b9d Make snapshots independent on the version 2025-12-04 13:55:27 +01:00
Louis Dureuil
786a978237 fmt 2025-12-04 13:52:57 +01:00
Louis Dureuil
03e53aaf6d Add binary to display build-info 2025-12-04 13:52:57 +01:00
Louis Dureuil
2206f045a4 replace git2 by the git command line in build-info 2025-12-04 13:52:56 +01:00
Louis Dureuil
246cf8b2d1 Mimic what is done for publish asset in the CI, for faster build 2025-12-04 13:52:56 +01:00
Louis Dureuil
82adabc5a0 Merge pull request #5861 from meilisearch/upgrade-tests
Declarative tests
2025-12-04 11:00:53 +00:00
Louis Dureuil
c9a22247d2 add hannoy test 2025-12-04 11:41:41 +01:00
Louis Dureuil
c535b8ddef Use variables to account for changes between local and CI 2025-12-04 09:47:37 +01:00
Louis Dureuil
8e89619aed Also evaluate variables in expected responses 2025-12-04 09:47:21 +01:00
Clément Renault
f617ca8e38 Merge pull request #6023 from meilisearch/curquiza-patch-1
Send notifications for Kubernetes integration when releasing
2025-12-04 07:00:50 +00:00
Louis Dureuil
959175ad2a switch to gh runner 2025-12-03 22:59:57 +01:00
Louis Dureuil
341ffbf5ef Modify bot message on db-change labeled PRs 2025-12-03 21:25:41 +01:00
Louis Dureuil
542f3073f4 Appease codeql 2025-12-03 21:25:41 +01:00
Louis Dureuil
0f134b079f hf-embed workload: add ranking scores 2025-12-03 21:25:41 +01:00
Louis Dureuil
9e7ae47355 Add missing sha 2025-12-03 21:25:41 +01:00
Louis Dureuil
1edf07df29 Add tests to CI 2025-12-03 21:25:40 +01:00
Louis Dureuil
88aa3cddde Support local builds of enterprise binaries 2025-12-03 21:25:40 +01:00
Louis Dureuil
e6846cb55a Rename and move the test instructions 2025-12-03 21:25:40 +01:00
Louis Dureuil
29b715e2f9 Update workloads 2025-12-03 21:25:40 +01:00
Louis Dureuil
f28dc5bd2b cleaning 2025-12-03 21:25:40 +01:00
Louis Dureuil
56d0b8ea54 Some cleaning 2025-12-03 21:25:40 +01:00
Louis Dureuil
514edb1b79 Add workloads 2025-12-03 21:25:40 +01:00
Louis Dureuil
cfb609d41d clippy 2025-12-03 21:25:40 +01:00
Louis Dureuil
11cb062067 fmt 2025-12-03 21:25:40 +01:00
Louis Dureuil
2ca4926ac5 Support editions, move to common 2025-12-03 21:25:40 +01:00
Louis Dureuil
834bd9b879 Fix uninitialization issue on unsupported platforms 2025-12-03 21:25:39 +01:00
Louis Dureuil
cac7e00983 Remove chrono 2025-12-03 21:25:39 +01:00
Mubelotix
e9300bac64 Add documentation 2025-12-03 21:25:39 +01:00
Mubelotix
b0da7864a4 Api key tests 2025-12-03 21:25:39 +01:00
Mubelotix
2b9d379feb Add variable registration mechanism 2025-12-03 21:25:39 +01:00
Mubelotix
8d585a04d4 Update movies workload 2025-12-03 21:25:39 +01:00
Mubelotix
0095a72fba Test for upgrade 2025-12-03 21:25:39 +01:00
Mubelotix
651339648c Fix processing time ms 2025-12-03 21:25:39 +01:00
Mubelotix
a489f4c172 Update issue template 2025-12-03 21:25:39 +01:00
Mubelotix
3b875ea00e Update movies 2025-12-03 21:25:39 +01:00
Mubelotix
9d269c499c Fix line feed at the end of files 2025-12-03 21:25:39 +01:00
Mubelotix
da35ae0a6e Update emojis 2025-12-03 21:25:38 +01:00
Mubelotix
61945b235d Add redaction system 2025-12-03 21:25:38 +01:00
Mubelotix
e936ac172d Fix compilation 2025-12-03 21:25:38 +01:00
Mubelotix
162a84cdbf Improve error detection 2025-12-03 21:25:38 +01:00
Mubelotix
92c63cf351 Improve diffing 2025-12-03 21:25:38 +01:00
Mubelotix
fca35b7476 Add upgrade system 2025-12-03 21:25:38 +01:00
Mubelotix
4056657a55 Refactor around meili_path 2025-12-03 21:25:38 +01:00
Mubelotix
685d227597 Move file to common 2025-12-03 21:25:38 +01:00
Mubelotix
49b9f6ff38 Remove useless data 2025-12-03 21:25:38 +01:00
Mubelotix
79d0a3fb97 Remove useless parameter 2025-12-03 21:25:38 +01:00
Mubelotix
313ef7e79b Add response updating logic 2025-12-03 21:25:37 +01:00
Mubelotix
256407be61 Fix asset version issues 2025-12-03 21:25:37 +01:00
Mubelotix
8b3943bd32 Do so that meilisearch versions get downloaded 2025-12-03 21:25:37 +01:00
Mubelotix
87b972d29a Implement test workload running logic 2025-12-03 21:25:37 +01:00
Mubelotix
09ab61b360 Continue integrating commands to tests 2025-12-03 21:25:37 +01:00
Mubelotix
2459f381b4 Remove dead code 2025-12-03 21:25:37 +01:00
Mubelotix
6442f02de4 Make commands common 2025-12-03 21:25:37 +01:00
Mubelotix
91c4d9ea79 Tag workloads 2025-12-03 21:25:37 +01:00
Mubelotix
92a4091da3 Create test workload 2025-12-03 21:25:37 +01:00
Mubelotix
29a337f0f9 Create the test function 2025-12-03 21:25:36 +01:00
Mubelotix
8c3cebadaa Create the test xtask command and args 2025-12-03 21:25:36 +01:00
Clément Renault
b566458aa2 Merge pull request #6027 from meilisearch/release-v1.28.2
Bring back changes from v1.28.2
2025-12-03 17:46:44 +00:00
Clément Renault
ae4344e359 Merge pull request #6004 from meilisearch/default-experimental-vector-store
Make Hannoy the default vector store
2025-12-03 17:16:46 +00:00
Kerollmops
b6cb384650 Fix settings tests 2025-12-03 17:52:52 +01:00
Clément Renault
2c3e3d856c Make hannoy the default vector store when creating an index 2025-12-03 17:52:52 +01:00
Clémentine
93e97f814c Add notifications for Kubernetes integration
Updated comments and conditions for notifying integration teams.
2025-12-03 17:49:46 +01:00
Kerollmops
e9350f033d Limit the number of retrieved task to one 2025-12-03 17:43:48 +01:00
Kerollmops
54c92fd6c0 Update the snapshots 2025-12-03 17:43:48 +01:00
Kerollmops
4f4df83a51 Bump the version to v1.28.2 2025-12-03 17:43:48 +01:00
Clément Renault
a51021cab7 Merge pull request #6026 from meilisearch/free-space
Fix the CI issues
2025-12-03 16:18:41 +00:00
Louis Dureuil
e33f4fdeae Attempt to eschew containers for ubuntu 2025-12-03 16:28:19 +01:00
Louis Dureuil
e407bca196 use feature as cache key 2025-12-03 16:24:48 +01:00
Louis Dureuil
cd24ea11b4 correctly clean space + remove test in debug 2025-12-03 16:12:08 +01:00
Louis Dureuil
ba578e7ab5 Fix ollama test following update on their side 2025-12-03 15:48:30 +01:00
Louis Dureuil
05a74d1e68 remove non-existing rust-toolchain action arguments 2025-12-03 15:37:51 +01:00
Louis Dureuil
41d61deb97 Make runners/containers more uniform 2025-12-03 15:34:57 +01:00
Louis Dureuil
bba292b01a Run ollama test on 22.04 2025-12-03 15:21:02 +01:00
Louis Dureuil
96923dff33 adjust test suite 2025-12-03 15:01:58 +01:00
Louis Dureuil
8f9c9305da set back the cache 2025-12-03 14:10:18 +01:00
Louis Dureuil
a9f309e1d1 Remove macos and windows from PRs 2025-12-03 13:54:02 +01:00
Louis Dureuil
e456a9acd8 Add the disk freeing to all ubuntu-22.04 jobs 2025-12-03 11:51:42 +01:00
Louis Dureuil
9b7d29466c Attempt to earn some free space... 2025-12-03 11:41:00 +01:00
Clément Renault
b0ef14b6f0 Merge pull request #5983 from meilisearch/new-searchable-settings-indexer
Support the searchable and exact attributes in the new Settings Indexer
2025-12-02 11:03:36 +00:00
Clément Renault
36febe2068 Merge pull request #6021 from meilisearch/skip-macos-windows-in-merge-queue
Skip the macOS and Windows CI in the merge queue
2025-12-02 08:29:06 +00:00
Kerollmops
6f14a6ec18 Skip the macOS and Windows CI in the merge queue 2025-12-01 16:59:55 +01:00
Clément Renault
1a45b19e7e Merge pull request #6020 from meilisearch/fix-release-ci-enterprise
Fix release CI after enterprise merge
2025-12-01 15:12:00 +00:00
Kerollmops
bd7525b166 Update the snapshots 2025-12-01 15:26:00 +01:00
Kerollmops
359757d939 Bump patch version 2025-12-01 15:25:56 +01:00
Paul de Nonancourt
1c6eea596c fix: Only trigger Cloud CI for enterprise edition 2025-12-01 15:08:23 +01:00
Paul de Nonancourt
693b6f483e fix: Update binary path for target x86_64 meilisearch release 2025-12-01 15:07:55 +01:00
Many the fish
818a4aa6d9 Merge pull request #6016 from EclipseAditya/fix-sort-on-empty-attribute-5998
Fix sort on /documents endpoint when field has no values
2025-12-01 13:50:05 +00:00
Clément Renault
ddadeb99e9 Merge pull request #6019 from meilisearch/bump-version
Bump version to v1.28
2025-12-01 10:26:51 +00:00
Kerollmops
b8d8be934a Update snapshots 2025-12-01 10:52:57 +01:00
Kerollmops
7175d70b8f List the version in the upgrades 2025-12-01 10:29:33 +01:00
Kerollmops
8a3e65ab6f Bump version to v1.28 2025-12-01 10:23:42 +01:00
EclipseAditya
4737e1a2a5 Fix rustfmt formatting issues 2025-11-30 06:02:02 +00:00
EclipseAditya
36522e951b Fix sort on /documents endpoint when field has no values 2025-11-28 15:22:57 +00:00
Kerollmops
fce046d84d Fix non-detected searchable attribute 2025-11-28 11:29:31 +01:00
Kerollmops
3fc507bb44 Introduce a test for when a new nested field becomes searchable 2025-11-28 11:29:31 +01:00
Kerollmops
fdbcd033fb Clean up the CI 2025-11-28 11:29:31 +01:00
Clément Renault
aaab49baca Fix a bug and improve code quality
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Kerollmops
0d0d6e8099 Update the proximity precision for the settings delta 2025-11-28 11:29:31 +01:00
Clément Renault
c1e351c92b Show available space 2025-11-28 11:29:31 +01:00
Clément Renault
67cab4cc9d Trigger the new settings indexer when changing the proximity precision 2025-11-28 11:29:31 +01:00
Clément Renault
f30a37b0fe Clear old word prefix fid docids entries when removing searchable fields 2025-11-28 11:29:31 +01:00
Clément Renault
a78a9f80dd Introduce the word pair proximity extractor 2025-11-28 11:29:31 +01:00
Clément Renault
439fee5434 Move the has_searchable_children function to the appropriate module 2025-11-28 11:29:31 +01:00
Clément Renault
9e858590e0 Rename the function to extract document words when a setting changes
Co-authored-By: Maxime Legendre <maxime@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
29eebd5f93 Merge the logic of the function detecting searchable children fields 2025-11-28 11:29:31 +01:00
Clément Renault
07da6edbdf Fix a bug when nested fields appear
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
22b83042e6 Add some comments
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
52ab13906a Fix a test trying to change settings with a wtxn 2025-11-28 11:29:31 +01:00
Clément Renault
29bec8efd4 Make sure the embedders supports changing searchables 2025-11-28 11:29:31 +01:00
Clément Renault
6947a8990b Make sure we don't crash on unreferenced fields 2025-11-28 11:29:31 +01:00
Clément Renault
fbb2bb0c73 Make clippy happy 2025-11-28 11:29:31 +01:00
Clément Renault
15918f53a9 Introduce new progress steps when deleting fid-based entries 2025-11-28 11:29:30 +01:00
Clément Renault
d7f5f3a0a3 Delete entries from fid-based databases when searchables are deleted 2025-11-28 11:29:30 +01:00
Clément Renault
1afbf35f27 Support exact attributes in the settings delta 2025-11-28 11:29:30 +01:00
Clément Renault
d7675233d5 Call the post processing in the new settings indexer 2025-11-28 11:29:30 +01:00
Clément Renault
c63c1ac32b Support exact attributes in the field metadata 2025-11-28 11:29:30 +01:00
Clément Renault
6171dcde0d Call the new searchable extractor 2025-11-28 11:29:30 +01:00
Clément Renault
04bc134324 Introduce the new searchable extractor 2025-11-28 11:29:30 +01:00
Clément Renault
8ff39d927d Enable the new settings indexer when the searchable or exact are updates 2025-11-28 11:29:30 +01:00
Clément Renault
ffd461c800 Merge pull request #6011 from meilisearch/enterprise-feature
Add support for conditional compilation of the EE
2025-11-27 20:43:09 +00:00
Clément Renault
9134d27980 Merge pull request #6013 from meilisearch/fix-sdk-tests
Fix SDK test to use EE
2025-11-27 19:24:37 +00:00
curquiza
f60242979f Fix SDK test to use EE 2025-11-27 17:51:27 +01:00
Clément Renault
d347417cfd Merge pull request #5956 from meilisearch/progress-trace-in-metrics
Expose batch progress traces on the metrics route
2025-11-27 16:05:13 +00:00
Paul de Nonancourt
55d54afd69 Build different community and enterprise Docker images in CI 2025-11-27 14:31:08 +01:00
Kerollmops
dca7679c47 Change the binary name format to suffix meilisearch with enterprise 2025-11-27 13:56:29 +01:00
Kerollmops
a34b692396 Remove Cross compilation file 2025-11-27 13:53:23 +01:00
Kerollmops
63829b62e9 Cleanup useless references to jemalloc 2025-11-27 13:53:23 +01:00
Kerollmops
44c8252ad5 Merge the publish binaries job 2025-11-27 13:53:23 +01:00
Kerollmops
19ae428890 Introduce a matrix for the tests CIs 2025-11-27 13:53:13 +01:00
Many the fish
7adcb657ae Merge pull request #6007 from meilisearch/update-charabia-v0.9.9
Update charabia v0.9.9
2025-11-27 12:37:55 +00:00
Louis Dureuil
9624768976 Add support for conditional compilation of the EE 2025-11-27 10:53:46 +01:00
Clément Renault
5025acfd2a Merge pull request #6012 from meilisearch/update-test-job-name
Remove version from the name of the test job in CI
2025-11-27 08:29:32 +00:00
Paul de Nonancourt
4bbfdccc3e Remove version from the label of the test 2025-11-26 16:25:15 +01:00
Many the fish
a5b24b54b8 Merge pull request #6002 from meilisearch/update-dependencies
Upgrade most of the dependencies
2025-11-26 13:26:27 +00:00
Clément Renault
461e69c143 Merge pull request #6003 from meilisearch/build-arm-images-on-arm-runner
Build x86 and ARM images on Github-hosted runners
2025-11-26 11:53:47 +00:00
Clément Renault
915aeafefe Update the workflow name 2025-11-26 11:33:23 +01:00
Louis Dureuil
408529d8b2 compile gemm-16 optimized for ARM compatibility
Co-Authored-By: Paul de Nonancourt <paul@meilisearch.com>
2025-11-26 10:49:10 +01:00
Paul de Nonancourt
1724ab6d94 Run tests on both arm64 and x86 Github-hosted runners 2025-11-26 10:49:10 +01:00
Paul de Nonancourt
49a500a342 Fix cosign digest signature 2025-11-26 10:49:10 +01:00
Paul de Nonancourt
f26eabcfa1 Merge manifests into multi-architecture Docker image 2025-11-26 10:49:10 +01:00
Paul de Nonancourt
b468c090f3 Build ARM64 and AMD64 images on Github-hosted runners 2025-11-26 10:49:10 +01:00
Clément Renault
c14114840e Remove container 2025-11-26 10:45:12 +01:00
ManyTheFish
7933d1f9ea Update charabia v0.9.9 2025-11-24 15:13:11 +01:00
Clément Renault
6f1d3f337b Merge pull request #6006 from meilisearch/bump-version
Bump version to v1.27.0
2025-11-24 12:28:44 +00:00
Clément Renault
9640706c5a Do a no-op when upgrading version 2025-11-24 10:43:27 +01:00
Clément Renault
01cd273a52 Update the snapshots 2025-11-24 10:40:06 +01:00
Clément Renault
ae87d1cab9 Bump version in Cargo.toml 2025-11-24 10:32:32 +01:00
Clément Renault
d5a5372aba Only provide the last batch info 2025-11-20 12:02:29 +01:00
Clément Renault
cf62af13e8 Merge pull request #6005 from meilisearch/clamp-max-batch-size
Clamp max batch size to 10 GiB
2025-11-20 10:45:23 +00:00
Clément Renault
0d5e176dc2 Bump some of the incompatible dependencies 2025-11-20 11:45:08 +01:00
Clément Renault
d6f36a773d Update the compatible dependencies 2025-11-20 11:45:08 +01:00
Many the fish
91cf94c196 Merge pull request #5999 from meilisearch/fix-document-fetch-sort
Fix the Document Fetch pagination bug when Sort is applied
2025-11-20 10:15:04 +00:00
Clément Renault
753ba39199 Update the documentation of the batch size 2025-11-20 10:33:02 +01:00
Clément Renault
3944c25853 Clamp the maximum batch size to maximum 10GiB 2025-11-20 10:29:50 +01:00
ManyTheFish
925bce5fbd Modify the test to test all the sort branches and fix the untested branch 2025-11-20 10:27:24 +01:00
ManyTheFish
62065ed30d Fix the pagination bug
where the last document of the previous page was duplicated as the first
document of the current page. This was due to a bug on the custom nth
function of the sort ranking rule skipping `n-1` documents instead of `n`
2025-11-20 10:27:24 +01:00
Clément Renault
97e6ae1957 Merge pull request #5994 from meilisearch/improve-s3-error-messages
Improve S3 upload by showing errors in the task queue
2025-11-19 16:58:02 +00:00
Clément Renault
5ed9be0789 Merge pull request #5990 from meilisearch/default-max-batch-size
Make the limit batched tasks total size defaults to half of the max indexing memory
2025-11-19 16:56:34 +00:00
Clément Renault
7597b1049f Merge pull request #6001 from meilisearch/update-windows-macos-ci
Update the macOS platform version in the CI
2025-11-19 16:12:52 +00:00
Clément Renault
d99150f21b Improve error message extraction
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-19 17:09:15 +01:00
Kerollmops
c9726674a0 Make the limit batched tasks total size default to half of max indexing
memory
2025-11-19 17:04:45 +01:00
Clément Renault
205f40b3b8 Update the macOS platform version to use version 14 2025-11-19 16:10:41 +01:00
Clément Renault
3d013cdebe Merge pull request #5995 from meilisearch/fix-embedding-skip
Fix embedding skip
2025-11-18 10:02:53 +00:00
Louis Dureuil
ddeff5678f Clippy happy 2025-11-17 14:48:40 +01:00
Louis Dureuil
a235434910 Add test 2025-11-17 13:52:23 +01:00
Louis Dureuil
a376525348 Do not skip embedding request for the document that exceeds capacity 2025-11-17 13:18:58 +01:00
Kerollmops
361580f451 Display the error message on failure 2025-11-17 09:21:18 +01:00
Clément Renault
ea70a7d1c9 Merge pull request #5969 from xuhongxu96/main
Remove unused dependency `allocator-api2`
2025-11-15 10:03:15 +00:00
Clément Renault
9304f8e586 Merge pull request #5991 from meilisearch/release-v1.26.0
Release v1.26.0
2025-11-13 17:54:01 +00:00
Hongxu Xu
08bc982748 Remove unused dependency allocator-api2 2025-11-04 03:29:24 +00:00
Kerollmops
a8d55562e9 Expose the three last batches timings 2025-11-03 16:01:05 +01:00
Kerollmops
40d649ec9e Update utoipa 2025-11-03 15:53:14 +01:00
Kerollmops
c272ac8204 Reset metrics values to keep current steps only 2025-11-03 15:41:54 +01:00
Kerollmops
e18c677f0e Expose the step currently running on the metrics route 2025-11-03 15:28:58 +01:00
Kerollmops
84a288da57 Simplify the auth filters 2025-11-03 15:11:28 +01:00
Kerollmops
cbfc325b56 Expose the metrics for the last finished batch and not the processing
one
2025-11-03 15:10:23 +01:00
Kerollmops
ea640b076e Expose batch progress traces on the metrics route 2025-10-24 14:36:21 +02:00
225 changed files with 11992 additions and 3710 deletions

View File

@@ -24,6 +24,11 @@ TBD
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining what to do.
### Reminders when adding features
- [ ] Write unit tests using insta
- [ ] Write declarative integration tests in [workloads/tests](https://github.com/meilisearch/meilisearch/tree/main/workloads/tests). Specify the routes to call and then call `cargo xtask test workloads/tests/YOUR_TEST.json --update-responses` so that responses are automatically filled.
### Reminders when modifying the API
- [ ] Update the openAPI file with utoipa:

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -66,9 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |

View File

@@ -12,9 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -44,7 +44,7 @@ jobs:
exit 1
fi
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -19,6 +19,7 @@ env:
- [ ] Detail the change to the DB format and why they are forward compatible
- [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
## This PR makes breaking changes
@@ -35,8 +36,7 @@ env:
- [ ] Write the code to go from the old database to the new one
- If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
- If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
- [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected
- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
jobs:
add-comment:

View File

@@ -3,7 +3,7 @@ name: Look for flaky tests
on:
workflow_dispatch:
schedule:
- cron: '0 4 * * *' # Every day at 4:00AM
- cron: "0 4 * * *" # Every day at 4:00AM
jobs:
flaky:
@@ -13,11 +13,17 @@ jobs:
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps

View File

@@ -12,9 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1
# Run benchmarks
- name: Run the fuzzer

View File

@@ -25,7 +25,13 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v5

View File

@@ -14,10 +14,105 @@ on:
workflow_dispatch:
jobs:
docker:
runs-on: docker
build:
runs-on: ${{ matrix.runner }}
strategy:
matrix:
platform: [amd64, arm64]
edition: [community, enterprise]
include:
- platform: amd64
runner: ubuntu-24.04
- platform: arm64
runner: ubuntu-24.04-arm
- edition: community
registry: getmeili/meilisearch
feature-flag: ""
- edition: enterprise
registry: getmeili/meilisearch-enterprise
feature-flag: "--features enterprise"
permissions: {}
steps:
- uses: actions/checkout@v5
- name: Prepare
run: |
platform=linux/${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
platforms: linux/${{ matrix.platform }}
install: true
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ matrix.registry }}
# Prevent `latest` to be updated for each new tag pushed.
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
flavor: latest=false
tags: |
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push by digest
uses: docker/build-push-action@v6
id: build-and-push
with:
platforms: linux/${{ matrix.platform }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ matrix.registry }}
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
EXTRA_ARGS=${{ matrix.feature-flag }}
- name: Export digest
run: |
mkdir -p ${{ runner.temp }}/digests
digest="${{ steps.build-and-push.outputs.digest }}"
touch "${{ runner.temp }}/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ matrix.edition }}-${{ env.PLATFORM_PAIR }}
path: ${{ runner.temp }}/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
strategy:
matrix:
edition: [community, enterprise]
include:
- edition: community
registry: getmeili/meilisearch
- edition: enterprise
registry: getmeili/meilisearch-enterprise
needs:
- build
permissions:
id-token: write # This is needed to use Cosign in keyless mode
steps:
- uses: actions/checkout@v5
@@ -58,26 +153,30 @@ jobs:
echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install cosign
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
- name: Download digests
uses: actions/download-artifact@v4
with:
path: ${{ runner.temp }}/digests
pattern: digests-${{ matrix.edition }}-*
merge-multiple: true
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: getmeili/meilisearch
images: ${{ matrix.registry }}
# Prevent `latest` to be updated for each new tag pushed.
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
flavor: latest=false
@@ -88,33 +187,31 @@ jobs:
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v6
id: build-and-push
with:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
- name: Create manifest list and push
working-directory: ${{ runner.temp }}/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ matrix.registry }}@sha256:%s ' *)
- name: Inspect image to fetch digest to sign
run: |
digest=$(docker buildx imagetools inspect --format='{{ json .Manifest }}' ${{ matrix.registry }}:${{ steps.meta.outputs.version }} | jq -r '.digest')
echo "DIGEST=${digest}" >> $GITHUB_ENV
- name: Sign the images with GitHub OIDC Token
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
TAGS: ${{ steps.meta.outputs.tags }}
run: |
images=""
for tag in ${TAGS}; do
images+="${tag}@${DIGEST} "
images+="${tag}@${{ env.DIGEST }} "
done
cosign sign --yes ${images}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# /!\ Don't touch this without checking with engineers working on the Cloud code base on #discussion-engineering Slack channel
- name: Notify meilisearch-cloud
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
if: ${{ (github.event_name == 'push') && (matrix.edition == 'enterprise') }}
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
@@ -122,21 +219,13 @@ jobs:
event-type: cloud-docker-build
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
# Send notification to Swarmia to notify of a deployment: https://app.swarmia.com
# - name: 'Setup jq'
# uses: dcarbone/install-jq-action
# - name: Send deployment to Swarmia
# if: github.event_name == 'push' && success()
# run: |
# JSON_STRING=$( jq --null-input --compact-output \
# --arg version "${{ github.ref_name }}" \
# --arg appName "meilisearch" \
# --arg environment "production" \
# --arg commitSha "${{ github.sha }}" \
# --arg repositoryFullName "${{ github.repository }}" \
# '{"version": $version, "appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' )
# curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \
# -H "Content-Type: application/json" \
# -d "$JSON_STRING" \
# https://hook.swarmia.com/deployments
# /!\ Don't touch this without checking with integration team members on #discussion-integrations Slack channel
- name: Notify meilisearch-kubernetes
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event), or if not stable
if: ${{ github.event_name == 'push' && matrix.edition == 'community' && steps.check-tag-format.outputs.stable == 'true' }}
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-kubernetes
event-type: meilisearch-release
client-payload: '{ "version": "${{ github.ref_name }}" }'

View File

@@ -32,157 +32,61 @@ jobs:
if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh
publish-linux:
name: Publish binary for Linux
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
publish-macos-windows:
name: Publish binary for ${{ matrix.os }}
publish-binaries:
name: Publish binary for ${{ matrix.release }} ${{ matrix.edition }} edition
runs-on: ${{ matrix.os }}
needs: check-version
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
edition: [community, enterprise]
release:
[macos-amd64, macos-aarch64, windows, linux-amd64, linux-aarch64]
include:
- os: macos-13
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-2022
artifact_name: meilisearch.exe
asset_name: meilisearch-windows-amd64.exe
- edition: "community"
feature-flag: ""
edition-suffix: ""
- edition: "enterprise"
feature-flag: "--features enterprise"
edition-suffix: "enterprise-"
- release: macos-amd64
os: macos-15-intel
binary_path: release/meilisearch
asset_name: macos-amd64
extra-args: ""
- release: macos-aarch64
os: macos-14
binary_path: aarch64-apple-darwin/release/meilisearch
asset_name: macos-apple-silicon
extra-args: "--target aarch64-apple-darwin"
- release: windows
os: windows-2022
binary_path: release/meilisearch.exe
asset_name: windows-amd64.exe
extra-args: ""
- release: linux-amd64
os: ubuntu-22.04
binary_path: x86_64-unknown-linux-gnu/release/meilisearch
asset_name: linux-amd64
extra-args: "--target x86_64-unknown-linux-gnu"
- release: linux-aarch64
os: ubuntu-22.04-arm
binary_path: aarch64-unknown-linux-gnu/release/meilisearch
asset_name: linux-aarch64
extra-args: "--target aarch64-unknown-linux-gnu"
needs: check-version
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
- name: Build
run: cargo build --release --locked
run: cargo build --release --locked ${{ matrix.feature-flag }} ${{ matrix.extra-args }}
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: macos-13
needs: check-version
strategy:
matrix:
include:
- target: aarch64-apple-darwin
asset_name: meilisearch-macos-apple-silicon
steps:
- name: Checkout repository
uses: actions/checkout@v5
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
args: --release --target ${{ matrix.target }}
- name: Upload the binary to release
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-aarch64:
name: Publish binary for aarch64
runs-on: ubuntu-latest
needs: check-version
env:
DEBIAN_FRONTEND: noninteractive
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
strategy:
matrix:
include:
- target: aarch64-unknown-linux-gnu
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v5
- name: Install needed dependencies
run: |
apt-get update -y && apt upgrade -y
apt-get install -y curl build-essential gcc-aarch64-linux-gnu
- name: Set up Docker for cross compilation
run: |
apt-get install -y curl apt-transport-https ca-certificates software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
- name: Configure target aarch64 GNU
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
# JEMALLOC_SYS_WITH_LG_PAGE: 16
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
run: |
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
- name: Install a default toolchain that will be used to build cargo cross
run: |
rustup default stable
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: true
args: --release --target ${{ matrix.target }}
env:
CROSS_DOCKER_IN_DOCKER: true
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
file: target/${{ matrix.binary_path }}
asset_name: meilisearch-${{ matrix.edition-suffix }}${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-openapi-file:

View File

@@ -25,14 +25,18 @@ jobs:
- uses: actions/checkout@v5
- name: Define the Docker image we need to use
id: define-image
env:
EVENT_NAME: ${{ github.event_name }}
DOCKER_IMAGE_INPUT: ${{ github.event.inputs.docker_image }}
run: |
event=${{ github.event_name }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
if [[ "$EVENT_NAME" == 'workflow_dispatch' ]]; then
echo "docker-image=$DOCKER_IMAGE_INPUT" >> $GITHUB_OUTPUT
fi
- name: Docker image is ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"
env:
DOCKER_IMAGE: ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is $DOCKER_IMAGE"
##########
## SDKs ##
@@ -68,7 +72,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -92,7 +96,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -122,7 +126,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -149,7 +153,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -184,7 +188,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -213,7 +217,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -238,7 +242,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -263,7 +267,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -284,7 +288,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -307,7 +311,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -338,7 +342,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -370,7 +374,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}

View File

@@ -15,31 +15,40 @@ env:
jobs:
test-linux:
name: Tests on ubuntu-22.04
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
name: Tests on Ubuntu
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [ubuntu-22.04, ubuntu-22.04-arm]
features: ["", "--features enterprise"]
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: check free space before
run: df -h
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: check free space after
run: df -h
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.89
uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
with:
key: ${{ matrix.features }}
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
args: --locked --all ${{ matrix.features }}
test-others:
name: Tests on ${{ matrix.os }}
@@ -47,51 +56,58 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
os: [macos-14, windows-2022]
features: ["", "--features enterprise"]
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo check without any default features
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
args: --locked --all ${{ matrix.features }}
test-all-features:
name: Tests almost all features
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
runs-on: ubuntu-22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
cargo build --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
cargo test --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu:
name: Test with Ollama
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
@@ -115,21 +131,21 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all --features test-ollama ollama
args: --locked -p meilisearch --features test-ollama ollama
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:22.04
runs-on: ubuntu-22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -140,36 +156,39 @@ jobs:
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
build:
name: Build in release
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --all
- name: Build
run: cargo build --release --locked --target x86_64-unknown-linux-gnu
clippy:
name: Run Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
strategy:
matrix:
features: ["", "--features enterprise"]
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
@@ -177,18 +196,21 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets -- --deny warnings
args: --all-targets ${{ matrix.features }} -- --deny warnings -D clippy::todo
fmt:
name: Run Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal
toolchain: nightly-2024-07-09
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
@@ -199,3 +221,23 @@ jobs:
run: |
echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
cargo fmt --all -- --check
declarative-tests:
name: Run declarative tests
runs-on: ubuntu-22.04-arm
permissions:
contents: read
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run declarative tests
run: |
cargo xtask test workloads/tests/*.json

View File

@@ -18,9 +18,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file

View File

@@ -124,6 +124,7 @@ They are JSON files with the following structure (comments are not actually supp
{
// Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
"name": "hackernews.ndjson_1M,no-threads",
"type": "bench",
// Number of consecutive runs of the commands that should be performed.
// Each run uses a fresh instance of Meilisearch and a fresh database.
// Each run produces its own report file.

1195
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.26.0"
version = "1.30.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
@@ -50,3 +50,5 @@ opt-level = 3
opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.gemm-f16]
opt-level = 3

View File

@@ -1,7 +0,0 @@
[build.env]
passthrough = [
"RUST_BACKTRACE",
"CARGO_TERM_COLOR",
"RUSTFLAGS",
"JEMALLOC_SYS_WITH_LG_PAGE"
]

View File

@@ -8,16 +8,14 @@ WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ARG EXTRA_ARGS
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release -p meilisearch -p meilitool
cargo build --release -p meilisearch -p meilitool ${EXTRA_ARGS}
# Run
FROM alpine:3.22

326
TESTING.md Normal file
View File

@@ -0,0 +1,326 @@
# Declarative tests
Declarative tests ensure that Meilisearch features remain stable across versions.
While we already have unit tests, those are run against **temporary databases** that are created fresh each time and therefore never risk corruption.
Declarative tests instead **simulate the lifetime of a database**: they chain together commands and requests to change the binary, verifying that database state and API responses remain consistent.
## Basic example
```jsonc
{
"type": "test",
"name": "api-keys",
"binary": { // the first command will run on the binary following this specification.
"source": "release", // get the binary as a release from GitHub
"version": "1.19.0", // version to fetch
"edition": "community" // edition to fetch
},
"commands": []
}
```
This example defines a no-op test (it does nothing).
If the file is saved at `workloads/tests/example.json`, you can run it with:
```bash
cargo xtask test workloads/tests/example.json
```
## Commands
Commands represent API requests sent to Meilisearch endpoints during a test.
They are executed sequentially, and their responses can be validated to ensure consistent behavior across upgrades.
```jsonc
{
"route": "keys",
"method": "POST",
"body": {
"inline": {
"actions": [
"search",
"documents.add"
],
"description": "Test API Key",
"expiresAt": null,
"indexes": [ "movies" ]
}
}
}
```
This command issues a `POST /keys` request, creating an API key with permissions to search and add documents in the `movies` index.
### Using assets in commands
To keep tests concise and reusable, you can define **assets** at the root of the workload file.
Assets are external data sources (such as datasets) that are cached between runs, making tests faster and easier to read.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.19.0",
"edition": "community"
},
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
}
}
]
}
```
In this example:
- The `movies.json` dataset is defined as an asset, pointing to a remote URL.
- The SHA-256 checksum ensures integrity.
- The `POST /indexes/movies/documents` command uses this asset as the request body.
This makes the test much cleaner than inlining a large dataset directly into the command.
For asset handling, please refer to the [declarative benchmarks documentation](/BENCHMARKS.md#adding-new-assets).
### Asserting responses
Commands can specify both the **expected status code** and the **expected response body**.
```jsonc
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"expectedStatus": 202,
"expectedResponse": {
"enqueuedAt": "[timestamp]", // Set to a bracketed string to ignore the value
"indexUid": "movies",
"status": "enqueued",
"taskUid": 1,
"type": "documentAdditionOrUpdate"
},
"synchronous": "WaitForTask"
}
```
Manually writing `expectedResponse` fields can be tedious.
Instead, you can let the test runner populate them automatically:
```bash
# Run the workload to populate expected fields. Only adds the missing ones, doesn't change existing data
cargo xtask test workloads/tests/example.json --add-missing-responses
# OR
# Run the workload to populate expected fields. Updates all fields including existing ones
cargo xtask test workloads/tests/example.json --update-responses
```
This workflow is recommended:
1. Write the test without expected fields.
2. Run it with `--add-missing-responses` to capture the actual responses.
3. Review and commit the generated expectations.
## Changing binary
It is possible to insert an instruction to change the current Meilisearch instance from one binary specification to another during a test.
When executed, such an instruction will:
1. Stop the current Meilisearch instance.
2. Fetch the binary specified by the instruction.
3. Restart the server with the specified binary on the same database.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.19.0", // start with version v1.19.0
"edition": "community"
},
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
// setup some data
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
}
},
// switch binary to v1.24.0
{
"binary": {
"source": "release",
"version": "1.24.0",
"edition": "community"
}
}
]
}
```
### Typical Usage
In most cases, the change binary instruction will be used to update a database.
- **Set up** some data using commands on an older version.
- **Upgrade** to the latest version.
- **Assert** that the data and API behavior remain correct after the upgrade.
To properly test the dumpless upgrade, one should typically:
1. Open the database without processing the update task: use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` and `--experimental-max-number-of-batched-tasks=0` as extra CLI arguments.
2. Check that the search, stats and task queue still work.
3. Open the database and process the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` as the extra CLI argument. Use a `health` command to wait for the upgrade task to finish.
4. Check that the indexing, search, stats, and task queue still work.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.12.0",
"edition": "community"
},
"commands": [
// 0. Run commands to populate the database
{
// ..
},
// 1. Open the database with new MS without processing the update task
{
"binary": {
"source": "build", // build the binary from the sources in the current git repository
"edition": "community",
"extraCliArgs": [
"--experimental-dumpless-upgrade", // allows opening the database with a newer Meilisearch
"--experimental-max-number-of-batched-tasks=0" // prevent processing of the update task
]
}
},
// 2. Check the search etc.
{
// ..
},
// 3. Open the database with new MS and process the update task
{
"binary": {
"source": "build", // build the binary from the sources in the current git repository
"edition": "community",
"extraCliArgs": [
"--experimental-dumpless-upgrade" // allows opening the database with a newer Meilisearch
// no `--experimental-max-number-of-batched-tasks=0`
]
}
},
// 4. Check the indexing, search, etc.
{
// ..
}
]
}
```
This ensures backward compatibility: databases created with older Meilisearch versions should remain functional and consistent after an upgrade.
## Variables
Sometimes a command needs to use a value returned by a **previous response**.
These values can be captured and reused using the `register` field.
```jsonc
{
"route": "keys",
"method": "POST",
"body": {
"inline": {
"actions": [
"search",
"documents.add"
],
"description": "Test API Key",
"expiresAt": null,
"indexes": [ "movies" ]
}
},
"expectedResponse": {
"key": "c6f64630bad2996b1f675007c8800168e14adf5d6a7bb1a400a6d2b158050eaf",
// ...
},
"register": {
"key": "/key"
},
"synchronous": "WaitForResponse"
}
```
The `register` field captures the value at the JSON path `/key` from the response.
Paths follow the **JavaScript Object Notation Pointer (RFC 6901)** format.
Registered variables are available for all subsequent commands.
Registered variables can be referenced by wrapping their name in double curly braces:
In the route/path:
```jsonc
{
"route": "tasks/{{ task_id }}",
"method": "GET"
}
```
In the request body:
```jsonc
{
"route": "indexes/movies/documents",
"method": "PATCH",
"body": {
"inline": {
"id": "{{ document_id }}",
"overview": "Shazam turns evil and the world is in danger.",
}
}
}
```
Or they can be referenced by their name (**without curly braces**) as an API key:
```jsonc
{
"route": "indexes/movies/documents",
"method": "POST",
"body": { /* ... */ },
"apiKeyVariable": "key" // The **content** of the key variable will be used as an API key
}
```

View File

@@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.7"
anyhow = "1.0.100"
bumpalo = "3.19.0"
csv = "1.4.0"
memmap2 = "0.9.9"
milli = { path = "../milli" }
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
mimalloc = { version = "0.1.48", default-features = false }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tempfile = "3.23.0"
[dev-dependencies]
criterion = { version = "0.6.0", features = ["html_reports"] }
criterion = { version = "0.7.0", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.12"
[build-dependencies]
anyhow = "1.0.98"
bytes = "1.10.1"
convert_case = "0.8.0"
flate2 = "1.1.2"
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
anyhow = "1.0.100"
bytes = "1.11.0"
convert_case = "0.9.0"
flate2 = "1.1.5"
reqwest = { version = "0.12.24", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@@ -21,6 +21,10 @@ use roaring::RoaringBitmap;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Callback that always returns `false`.
///
/// Passed by reference (`&no_cancel`) to the settings and indexing calls in this file,
/// in place of the inline `&|| false` closure they used before.
/// NOTE(review): judging by the name, the callee treats the returned boolean as a
/// "should cancel / must stop" signal — confirm against the callee's signature.
fn no_cancel() -> bool {
false
}
const BENCHMARK_ITERATION: usize = 10;
fn setup_dir(path: impl AsRef<Path>) {
@@ -65,7 +69,7 @@ fn setup_settings<'t>(
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields);
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap();
}
fn setup_index_with_settings(
@@ -152,7 +156,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -168,7 +172,7 @@ fn indexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -220,7 +224,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -236,7 +240,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -266,7 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -282,7 +286,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -336,7 +340,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -352,7 +356,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -414,7 +418,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -430,7 +434,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -460,7 +464,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -476,7 +480,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -502,7 +506,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -518,7 +522,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -571,7 +575,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -587,7 +591,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -639,7 +643,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -655,7 +659,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -707,7 +711,7 @@ fn indexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -723,7 +727,7 @@ fn indexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -774,7 +778,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -790,7 +794,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -820,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -836,7 +840,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -889,7 +893,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -905,7 +909,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -967,7 +971,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -983,7 +987,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1014,7 +1018,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1030,7 +1034,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1057,7 +1061,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1073,7 +1077,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1125,7 +1129,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1141,7 +1145,7 @@ fn indexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1192,7 +1196,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1208,7 +1212,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1238,7 +1242,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1254,7 +1258,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1307,7 +1311,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1323,7 +1327,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1372,7 +1376,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
Some(primary_key),
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1422,7 +1426,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1438,7 +1442,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1468,7 +1472,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1484,7 +1488,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1510,7 +1514,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1526,7 +1530,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1601,7 +1605,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1617,7 +1621,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1693,7 +1697,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1709,7 +1713,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1777,7 +1781,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1793,7 +1797,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1845,7 +1849,7 @@ fn indexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1861,7 +1865,7 @@ fn indexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1912,7 +1916,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1928,7 +1932,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1958,7 +1962,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1974,7 +1978,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2027,7 +2031,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2043,7 +2047,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)

View File

@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.41", features = ["parsing"] }
time = { version = "0.3.44", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.98"
vergen-git2 = "1.0.7"
anyhow = "1.0.100"
vergen-gitcl = "1.0.8"

View File

@@ -15,7 +15,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
let mut builder = vergen_git2::Git2Builder::default();
let mut builder = vergen_gitcl::GitclBuilder::default();
builder.branch(true);
builder.commit_timestamp(true);
@@ -25,5 +25,5 @@ fn emit_git_variables() -> anyhow::Result<()> {
let git2 = builder.build()?;
vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
}

View File

@@ -0,0 +1,6 @@
use build_info::BuildInfo;
/// Entry point: obtains a `BuildInfo` through `BuildInfo::from_build()` and dumps it with `dbg!`.
fn main() {
let info = BuildInfo::from_build();
// `dbg!` prints the expression text (`info`) and its `Debug` representation to stderr.
dbg!(info);
}

View File

@@ -11,24 +11,27 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.98"
flate2 = "1.1.2"
anyhow = "1.0.100"
flate2 = "1.1.5"
http = "1.3.1"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.21.3"
regex = "1.11.1"
regex = "1.12.2"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tempfile = "3.23.0"
thiserror = "2.0.17"
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.18.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
meilisearch-types = { path = "../meilisearch-types" }
[features]
enterprise = ["meilisearch-types/enterprise"]

View File

@@ -9,8 +9,9 @@ use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::network::{DbTaskNetwork, NetworkTopologyChange};
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
@@ -95,7 +96,7 @@ pub struct TaskDump {
)]
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
pub network: Option<DbTaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
}
@@ -163,6 +164,7 @@ pub enum KindDump {
IndexCompaction {
index_uid: String,
},
NetworkTopologyChange(NetworkTopologyChange),
}
impl From<Task> for TaskDump {
@@ -249,6 +251,9 @@ impl From<KindWithContent> for KindDump {
KindWithContent::IndexCompaction { index_uid } => {
KindDump::IndexCompaction { index_uid }
}
KindWithContent::NetworkTopologyChange(network_topology_change) => {
KindDump::NetworkTopologyChange(network_topology_change)
}
}
}
}
@@ -262,13 +267,13 @@ pub(crate) mod test {
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::network::{Network, Remote};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
@@ -560,7 +565,8 @@ pub(crate) mod test {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
sharding: false,
leader: None,
version: Default::default(),
}
}
@@ -614,7 +620,10 @@ pub(crate) mod test {
assert_eq!(dump.features().unwrap().unwrap(), expected);
// ==== checking the network
let expected = create_test_network();
let mut expected = create_test_network();
// from v1.29, we drop `leader` and `local` on import
expected.leader = None;
expected.local = None;
assert_eq!(&expected, dump.network().unwrap().unwrap());
}
}

View File

@@ -434,7 +434,11 @@ pub(crate) mod test {
// network
let network = dump.network().unwrap().unwrap();
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
// since v1.29 we are dropping `local` and `leader` on import
insta::assert_snapshot!(network.local.is_none(), @"true");
insta::assert_snapshot!(network.leader.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");

View File

@@ -107,19 +107,14 @@ impl Settings<Unchecked> {
}
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Default, Debug, Clone, PartialEq)]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)

View File

@@ -161,19 +161,14 @@ pub struct Facets {
pub min_level_size: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn map<U, F>(self, f: F) -> Setting<U>
where

View File

@@ -1,9 +1,7 @@
use std::fmt::{self, Display, Formatter};
use std::marker::PhantomData;
use std::str::FromStr;
use serde::de::Visitor;
use serde::{Deserialize, Deserializer};
use serde::Deserialize;
use uuid::Uuid;
use super::settings::{Settings, Unchecked};
@@ -82,59 +80,3 @@ impl Display for IndexUidFormatError {
}
impl std::error::Error for IndexUidFormatError {}
/// A type that tries to match either a star (*) or
/// any other thing that implements `FromStr`.
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum StarOr<T> {
/// The deserialized string was exactly `"*"`.
Star,
/// The deserialized string was anything else and was successfully parsed into `T`
/// through `FromStr`.
Other(T),
}
impl<'de, T, E> Deserialize<'de> for StarOr<T>
where
    T: FromStr<Err = E>,
    E: Display,
{
    /// Deserializes a string into a `StarOr`.
    ///
    /// The string `"*"` yields `StarOr::Star`; any other string is handed to
    /// `T::from_str` and wrapped in `StarOr::Other`. A parsing failure is reported as a
    /// custom serde error carrying the `FromStr` error message.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Hand-written visitor, needed because serde cannot tell `StarOr::Star` and
        /// `StarOr::Other` apart without a tag.
        /// Combining `#[serde(untagged)]` with `#[serde(rename="*")]` would try to
        /// deserialize every input, "*" included, as `StarOr::Other`.
        /// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) could have
        /// helped, but it is not supported on untagged enums.
        struct StarOrVisitor<T>(PhantomData<T>);

        impl<T, FE> Visitor<'_> for StarOrVisitor<T>
        where
            T: FromStr<Err = FE>,
            FE: Display,
        {
            type Value = StarOr<T>;

            fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
                write!(formatter, "a string")
            }

            fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
            where
                SE: serde::de::Error,
            {
                // The literal star short-circuits before any parsing is attempted.
                if v == "*" {
                    return Ok(StarOr::Star);
                }

                match T::from_str(v) {
                    Ok(parsed) => Ok(StarOr::Other(parsed)),
                    Err(e) => Err(SE::custom(format!("Invalid `other` value: {}", e))),
                }
            }
        }

        let visitor = StarOrVisitor(PhantomData);
        deserializer.deserialize_str(visitor)
    }
}

View File

@@ -192,19 +192,14 @@ pub struct Facets {
pub min_level_size: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {

View File

@@ -47,20 +47,15 @@ pub struct Settings<T> {
pub _kind: PhantomData<T>,
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {

View File

@@ -322,7 +322,7 @@ impl From<Task> for TaskView {
_ => None,
});
let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
let duration = finished_at.zip(started_at).map(|(tf, ts)| tf - ts);
Self {
uid: id,

View File

@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::enterprise_edition::network::Network;
pub type Network = meilisearch_types::network::Network;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
// ===== Other types to clarify the code of the compat module
@@ -95,17 +95,26 @@ impl V6Reader {
Err(e) => return Err(e.into()),
};
let network = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled.
ErrorKind::NotFound => {
debug!("`network.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
let mut network: Option<meilisearch_types::network::Network> =
match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
Err(error) => match error.kind() {
// Allows the file to be missing; in that case no network configuration is loaded from the dump.
ErrorKind::NotFound => {
debug!("`network.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
if let Some(network) = &mut network {
// as dumps are typically imported on a different machine than the emitter (otherwise dumpless upgrade would be used),
// we decide to remove `local` (the self) to avoid alias issues
network.local = None;
// for the same reason we disable automatic sharding
network.leader = None;
}
let webhooks = match fs::read(dump.path().join("webhooks.json")) {
Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?),

View File

@@ -5,9 +5,9 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::network::Network;
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::WebhooksDumpView;
use serde_json::{Map, Value};

View File

@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.20.0"
thiserror = "2.0.12"
tempfile = "3.23.0"
thiserror = "2.0.17"
tracing = "0.1.41"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.18.1", features = ["serde", "v4"] }

View File

@@ -16,7 +16,7 @@ license.workspace = true
serde_json = "1.0"
[dev-dependencies]
criterion = { version = "0.6.0", features = ["html_reports"] }
criterion = { version = "0.7.0", features = ["html_reports"] }
[[bench]]
name = "benchmarks"

View File

@@ -11,12 +11,12 @@ edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.18.1"
clap = { version = "4.5.40", features = ["derive"] }
arbitrary = { version = "1.4.2", features = ["derive"] }
bumpalo = "3.19.0"
clap = { version = "4.5.52", features = ["derive"] }
either = "1.15.0"
fastrand = "2.3.0"
milli = { path = "../milli" }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tempfile = "3.23.0"

View File

@@ -11,33 +11,34 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.98"
anyhow = "1.0.100"
bincode = "1.3.3"
byte-unit = "5.1.6"
bytes = "1.10.1"
bumpalo = "3.18.1"
bytes = "1.11.0"
bumpalo = "3.19.0"
bumparaw-collections = "0.1.4"
convert_case = "0.8.0"
csv = "1.3.1"
convert_case = "0.9.0"
csv = "1.4.0"
derive_builder = "0.20.2"
dump = { path = "../dump" }
enum-iterator = "2.1.0"
enum-iterator = "2.3.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
indexmap = "2.9.0"
flate2 = "1.1.5"
hashbrown = "0.15.5"
indexmap = "2.12.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.7"
memmap2 = "0.9.9"
page_size = "0.6.0"
rayon = "1.10.0"
rayon = "1.11.0"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tar = "0.4.44"
synchronoise = "1.0.1"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
tempfile = "3.23.0"
thiserror = "2.0.17"
time = { version = "0.3.44", features = [
"serde-well-known",
"formatting",
"parsing",
@@ -45,11 +46,15 @@ time = { version = "0.3.41", features = [
] }
tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.18.1", features = ["serde", "v4"] }
backoff = "0.4.0"
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
reqwest = { version = "0.12.24", features = [
"rustls-tls",
"http2",
], default-features = false }
rusty-s3 = "0.8.1"
tokio = { version = "1.47.1", features = ["full"] }
tokio = { version = "1.48.0", features = ["full"] }
urlencoding = "2.1.3"
[dev-dependencies]
big_s = "1.0.2"
@@ -58,3 +63,6 @@ crossbeam-channel = "0.5.15"
insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
[features]
enterprise = ["meilisearch-types/enterprise"]

View File

@@ -238,6 +238,9 @@ impl<'a> Dump<'a> {
KindDump::IndexCompaction { index_uid } => {
KindWithContent::IndexCompaction { index_uid }
}
KindDump::NetworkTopologyChange(network_topology_change) => {
KindWithContent::NetworkTopologyChange(network_topology_change)
}
},
};

View File

@@ -3,10 +3,13 @@ use std::fmt::Display;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::milli::index::RollbackOutcome;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::tasks::network::ReceiveTaskError;
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use reqwest::StatusCode;
use thiserror::Error;
use uuid::Uuid;
use crate::TaskId;
@@ -191,6 +194,17 @@ pub enum Error {
#[error(transparent)]
HeedTransaction(heed::Error),
#[error("No network topology change task is currently enqueued or processing")]
ImportTaskWithoutNetworkTask,
#[error("The network task version (`{network_task}`) does not match the import task version (`{import_task}`)")]
NetworkVersionMismatch { network_task: Uuid, import_task: Uuid },
#[error("The import task emanates from an unknown remote `{0}`")]
ImportTaskUnknownRemote(String),
#[error("The import task with key `{0}` was already received")]
ImportTaskAlreadyReceived(DocumentId),
#[error("{action} requires the Enterprise Edition")]
RequiresEnterpriseEdition { action: &'static str },
#[cfg(test)]
#[error("Planned failure for tests.")]
PlannedFailure,
@@ -248,6 +262,11 @@ impl Error {
| Error::Persist(_)
| Error::FeatureNotEnabled(_)
| Error::Export(_)
| Error::ImportTaskWithoutNetworkTask
| Error::NetworkVersionMismatch { .. }
| Error::ImportTaskAlreadyReceived(_)
| Error::ImportTaskUnknownRemote(_)
| Error::RequiresEnterpriseEdition { .. }
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
@@ -307,6 +326,11 @@ impl ErrorCode for Error {
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::ImportTaskWithoutNetworkTask => Code::ImportTaskWithoutNetworkTask,
Error::NetworkVersionMismatch { .. } => Code::NetworkVersionMismatch,
Error::ImportTaskAlreadyReceived(_) => Code::ImportTaskAlreadyReceived,
Error::ImportTaskUnknownRemote(_) => Code::ImportTaskUnknownRemote,
Error::RequiresEnterpriseEdition { .. } => Code::RequiresEnterpriseEdition,
Error::S3Error { status, .. } if status.is_client_error() => {
Code::InvalidS3SnapshotRequest
}
@@ -345,3 +369,12 @@ impl ErrorCode for Error {
}
}
}
/// Converts the errors raised while receiving a remote import task into
/// the matching index-scheduler errors, so they can be propagated with `?`.
impl From<ReceiveTaskError> for Error {
    fn from(value: ReceiveTaskError) -> Self {
        match value {
            // The payload is the name of the remote that is not known.
            ReceiveTaskError::UnknownRemote(remote) => Self::ImportTaskUnknownRemote(remote),
            // The payload is the key of the task that was already received.
            ReceiveTaskError::DuplicateTask(task_key) => Self::ImportTaskAlreadyReceived(task_key),
        }
    }
}

View File

@@ -1,9 +1,9 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
use meilisearch_types::network::Network;
use crate::error::FeatureNotEnabledError;
use crate::Result;
@@ -38,6 +38,10 @@ impl RoFeatures {
Self { runtime }
}
/// Builds a read-only view of the features directly from an already
/// available set of runtime-togglable features.
pub fn from_runtime_features(features: RuntimeTogglableFeatures) -> Self {
    let runtime = features;
    Self { runtime }
}
/// Returns the runtime-togglable features held by this view, by value.
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
self.runtime
}

View File

@@ -361,6 +361,12 @@ impl IndexMapper {
Ok(())
}
/// Returns the number of indexes in the database, i.e. the number of
/// entries of the index mapping as seen through `rtxn`.
///
/// # Errors
///
/// Propagates the error returned when the length of the mapping cannot be read.
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
pub fn index_count(&self, rtxn: &RoTxn) -> Result<u64> {
    let count = self.index_mapping.len(rtxn)?;
    Ok(count)
}
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =

View File

@@ -6,7 +6,7 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning;
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
@@ -27,6 +27,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
queue,
scheduler,
persisted,
export_default_payload_size_bytes: _,
index_mapper,
features: _,
@@ -320,11 +321,18 @@ fn snapshot_details(d: &Details) -> String {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
format!("{{ from: {from:?}, to: [current version] }}")
} else {
format!("{{ from: {from:?}, to: {to:?} }}")
}
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
}
Details::NetworkTopologyChange { moved_documents, message } => {
    // The closing `}}` was missing, leaving the snapshot text with an unbalanced
    // opening brace, unlike every other arm of this match.
    // NOTE(review): stored snapshots containing this variant must be re-accepted.
    format!("{{ moved_documents: {moved_documents:?}, message: {message:?} }}")
}
}
}
@@ -400,7 +408,21 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
let details = if let Some(upgrade_to) = &details.upgrade_to {
if upgrade_to.as_str()
== format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
{
let mut details = details.clone();
details.upgrade_to = Some("[current version]".into());
serde_json::to_string(&details).unwrap()
} else {
serde_json::to_string(details).unwrap()
}
} else {
serde_json::to_string(details).unwrap()
};
snap.push_str(&format!("details: {details}, "));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() {
snap.push_str(&format!(

View File

@@ -48,13 +48,13 @@ use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::time::Duration;
use byte_unit::Byte;
use dump::Dump;
pub use error::Error;
pub use features::RoFeatures;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
};
@@ -67,11 +67,14 @@ use meilisearch_types::milli::vector::{
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
};
use meilisearch_types::milli::{self, Index};
use meilisearch_types::network::Network;
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
use meilisearch_types::tasks::network::{
DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
};
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
use queue::Queue;
use roaring::RoaringBitmap;
@@ -82,6 +85,7 @@ use uuid::Uuid;
use versioning::Versioning;
use crate::index_mapper::IndexMapper;
use crate::processing::ProcessingTasks;
use crate::utils::clamp_to_page_size;
pub(crate) type BEI128 = I128<BE>;
@@ -144,9 +148,11 @@ pub struct IndexSchedulerOptions {
/// If the autobatcher is allowed to automatically batch tasks
/// it will only batch this defined maximum size (in bytes) of tasks at once.
pub batched_tasks_size_limit: u64,
/// The maximum size of the default payload for exporting documents, in bytes
pub export_default_payload_size_bytes: Byte,
/// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures,
/// The experimental features enabled for this instance.
/// Whether the index scheduler is able to auto upgrade or not.
pub auto_upgrade: bool,
/// The maximal number of entries in the search query cache of an embedder.
///
@@ -199,6 +205,9 @@ pub struct IndexScheduler {
/// to the same embeddings for the same input text.
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
/// The maximum size of the default payload for exporting documents, in bytes
pub export_default_payload_size_bytes: Byte,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -234,6 +243,7 @@ impl IndexScheduler {
cleanup_enabled: self.cleanup_enabled,
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
persisted: self.persisted,
export_default_payload_size_bytes: self.export_default_payload_size_bytes,
webhooks: self.webhooks.clone(),
embedders: self.embedders.clone(),
@@ -345,6 +355,7 @@ impl IndexScheduler {
persisted,
webhooks: Arc::new(webhooks),
embedders: Default::default(),
export_default_payload_size_bytes: options.export_default_payload_size_bytes,
#[cfg(test)] // Will be replaced in `new_tests` in test environments
test_breakpoint_sdr: crossbeam_channel::bounded(0).0,
@@ -700,14 +711,14 @@ impl IndexScheduler {
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
}
pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
pub fn set_task_network(&self, task_id: TaskId, network: DbTaskNetwork) -> Result<Task> {
let mut wtxn = self.env.write_txn()?;
let mut task =
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
task.network = Some(network);
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
wtxn.commit()?;
Ok(())
Ok(task)
}
/// Return the batches matching the query from the user's point of view along
@@ -757,18 +768,30 @@ impl IndexScheduler {
task_id: Option<TaskId>,
dry_run: bool,
) -> Result<Task> {
self.register_with_custom_metadata(kind, task_id, None, dry_run)
self.register_with_custom_metadata(kind, task_id, None, dry_run, None)
}
/// Register a new task in the scheduler, with metadata.
///
/// If it fails and data was associated with the task, it tries to delete the associated data.
///
/// # Parameters
///
/// - task_network: network of the task to check.
///
/// If the task is an import task, only accept it if:
///
/// 1. There is an ongoing network topology change task
/// 2. The task to register matches the network version of the network topology change task
///
/// Always accept the task if it is not an import task.
pub fn register_with_custom_metadata(
&self,
kind: KindWithContent,
task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool,
task_network: Option<TaskNetwork>,
) -> Result<Task> {
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
@@ -779,7 +802,19 @@ impl IndexScheduler {
}
let mut wtxn = self.env.write_txn()?;
let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?;
if let Some(TaskNetwork::Import { import_from, network_change, metadata }) = &task_network {
self.update_network_task(&mut wtxn, import_from, network_change, metadata)?;
}
let task = self.queue.register(
&mut wtxn,
&kind,
task_id,
custom_metadata,
dry_run,
task_network.map(DbTaskNetwork::from),
)?;
// If the registered task is a task cancelation
// we inform the processing tasks to stop (if necessary).
@@ -801,6 +836,91 @@ impl IndexScheduler {
Ok(task)
}
/// Notifies the current network topology change task that the remote
/// `remote_name` sent an import carrying no index: no index name,
/// zero documents, zero indexes and no task key.
///
/// The update is committed in its own write transaction, then the
/// scheduler is woken up.
///
/// # Errors
///
/// Fails if the write transaction cannot be opened or committed, or if
/// updating the network task fails.
pub fn network_no_index_for_remote(
    &self,
    remote_name: String,
    origin: Origin,
) -> Result<(), Error> {
    // An "empty" import: nothing to import from this remote.
    let empty_import = ImportData { remote_name, index_name: None, document_count: 0 };
    let empty_metadata =
        ImportMetadata { index_count: 0, task_key: None, total_index_documents: 0 };

    let mut wtxn = self.env.write_txn()?;
    self.update_network_task(&mut wtxn, &empty_import, &origin, &empty_metadata)?;
    wtxn.commit()?;

    // wake up the scheduler as the task state has changed
    self.scheduler.wake_up.signal();

    Ok(())
}
fn update_network_task(
&self,
wtxn: &mut heed::RwTxn<'_>,
import_from: &ImportData,
network_change: &Origin,
metadata: &ImportMetadata,
) -> Result<(), Error> {
let mut network_tasks = self
.queue
.tasks
.get_kind(&*wtxn, meilisearch_types::tasks::Kind::NetworkTopologyChange)?;
if network_tasks.is_empty() {
return Err(Error::ImportTaskWithoutNetworkTask);
}
let network_task = {
let processing = self.processing_tasks.read().unwrap().processing.clone();
if processing.is_disjoint(&network_tasks) {
let enqueued = self
.queue
.tasks
.get_status(&*wtxn, meilisearch_types::tasks::Status::Enqueued)?;
network_tasks &= enqueued;
if let Some(network_task) = network_tasks.into_iter().next() {
network_task
} else {
return Err(Error::ImportTaskWithoutNetworkTask);
}
} else {
network_tasks &= &*processing;
network_tasks.into_iter().next().unwrap()
}
};
let mut network_task = self.queue.tasks.get_task(&*wtxn, network_task)?.unwrap();
let network_task_version = network_task
.network
.as_ref()
.map(|network| network.network_version())
.unwrap_or_default();
if network_task_version != network_change.network_version {
return Err(Error::NetworkVersionMismatch {
network_task: network_task_version,
import_task: network_change.network_version,
});
}
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut network_task.kind
else {
tracing::error!("unexpected network kind for network task while registering task");
return Err(Error::CorruptedTaskQueue);
};
network_topology_change.receive_remote_task(
&import_from.remote_name,
import_from.index_name.as_deref(),
metadata.task_key,
import_from.document_count,
metadata.index_count,
metadata.total_index_documents,
)?;
self.queue.tasks.update_task(wtxn, &mut network_task)?;
Ok(())
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {

View File

@@ -42,12 +42,10 @@ impl ProcessingTasks {
/// Set the processing tasks to an empty list
pub fn stop_processing(&mut self) -> Self {
self.progress = None;
Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
progress: std::mem::take(&mut self.progress),
}
}

View File

@@ -15,6 +15,7 @@ use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use time::format_description::well_known::Rfc3339;
@@ -259,6 +260,7 @@ impl Queue {
task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool,
network: Option<DbTaskNetwork>,
) -> Result<Task> {
let next_task_id = self.tasks.next_task_id(wtxn)?;
@@ -280,7 +282,7 @@ impl Queue {
details: kind.default_details(),
status: Status::Enqueued,
kind: kind.clone(),
network: None,
network,
custom_metadata,
};
// For deletion and cancelation tasks, we want to make extra sure that they
@@ -348,6 +350,7 @@ impl Queue {
None,
None,
false,
None,
)?;
Ok(())

View File

@@ -3,7 +3,8 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@@ -114,14 +115,15 @@ impl TaskQueue {
/// - CorruptedTaskQueue: The task doesn't exist in the database
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
let reprocessing = old_task.status != Status::Enqueued;
// network topology tasks may be processed multiple times.
let maybe_reprocessing = old_task.status != Status::Enqueued
|| task.kind.as_kind() == Kind::NetworkTopologyChange;
debug_assert!(old_task != *task);
debug_assert_eq!(old_task.uid, task.uid);
// If we're processing a task that failed it may already contains a batch_uid
debug_assert!(
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
maybe_reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
"\n==> old: {old_task:?}\n==> new: {task:?}"
);
@@ -143,13 +145,24 @@ impl TaskQueue {
})?;
}
// Avoids rewriting part of the network topology change because of TOCTOU errors
if let (
KindWithContent::NetworkTopologyChange(old_state),
KindWithContent::NetworkTopologyChange(new_state),
) = (old_task.kind, &mut task.kind)
{
new_state.merge(old_state);
// the state possibly just changed, rewrite the details
task.details = Some(new_state.to_details());
}
assert_eq!(
old_task.enqueued_at, task.enqueued_at,
"Cannot update a task's enqueued_at time"
);
if old_task.started_at != task.started_at {
assert!(
reprocessing || old_task.started_at.is_none(),
maybe_reprocessing || old_task.started_at.is_none(),
"Cannot update a task's started_at time"
);
if let Some(started_at) = old_task.started_at {
@@ -161,7 +174,7 @@ impl TaskQueue {
}
if old_task.finished_at != task.finished_at {
assert!(
reprocessing || old_task.finished_at.is_none(),
maybe_reprocessing || old_task.finished_at.is_none(),
"Cannot update a task's finished_at time"
);
if let Some(finished_at) = old_task.finished_at {
@@ -175,7 +188,16 @@ impl TaskQueue {
task.network = match (old_task.network, task.network.take()) {
(None, None) => None,
(None, Some(network)) | (Some(network), None) => Some(network),
(Some(_), Some(network)) => Some(network),
(Some(left), Some(right)) => Some(match (left, right) {
(
DbTaskNetwork::Remotes { remote_tasks: mut left, network_version: _ },
DbTaskNetwork::Remotes { remote_tasks: mut right, network_version },
) => {
left.append(&mut right);
DbTaskNetwork::Remotes { remote_tasks: left, network_version }
}
(_, right) => right,
}),
};
self.all_tasks.put(wtxn, &task.uid, task)?;

View File

@@ -203,26 +203,30 @@ fn test_disable_auto_deletion_of_tasks() {
)
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
}
// now we're above the max number of tasks
// and if we try to advance in the tick function no new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
}
}
#[test]
@@ -267,59 +271,69 @@ fn test_auto_deletion_of_tasks() {
)
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
}
// now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
drop(rtxn);
drop(proc);
{
// now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
}
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
drop(rtxn);
drop(proc);
{
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
}
handle.advance_one_failed_batch();
// a new task deletion has been enqueued
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
}
handle.advance_one_failed_batch();
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
}
}
#[test]

View File

@@ -74,6 +74,7 @@ impl From<KindWithContent> for AutobatchKind {
| KindWithContent::DumpCreation { .. }
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange(_)
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
}

View File

@@ -0,0 +1,27 @@
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::tasks::Task;
use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
pub(super) fn process_network_index_batch(
&self,
_network_task: Task,
_inner_batch: Box<Batch>,
_current_batch: &mut ProcessingBatch,
_progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
}
pub(super) fn process_network_ready(
&self,
_task: Task,
_progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
}
}

View File

@@ -4,6 +4,7 @@ use std::io::ErrorKind;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::network::NetworkTopologyState;
use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use uuid::Uuid;
@@ -59,6 +60,14 @@ pub(crate) enum Batch {
index_uid: String,
task: Task,
},
#[allow(clippy::enum_variant_names)] // warranted because we are executing an inner index batch
NetworkIndexBatch {
network_task: Task,
inner_batch: Box<Batch>,
},
NetworkReady {
task: Task,
},
}
#[derive(Debug)]
@@ -140,9 +149,14 @@ impl Batch {
..
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
},
Batch::IndexSwap { task } => {
Batch::IndexSwap { task } | Batch::NetworkReady { task } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::NetworkIndexBatch { network_task, inner_batch } => {
let mut tasks = inner_batch.ids();
tasks.insert(network_task.uid);
tasks
}
}
}
@@ -156,12 +170,14 @@ impl Batch {
| Dump(_)
| Export { .. }
| UpgradeDatabase { .. }
| NetworkReady { .. }
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. }
| IndexCompaction { index_uid, .. } => Some(index_uid),
NetworkIndexBatch { network_task: _, inner_batch } => inner_batch.index_uid(),
}
}
}
@@ -184,6 +200,8 @@ impl fmt::Display for Batch {
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
Batch::Export { .. } => f.write_str("Export")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
Batch::NetworkIndexBatch { .. } => f.write_str("NetworkTopologyChange")?,
Batch::NetworkReady { .. } => f.write_str("NetworkTopologyChange")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
@@ -452,6 +470,7 @@ impl IndexScheduler {
pub(crate) fn create_next_batch(
&self,
rtxn: &RoTxn,
processing_network_tasks: &RoaringBitmap,
) -> Result<Option<(Batch, ProcessingBatch)>> {
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
@@ -460,7 +479,6 @@ impl IndexScheduler {
let mut current_batch = ProcessingBatch::new(batch_id);
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let count_total_enqueued = enqueued.len();
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
// 0. we get the last task to cancel.
@@ -509,7 +527,15 @@ impl IndexScheduler {
)));
}
// 2. we get the next task to delete
// 2. Check for enqueued network topology changes
let network_changes = self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)?
& (enqueued | processing_network_tasks);
if let Some(task_id) = network_changes.iter().next() {
let task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
return self.start_processing_network(rtxn, task, enqueued, current_batch);
}
// 3. we get the next task to delete
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
@@ -519,7 +545,7 @@ impl IndexScheduler {
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
}
// 3. we get the next task to compact
// 4. we get the next task to compact
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
if let Some(task_id) = to_compact.min() {
let mut task =
@@ -534,7 +560,7 @@ impl IndexScheduler {
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
}
// 4. we batch the export.
// 5. we batch the export.
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
if !to_export.is_empty() {
let task_id = to_export.iter().next().expect("There must be at least one export task");
@@ -545,7 +571,7 @@ impl IndexScheduler {
return Ok(Some((Batch::Export { task }, current_batch)));
}
// 5. we batch the snapshot.
// 6. we batch the snapshot.
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
@@ -555,7 +581,7 @@ impl IndexScheduler {
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
}
// 6. we batch the dumps.
// 7. we batch the dumps.
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() {
let mut task =
@@ -568,25 +594,66 @@ impl IndexScheduler {
return Ok(Some((Batch::Dump(task), current_batch)));
}
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
let network = self.network();
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
// and use the autobatcher to batch the enqueued tasks associated with it
// 8. We make a batch from the unprioritised tasks.
let (batch, current_batch) =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// We want to execute all tasks, except those that have a version strictly higher than the network version
let index_name = if let Some(&index_name) = task.indexes().first() {
index_name
} else {
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexSwap,
id: task.uid,
});
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
let Some(task_version) =
task.network.as_ref().map(|tastk_network| tastk_network.network_version())
else {
// do not skip tasks that have no network version, otherwise we will never execute them
return false;
};
// skip tasks with a version strictly higher than the network version
task_version > network.version
})?;
Ok(batch.map(|batch| (batch, current_batch)))
}
fn create_next_batch_unprioritized<F>(
&self,
rtxn: &RoTxn,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
mut skip_if: F,
) -> Result<(Option<Batch>, ProcessingBatch)>
where
F: FnMut(&Task) -> bool,
{
let count_total_enqueued = enqueued.len();
let mut enqueued_it = enqueued.iter();
let mut task;
let index_name = loop {
let Some(task_id) = enqueued_it.next() else {
return Ok((None, current_batch));
};
task = self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if skip_if(&task) {
continue;
}
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
// and use the autobatcher to batch the enqueued tasks associated with it
if let Some(&index_name) = task.indexes().first() {
break index_name;
} else {
assert!(
matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())
);
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexSwap,
id: task.uid,
});
return Ok((Some(Batch::IndexSwap { task }), current_batch));
};
};
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
@@ -621,6 +688,10 @@ impl IndexScheduler {
.get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
if skip_if(&task) {
continue;
}
if let Some(uuid) = task.content_uuid() {
let content_size = match self.queue.file_store.compute_size(uuid) {
Ok(content_size) => content_size,
@@ -651,19 +722,127 @@ impl IndexScheduler {
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
return Ok(self
.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
&mut current_batch,
create_index,
)?
.map(|batch| (batch, current_batch)));
let batch = self.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
&mut current_batch,
create_index,
)?;
return Ok((batch, current_batch));
}
// If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do.
Ok(None)
Ok((None, current_batch))
}
fn start_processing_network(
&self,
rtxn: &RoTxn,
mut task: Task,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
) -> Result<Option<(Batch, ProcessingBatch)>> {
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::NetworkTask { id: task.uid });
let change_version =
task.network.as_ref().map(|network| network.network_version()).unwrap_or_default();
let KindWithContent::NetworkTopologyChange(network_topology_change) = &task.kind else {
panic!("inconsistent kind with content")
};
match network_topology_change.state() {
NetworkTopologyState::WaitingForOlderTasks => {
let res =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// in this limited mode of execution, we only want to run tasks:
// 0. with an index
// 1. with a version
// 2. that version strictly lower than the network task version
// 0. skip indexless tasks that are not index swap
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
return true;
}
// 1. skip tasks without version
let Some(task_version) =
task.network.as_ref().map(|network| network.network_version())
else {
return true;
};
// 2. skip tasks with a version equal or higher to the network task version
task_version >= change_version
});
let (batch, mut current_batch) = res?;
let batch = match batch {
Some(batch) => {
let inner_batch = Box::new(batch);
let inner_reason = current_batch.reason.to_string();
current_batch.reason(BatchStopReason::NetworkTaskOlderTasks {
id: task.uid,
inner_reason,
});
Batch::NetworkIndexBatch { network_task: task, inner_batch }
}
None => Batch::NetworkReady { task },
};
Ok(Some((batch, current_batch)))
}
NetworkTopologyState::ImportingDocuments => {
// if the import is done we need to go to the next state
if network_topology_change.is_import_finished() {
return Ok(Some((Batch::NetworkReady { task }, current_batch)));
}
let res =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// in this limited mode of execution, we only want to run tasks:
// 0. with an index
// 1. with a version
// 2. that version equal to the network task version
// 0. skip indexless tasks
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
return true;
}
// 1. skip tasks without version
let Some(task_version) =
task.network.as_ref().map(|network| network.network_version())
else {
return true;
};
// 2. skip tasks with a version different from the network task version
task_version != change_version
});
let (batch, mut current_batch) = res?;
let batch = batch.map(|batch| {
let inner_batch = Box::new(batch);
let inner_reason = current_batch.reason.to_string();
current_batch.reason(BatchStopReason::NetworkTaskImportTasks {
id: task.uid,
inner_reason,
});
(Batch::NetworkIndexBatch { network_task: task, inner_batch }, current_batch)
});
Ok(batch)
}
NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => {
Ok(Some((Batch::NetworkReady { task }, current_batch)))
}
}
}
}

View File

@@ -0,0 +1,308 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::time::Duration;
use bumpalo::Bump;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::new::indexer::current_edition::sharding::Shards;
use meilisearch_types::milli::{self};
use meilisearch_types::network::Remote;
use meilisearch_types::tasks::network::{NetworkTopologyState, Origin};
use meilisearch_types::tasks::{KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance};
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
/// Processes `inner_batch` on behalf of a network topology change task.
///
/// The inner batch is processed with the network returned by `network_for_state()`.
/// Every processed task that carries import data with an index name is then reported
/// to the topology change through `process_remote_tasks`. Finally the details of the
/// network task are refreshed and the network task is appended to the returned tasks.
///
/// # Errors
///
/// - `Error::CorruptedTaskQueue` when `network_task` is not a network topology change.
/// - Any error raised while processing the inner batch.
pub(super) fn process_network_index_batch(
    &self,
    mut network_task: Task,
    inner_batch: Box<Batch>,
    current_batch: &mut ProcessingBatch,
    progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
    let topology_change = match &mut network_task.kind {
        KindWithContent::NetworkTopologyChange(topology_change) => topology_change,
        _ => {
            tracing::error!("unexpected network kind for network task while processing batch");
            return Err(Error::CorruptedTaskQueue);
        }
    };

    let state_network = topology_change.network_for_state();
    let (mut processed_tasks, batch_info) =
        self.process_batch(*inner_batch, current_batch, progress, state_network)?;

    // only the tasks carrying import data are relevant to the topology change
    let imports = processed_tasks
        .iter()
        .filter_map(|task| task.network.as_ref().and_then(|network| network.import_data()));

    for import in imports {
        if let Some(index_name) = import.index_name.as_deref() {
            topology_change.process_remote_tasks(
                &import.remote_name,
                index_name,
                import.document_count,
            );
        }
    }

    network_task.details = Some(topology_change.to_details());
    processed_tasks.push(network_task);

    Ok((processed_tasks, batch_info))
}
pub(super) fn process_network_ready(
&self,
mut task: Task,
progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
let KindWithContent::NetworkTopologyChange(network_topology_change) = &mut task.kind else {
tracing::error!("network topology change task has the wrong kind with content");
return Err(Error::CorruptedTaskQueue);
};
let Some(task_network) = &task.network else {
tracing::error!("network topology change task has no network");
return Err(Error::CorruptedTaskQueue);
};
let origin;
let origin = match task_network.origin() {
Some(origin) => origin,
None => {
let myself = network_topology_change.in_name().expect("origin is not the leader");
origin = Origin {
remote_name: myself.to_string(),
task_uid: task.uid,
network_version: task_network.network_version(),
};
&origin
}
};
let mut moved_documents = None;
if let (Some((remotes, out_name)), Some(new_shards)) =
(network_topology_change.export_to_process(), network_topology_change.new_shards())
{
moved_documents = Some(self.balance_documents(
remotes,
out_name,
new_shards,
origin,
&progress,
&self.scheduler.must_stop_processing,
)?);
}
if let Some(moved_documents) = moved_documents {
// we need the mut moved documents to avoid a lifetime error in the previous if let.
network_topology_change.set_moved(moved_documents);
}
network_topology_change.update_state();
if network_topology_change.state() == NetworkTopologyState::Finished {
task.status = Status::Succeeded;
}
task.details = Some(network_topology_change.to_details());
Ok((vec![task], Default::default()))
}
/// Rebalances the documents of every local index according to `new_shards`.
///
/// For each index, documents are sorted by the shard returned by
/// `new_shards.processing_shard`:
/// - documents of the own shard are left untouched,
/// - documents of another shard are queued for export to the remote of that name,
/// - documents without any processing shard are queued for deletion.
///
/// Each remote of `remotes` then receives an export (possibly of an empty set of documents).
/// Documents whose export succeeded are added to the deletion set, and the deletion set is
/// finally removed from the local index. Documents whose export failed are kept locally.
///
/// When the instance has no index at all, every remote is still notified through
/// `export_no_index` and the function returns `0`.
///
/// Returns the total number of documents deleted from the local indexes.
///
/// # Errors
///
/// Fails on transaction errors, on errors while iterating over the indexes or their
/// documents, and on errors while deleting documents. Export failures are only logged.
fn balance_documents<'a, I: Iterator<Item = (&'a str, &'a Remote)> + Clone>(
&self,
remotes: I,
out_name: &str,
new_shards: Shards,
network_change_origin: &Origin,
progress: &Progress,
must_stop_processing: &crate::scheduler::MustStopProcessing,
) -> crate::Result<u64> {
// TECHDEBT: this spawns a `ureq` agent additionally to `reqwest`. We probably want to harmonize all of this.
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
// allocator handed to the document deletion, reset at the start of each index
let mut indexer_alloc = Bump::new();
let scheduler_rtxn = self.env.read_txn()?;
let index_count = self.index_mapper.index_count(&scheduler_rtxn)?;
// when the instance is empty, we still need to tell that to remotes, as they cannot know of that fact and will be waiting for
// data
if index_count == 0 {
for (remote_name, remote) in remotes {
let target = TargetInstance {
remote_name: Some(remote_name),
base_url: &remote.url,
api_key: remote.write_api_key.as_deref(),
};
let res = self.export_no_index(
target,
out_name,
network_change_origin,
&agent,
must_stop_processing,
);
// best effort: a failing remote does not prevent notifying the other ones
if let Err(err) = res {
tracing::warn!("Could not signal not to wait documents to `{remote_name}` due to error: {err}");
}
}
return Ok(0);
}
let mut total_moved_documents = 0;
self.index_mapper.try_for_each_index::<(), ()>(
&scheduler_rtxn,
|index_uid, index| -> crate::Result<()> {
indexer_alloc.reset();
// converts a milli error into a scheduler error tagged with the current index
let err = |err| Error::from_milli(err, Some(index_uid.to_string()));
let index_rtxn = index.read_txn()?;
let all_docids = index.external_documents_ids();
// remote name => internal ids of the documents that must be sent to that remote
let mut documents_to_move_to =
hashbrown::HashMap::<String, RoaringBitmap>::new();
let mut documents_to_delete = RoaringBitmap::new();
for res in all_docids.iter(&index_rtxn)? {
let (external_docid, docid) = res?;
match new_shards.processing_shard(external_docid) {
// the document stays on this instance
Some(shard) if shard.is_own => continue,
Some(shard) => {
documents_to_move_to.entry_ref(&shard.name).or_default().insert(docid);
}
// no shard processes this document: it is removed without being exported
None => {
documents_to_delete.insert(docid);
}
}
}
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
// `remotes` is cloned because it is iterated once per index
for (remote_name, remote) in remotes.clone() {
// a remote without any document to receive still gets an export of an empty universe
let documents_to_move =
documents_to_move_to.remove(remote_name).unwrap_or_default();
let target = TargetInstance {
remote_name: Some(remote_name),
base_url: &remote.url,
api_key: remote.write_api_key.as_deref(),
};
let options = ExportOptions {
index_uid,
payload_size: None,
override_settings: false,
export_mode: super::process_export::ExportMode::NetworkBalancing {
index_count,
export_old_remote_name: out_name,
network_change_origin,
},
};
let ctx = ExportContext {
index,
index_rtxn: &index_rtxn,
universe: &documents_to_move,
progress,
agent: &agent,
must_stop_processing,
};
let res = self.export_one_index(target, options, ctx);
match res {
// only documents that were successfully exported are scheduled for deletion
Ok(_) =>{ documents_to_delete |= documents_to_move;}
Err(err) => {
tracing::warn!("Could not export documents to `{remote_name}` due to error: {err}\n - Note: Documents will be kept");
}
}
}
// NOTE(review): documents queued for a shard name that is absent from `remotes` remain in
// `documents_to_move_to` and are neither exported nor deleted — confirm this is intended.
if documents_to_delete.is_empty() {
return Ok(());
}
// counts every deleted document, including the ones that had no processing shard
total_moved_documents += documents_to_delete.len();
self.delete_documents_from_index(progress, must_stop_processing, &indexer_alloc, index_uid, index, &err, index_rtxn, documents_to_delete, fields_ids_map)
},
)?;
Ok(total_moved_documents)
}
/// Deletes `documents_to_delete` from `index` and stores the refreshed stats of the index.
///
/// - `indexer_alloc`: allocator used to build the document changes.
/// - `index_uid`: name of the index, used to fetch the embedders and to store the stats.
/// - `err`: converts a milli error into a scheduler error.
/// - `index_rtxn`: read transaction on the index, taken by value and only used to read
///   the primary key.
/// - `fields_ids_map`: fields ids map of the index before the deletion.
///
/// The index write transaction is committed before the transaction that stores the stats.
///
/// # Panics
///
/// Panics when the index has no primary key.
///
/// # Errors
///
/// Fails on transaction errors and on any error raised while reading the index
/// configuration, indexing the deletions or computing the stats.
#[allow(clippy::too_many_arguments)]
fn delete_documents_from_index(
&self,
progress: &Progress,
must_stop_processing: &super::MustStopProcessing,
indexer_alloc: &Bump,
index_uid: &str,
index: &milli::Index,
err: &impl Fn(milli::Error) -> Error,
index_rtxn: milli::heed::RoTxn<'_, milli::heed::WithoutTls>,
documents_to_delete: RoaringBitmap,
fields_ids_map: milli::FieldsIdsMap,
) -> std::result::Result<(), Error> {
// the indexer receives both the current map and a copy of it as the "new" map
let mut new_fields_ids_map = fields_ids_map.clone();
// candidates not empty => index not empty => a primary key is set
let primary_key = index.primary_key(&index_rtxn)?.unwrap();
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(milli::Error::from)
.map_err(err)?;
let mut index_wtxn = index.write_txn()?;
// describe the deletion as document changes for the indexer
let mut indexer = indexer::DocumentDeletion::new();
indexer.delete_documents_by_docids(documents_to_delete);
let document_changes = indexer.into_changes(indexer_alloc, primary_key);
let embedders = index
.embedding_configs()
.embedding_configs(&index_wtxn)
.map_err(milli::Error::from)
.map_err(err)?;
let embedders = self.embedders(index_uid.to_string(), embedders)?;
let indexer_config = self.index_mapper.indexer_config();
let pool = &indexer_config.thread_pool;
indexer::index(
&mut index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
// lets the indexing stop when the scheduler asks for it
&|| must_stop_processing.get(),
progress,
&EmbedderStats::default(),
)
.map_err(err)?;
// update stats
// the stats are computed through the still uncommitted index write transaction
let mut mapper_wtxn = self.env.write_txn()?;
let stats = crate::index_mapper::IndexStats::new(index, &index_wtxn).map_err(err)?;
self.index_mapper.store_stats_of(&mut mapper_wtxn, index_uid, &stats)?;
index_wtxn.commit()?;
// update stats after committing changes to index
mapper_wtxn.commit()?;
Ok(())
}
}

View File

@@ -1,7 +1,12 @@
mod autobatcher;
#[cfg(test)]
mod autobatcher_test;
#[cfg(not(feature = "enterprise"))]
mod community_edition;
mod create_batch;
#[cfg(feature = "enterprise")]
mod enterprise_edition;
mod process_batch;
mod process_dump_creation;
mod process_export;
@@ -21,7 +26,6 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
@@ -133,6 +137,7 @@ impl Scheduler {
max_number_of_tasks: _,
max_number_of_batched_tasks,
batched_tasks_size_limit,
export_default_payload_size_bytes: _,
instance_features: _,
auto_upgrade: _,
embedding_cache_cap,
@@ -178,6 +183,8 @@ impl IndexScheduler {
self.breakpoint(crate::test_utils::Breakpoint::Start);
}
let previous_processing_batch = self.processing_tasks.write().unwrap().stop_processing();
if self.cleanup_enabled {
let mut wtxn = self.env.write_txn()?;
self.queue.cleanup_task_queue(&mut wtxn)?;
@@ -185,11 +192,16 @@ impl IndexScheduler {
}
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
let (batch, mut processing_batch) =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(TickOutcome::WaitForSignal),
};
let (batch, mut processing_batch) = match self
.create_next_batch(&rtxn, &previous_processing_batch.processing)
.map_err(|e| Error::CreateBatch(Box::new(e)))?
{
Some(batch) => batch,
None => {
*self.processing_tasks.write().unwrap() = previous_processing_batch;
return Ok(TickOutcome::WaitForSignal);
}
};
let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn);
@@ -219,7 +231,12 @@ impl IndexScheduler {
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
cloned_index_scheduler.process_batch(batch, processing_batch, p)
cloned_index_scheduler.process_batch(
batch,
processing_batch,
p,
&self.network(),
)
})
.unwrap();
@@ -260,7 +277,14 @@ impl IndexScheduler {
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
// whether the batch made progress.
// a batch makes progress if it failed or if it contains at least one fully processed (or cancelled) task.
//
// if a batch did not make progress, it means that all of its tasks are waiting on the scheduler to make progress,
// and so we must wait for new tasks. Such a batch is not persisted to DB, and is resumed on the next tick.
let mut batch_made_progress = false;
let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
@@ -281,7 +305,11 @@ impl IndexScheduler {
#[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() {
task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update(&mut task);
processing_batch.update_from_task(&task);
if !matches!(task.status, Status::Processing | Status::Enqueued) {
batch_made_progress = true;
processing_batch.finish_task(&mut task);
}
if task.status == Status::Canceled {
canceled.insert(task.uid);
canceled_by = task.canceled_by;
@@ -348,6 +376,9 @@ impl IndexScheduler {
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
// always persist failed batches
batch_made_progress = true;
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
@@ -371,7 +402,10 @@ impl IndexScheduler {
task.status = Status::Failed;
task.error = Some(error.clone());
task.details = task.details.map(|d| d.to_failed());
processing_batch.update(&mut task);
processing_batch.update_from_task(&task);
if !matches!(task.status, Status::Processing | Status::Enqueued) {
processing_batch.finish_task(&mut task);
}
#[cfg(test)]
self.maybe_fail(
@@ -394,44 +428,12 @@ impl IndexScheduler {
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
process_batch_info;
processing_batch.stats.progress_trace =
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
let mut congestion_info = serde_json::Map::new();
congestion_info.insert("attempts".into(), congestion.attempts.into());
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
congestion_info
});
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
.iter()
.flat_map(|(dbname, pre_size)| {
post_commit_dabases_sizes
.get(dbname)
.map(|post_size| {
use std::cmp::Ordering::{Equal, Greater, Less};
use byte_unit::Byte;
use byte_unit::UnitType::Binary;
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
let diff_size = post_size.abs_diff(*pre_size) as u64;
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
let sign = match post_size.cmp(pre_size) {
Equal => return None,
Greater => "+",
Less => "-",
};
Some((
dbname.to_case(Case::Camel),
format!("{post:#.2} ({sign}{diff:#.2})").into(),
))
})
.into_iter()
.flatten()
})
.collect();
processing_batch.write_stats(
&progress,
congestion,
pre_commit_dabases_sizes,
post_commit_dabases_sizes,
);
if let Some(congestion) = congestion {
tracing::debug!(
@@ -444,46 +446,49 @@ impl IndexScheduler {
tracing::debug!("call trace: {:?}", progress.accumulated_durations());
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
if batch_made_progress {
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
}
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?;
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time until the commit everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();
if batch_made_progress {
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time until the commit everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
let idx = AtomicU32::new(0);
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
let rtxn = self.read_txn()?;
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
let task = self
.queue
.tasks
.get_task(&rtxn, id)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
.ok_or(Error::CorruptedTaskQueue)?;
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
tracing::error!(
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
let idx = AtomicU32::new(0);
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
let rtxn = self.read_txn()?;
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
let task = self
.queue
.tasks
.get_task(&rtxn, id)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
.ok_or(Error::CorruptedTaskQueue)?;
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
tracing::error!(
"Failure to delete the content files associated with task {}. Error: {e}",
task.uid
);
}
}
}
Ok(())
})?;
Ok(())
})?;
self.notify_webhooks(ids);
self.notify_webhooks(ids);
}
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
if stop_scheduler_forever {
Ok(TickOutcome::StopProcessingForever)
} else {

View File

@@ -10,6 +10,7 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::network::Network;
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use milli::update::Settings as MilliSettings;
@@ -55,6 +56,7 @@ impl IndexScheduler {
batch: Batch,
current_batch: &mut ProcessingBatch,
progress: Progress,
network: &Network,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
#[cfg(test)]
{
@@ -176,6 +178,7 @@ impl IndexScheduler {
op,
&progress,
current_batch.embedder_stats.clone(),
network,
)?;
{
@@ -235,6 +238,7 @@ impl IndexScheduler {
Batch::IndexUpdate { index_uid, primary_key, new_index_uid: None, task },
current_batch,
progress,
network,
)
}
Batch::IndexUpdate { index_uid, primary_key, new_index_uid, mut task } => {
@@ -539,6 +543,10 @@ impl IndexScheduler {
Ok((tasks, ProcessBatchInfo::default()))
}
Batch::NetworkIndexBatch { network_task, inner_batch } => {
self.process_network_index_batch(network_task, inner_batch, current_batch, progress)
}
Batch::NetworkReady { task } => self.process_network_ready(task, progress),
}
}

View File

@@ -1,5 +1,6 @@
use std::collections::BTreeMap;
use std::io::{self, Write as _};
use std::ops::ControlFlow;
use std::sync::atomic;
use std::time::Duration;
@@ -7,6 +8,7 @@ use backoff::ExponentialBackoff;
use byte_unit::Byte;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::Code;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
@@ -15,7 +17,10 @@ use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
use meilisearch_types::settings::{self, SecretPolicy};
use meilisearch_types::tasks::network::headers::SetHeader as _;
use meilisearch_types::tasks::network::{headers, ImportData, ImportMetadata, Origin};
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
use roaring::RoaringBitmap;
use serde::Deserialize;
use ureq::{json, Response};
@@ -50,6 +55,7 @@ impl IndexScheduler {
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let must_stop_processing = self.scheduler.must_stop_processing.clone();
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
let err = |err| Error::from_milli(err, Some(uid.to_string()));
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}
@@ -61,261 +67,474 @@ impl IndexScheduler {
));
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
});
let index_exists = match response {
Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
false
}
Err(e) => return Err(e),
};
let primary_key = index
.primary_key(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Create the index
if !index_exists {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Patch the index primary key
if index_exists && *override_settings {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Send the index settings
if !index_exists || *override_settings {
let mut settings =
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet;
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer);
}
request.send_json(settings.clone()).map_err(into_backoff_error)
})?;
}
let filter = filter
.as_ref()
.map(Filter::from_json)
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
.flatten();
let filter_universe = filter
.map(|f| f.evaluate(&index_rtxn, &index))
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let whole_universe = index
.documents_ids(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten();
let filter_universe =
filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?;
let whole_universe =
index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?;
let universe = filter_universe.unwrap_or(whole_universe);
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// We don't need to keep this one alive as we will
// spawn many threads to process the documents
drop(index_rtxn);
let total_documents = universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
let target = TargetInstance { remote_name: None, base_url, api_key };
let ctx = ExportContext {
index: &index,
index_rtxn: &index_rtxn,
universe: &universe,
progress: &progress,
agent: &agent,
must_stop_processing: &must_stop_processing,
};
let options = ExportOptions {
index_uid: uid,
payload_size,
override_settings: *override_settings,
export_mode: ExportMode::ExportRoute,
};
let total_documents = self.export_one_index(target, options, ctx)?;
output.insert(
IndexUidPattern::new_unchecked(uid.clone()),
DetailsExportIndexSettings {
settings: (*export_settings).clone(),
matched_documents: Some(total_documents as u64),
matched_documents: Some(total_documents),
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new();
for (i, docid) in universe.iter().enumerate() {
if i % ctx.num_threads() != ctx.index() {
continue;
}
let document = index
.document(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// TODO definitely factorize this code
'inject_vectors: {
let embeddings = index
.embeddings(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(Error::from_milli(
milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(
&index_rtxn,
std::iter::once(docid),
)
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={docid}")
}
},
value: vectors.clone(),
},
),
Some(uid.to_string()),
));
};
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings has non regenerate in this case.
!has_fragments,
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
}
}
tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
// We compress the documents before sending them
let mut encoder =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder
.write_all(&buffer)
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
encoder
.finish()
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
buffer.clear();
compressed_buffer.clear();
}
buffer.extend_from_slice(&tmp_buffer);
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
Ok(())
})
.map_err(|e| {
Error::from_milli(
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}
Ok(output)
}
/// Exports one index (definition, settings and documents) to a remote instance.
///
/// The steps are performed in this order:
/// 1. `GET {base_url}/indexes/{index_uid}` to learn whether the index exists remotely:
///    a `200` status means it does, a remote `IndexNotFound` error means it does not,
///    and any other error is returned to the caller.
/// 2. If the index is missing, it is created with the local primary key.
/// 3. If it exists and `options.override_settings` is set, its primary key is patched.
/// 4. If it was missing or `options.override_settings` is set, its settings are patched
///    (secrets are revealed; the chat setting is dropped when the chat completions
///    feature check fails).
/// 5. The documents of `ctx.universe` are sent as gzip-compressed payloads, the work being
///    spread over the threads of `request_threads()`.
///
/// When `options.task_network(..)` returns network metadata, it is attached as headers to the
/// creation, primary key, settings and documents requests.
///
/// Returns the number of documents in `ctx.universe` (`0` when the universe is empty).
///
/// # Errors
///
/// Returns the first error met while reading the index, serializing or compressing documents,
/// or talking to the remote, as well as an internal error if a worker thread panics.
pub(super) fn export_one_index(
    &self,
    target: TargetInstance<'_>,
    options: ExportOptions<'_>,
    ctx: ExportContext<'_>,
) -> Result<u64, Error> {
    // Attaches the exported index uid to every milli error raised below.
    let err = |err| Error::from_milli(err, Some(options.index_uid.to_string()));
    let total_index_documents = ctx.universe.len();
    let task_network = options.task_network(total_index_documents);
    let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
    let url = format!(
        "{base_url}/indexes/{index_uid}",
        base_url = target.base_url,
        index_uid = options.index_uid
    );
    let response = retry(ctx.must_stop_processing, || {
        let mut request = ctx.agent.get(&url);
        if let Some(bearer) = &bearer {
            request = request.set("Authorization", bearer);
        }
        request.send_bytes(Default::default()).map_err(into_backoff_error)
    });
    let index_exists = match response {
        Ok(response) => response.status() == 200,
        Err(Error::FromRemoteWhenExporting { code, .. })
            if code == Code::IndexNotFound.name() =>
        {
            false
        }
        Err(e) => return Err(e),
    };
    let primary_key =
        ctx.index.primary_key(ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?;
    if !index_exists {
        let url = format!("{base_url}/indexes", base_url = target.base_url);
        let _ = handle_response(
            target.remote_name,
            retry(ctx.must_stop_processing, || {
                let mut request = ctx.agent.post(&url);
                if let Some((import_data, origin, metadata)) = &task_network {
                    request = set_network_ureq_headers(request, import_data, origin, metadata);
                }
                if let Some(bearer) = bearer.as_ref() {
                    request = request.set("Authorization", bearer);
                }
                let index_param =
                    json!({ "uid": options.index_uid, "primaryKey": primary_key });
                request.send_json(&index_param).map_err(into_backoff_error)
            }),
        )?;
    }
    if index_exists && options.override_settings {
        let _ = handle_response(
            target.remote_name,
            retry(ctx.must_stop_processing, || {
                let mut request = ctx.agent.patch(&url);
                if let Some((import_data, origin, metadata)) = &task_network {
                    request = set_network_ureq_headers(request, import_data, origin, metadata);
                }
                if let Some(bearer) = &bearer {
                    request = request.set("Authorization", bearer);
                }
                let index_param = json!({ "primaryKey": primary_key });
                request.send_json(&index_param).map_err(into_backoff_error)
            }),
        )?;
    }
    if !index_exists || options.override_settings {
        let mut settings =
            settings::settings(ctx.index, ctx.index_rtxn, SecretPolicy::RevealSecrets)
                .map_err(err)?;
        // Remove the experimental chat setting if not enabled
        if self.features().check_chat_completions("exporting chat settings").is_err() {
            settings.chat = Setting::NotSet;
        }
        // Retry logic for sending settings
        let url = format!(
            "{base_url}/indexes/{index_uid}/settings",
            base_url = target.base_url,
            index_uid = options.index_uid
        );
        let _ = handle_response(
            target.remote_name,
            retry(ctx.must_stop_processing, || {
                let mut request = ctx.agent.patch(&url);
                if let Some((import_data, origin, metadata)) = &task_network {
                    request = set_network_ureq_headers(request, import_data, origin, metadata);
                }
                if let Some(bearer) = bearer.as_ref() {
                    request = request.set("Authorization", bearer);
                }
                request.send_json(settings.clone()).map_err(into_backoff_error)
            }),
        )?;
    }
    let fields_ids_map = ctx.index.fields_ids_map(ctx.index_rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
    let total_documents = ctx.universe.len() as u32;
    let (step, progress_step) = AtomicDocumentStep::new(total_documents);
    ctx.progress.update_progress(progress_step);
    // Maximum size of an uncompressed payload; falls back to the scheduler-wide default.
    let limit = options
        .payload_size
        .map(|ps| ps.as_u64() as usize)
        .unwrap_or(self.export_default_payload_size_bytes.as_u64() as usize);
    let documents_url = format!(
        "{base_url}/indexes/{index_uid}/documents",
        base_url = target.base_url,
        index_uid = options.index_uid
    );
    // no document to send, but we must still send a task when performing network balancing
    if ctx.universe.is_empty() {
        if let Some((import_data, network_change_origin, metadata)) = task_network {
            let mut compressed_buffer = Vec::new();
            // ignore control flow, we're returning anyway
            let _ = send_buffer(
                b" ", // needs something otherwise meili complains about missing payload
                &mut compressed_buffer,
                ctx.must_stop_processing,
                ctx.agent,
                &documents_url,
                target.remote_name,
                bearer.as_deref(),
                Some(&(import_data, network_change_origin.clone(), metadata)),
                &err,
            )?;
        }
        return Ok(0);
    }
    let results = request_threads()
        .broadcast(|broadcast| {
            // Each thread keeps its own network metadata: it describes the payload being built.
            let mut task_network = options.task_network(total_index_documents);
            let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?;
            let mut buffer = Vec::new();
            let mut tmp_buffer = Vec::new();
            let mut compressed_buffer = Vec::new();
            for (i, docid) in ctx.universe.iter().enumerate() {
                // Documents are distributed round-robin between the threads.
                if i % broadcast.num_threads() != broadcast.index() {
                    continue;
                }
                let document = ctx.index.document(&index_rtxn, docid).map_err(err)?;
                let mut document =
                    obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?;
                // TODO definitely factorize this code
                'inject_vectors: {
                    let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?;
                    if embeddings.is_empty() {
                        break 'inject_vectors;
                    }
                    let vectors = document
                        .entry(RESERVED_VECTORS_FIELD_NAME)
                        .or_insert(serde_json::Value::Object(Default::default()));
                    let serde_json::Value::Object(vectors) = vectors else {
                        return Err(err(milli::Error::UserError(
                            milli::UserError::InvalidVectorsMapType {
                                document_id: {
                                    if let Ok(Some(Ok(index))) = ctx
                                        .index
                                        .external_id_of(&index_rtxn, std::iter::once(docid))
                                        .map(|it| it.into_iter().next())
                                    {
                                        index
                                    } else {
                                        format!("internal docid={docid}")
                                    }
                                },
                                value: vectors.clone(),
                            },
                        )));
                    };
                    for (
                        embedder_name,
                        EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
                    ) in embeddings
                    {
                        let embeddings = ExplicitVectors {
                            embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
                                embeddings,
                            )),
                            regenerate: regenerate &&
                                // Meilisearch does not handle well dumps with fragments, because as the fragments
                                // are marked as user-provided,
                                // all embeddings would be regenerated on any settings change or document update.
                                // To prevent this, we mark embeddings as non-regenerate in this case.
                                !has_fragments,
                        };
                        vectors
                            .insert(embedder_name, serde_json::to_value(embeddings).unwrap());
                    }
                }
                tmp_buffer.clear();
                serde_json::to_writer(&mut tmp_buffer, &document)
                    .map_err(milli::InternalError::from)
                    .map_err(milli::Error::from)
                    .map_err(err)?;
                // Make sure we put at least one document in the buffer even
                // though we might go above the buffer limit before sending
                if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
                    let control_flow = send_buffer(
                        &buffer,
                        &mut compressed_buffer,
                        ctx.must_stop_processing,
                        ctx.agent,
                        &documents_url,
                        target.remote_name,
                        bearer.as_deref(),
                        task_network.as_ref(),
                        &err,
                    )?;
                    buffer.clear();
                    compressed_buffer.clear();
                    if let Some((import_data, _, metadata)) = &mut task_network {
                        import_data.document_count = 0;
                        metadata.task_key = None;
                    }
                    if control_flow.is_break() {
                        return Ok(());
                    }
                }
                buffer.extend_from_slice(&tmp_buffer);
                // Account for the document only now that it is part of the buffer, so that the
                // headers of a payload always describe exactly the documents it contains.
                // Doing it before the flush above would count this document in the previous
                // payload and leave it out of the one that actually carries it.
                if let Some((import_data, _, metadata)) = &mut task_network {
                    import_data.document_count += 1;
                    metadata.task_key = Some(docid);
                }
                if i > 0 && i % 100 == 0 {
                    step.fetch_add(100, atomic::Ordering::Relaxed);
                }
            }
            // send the last buffered documents if any
            if !buffer.is_empty() {
                // ignore control flow here
                let _ = send_buffer(
                    &buffer,
                    &mut compressed_buffer,
                    ctx.must_stop_processing,
                    ctx.agent,
                    &documents_url,
                    target.remote_name,
                    bearer.as_deref(),
                    task_network.as_ref(),
                    &err,
                )?;
            }
            Ok(())
        })
        .map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?;
    for result in results {
        result?;
    }
    // The per-thread updates are approximate: settle on the exact total once done.
    step.store(total_documents, atomic::Ordering::Relaxed);
    Ok(total_documents as u64)
}
/// Sends a network import notification that carries no index and no document.
///
/// A `PATCH {base_url}/network` request is issued with the network headers built from
/// `export_old_remote_name` and `network_change_origin`, all counters being set to zero.
/// The JSON body is an empty object.
///
/// # Errors
///
/// Returns the error produced by `handle_response` when the request ultimately fails.
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
pub(super) fn export_no_index(
    &self,
    target: TargetInstance<'_>,
    export_old_remote_name: &str,
    network_change_origin: &Origin,
    agent: &ureq::Agent,
    must_stop_processing: &MustStopProcessing,
) -> Result<(), Error> {
    let network_url = format!("{}/network", target.base_url);
    let authorization = target.api_key.map(|api_key| format!("Bearer {api_key}"));

    let response = retry(must_stop_processing, || {
        // Nothing is exported: no index name and every counter at zero.
        let import_data = ImportData {
            remote_name: export_old_remote_name.to_string(),
            index_name: None,
            document_count: 0,
        };
        let metadata =
            ImportMetadata { index_count: 0, task_key: None, total_index_documents: 0 };

        let mut request = set_network_ureq_headers(
            agent.patch(&network_url),
            &import_data,
            network_change_origin,
            &metadata,
        );
        request = request.set("Content-Type", "application/json");
        if let Some(authorization) = &authorization {
            request = request.set("Authorization", authorization);
        }

        // empty payload that will be disregarded
        let payload = serde_json::Value::Object(Default::default());
        request.send_json(payload).map_err(into_backoff_error)
    });

    // The control flow is irrelevant here: there is nothing left to send afterwards.
    let _ = handle_response(target.remote_name, response)?;
    Ok(())
}
}
/// Attaches the network import headers to `request` and returns the updated request.
///
/// The origin, import remote and counter headers are always set. The import index header
/// is only set when `import_data.index_name` is present, and the task key header only when
/// `metadata.task_key` is present.
fn set_network_ureq_headers(
    request: ureq::Request,
    import_data: &ImportData,
    origin: &Origin,
    metadata: &ImportMetadata,
) -> ureq::Request {
    // Exhaustive destructuring: adding a field to one of these types fails to compile here
    // until it is handled.
    let Origin { remote_name: origin_remote, task_uid, network_version } = origin;
    let ImportData { remote_name: import_remote, index_name, document_count } = import_data;
    let ImportMetadata { index_count, task_key, total_index_documents } = metadata;

    let mut wrapped = RequestWrapper(request)
        .set_origin_remote(origin_remote)
        .set_origin_task_uid(*task_uid)
        .set_origin_network_version(*network_version)
        .set_import_remote(import_remote)
        .set_import_docs(*document_count)
        .set_import_index_count(*index_count)
        .set_import_index_docs(*total_index_documents);

    if let Some(index_name) = index_name.as_deref() {
        wrapped = wrapped.set_import_index(index_name);
    }
    if let Some(task_key) = task_key {
        wrapped = wrapped.set_import_task_key(*task_key);
    }

    wrapped.0
}
/// Wrapper around a [`ureq::Request`] that implements [`headers::SetHeader`].
struct RequestWrapper(ureq::Request);

impl headers::SetHeader for RequestWrapper {
    /// Sets the `name` header to `value` on the wrapped request.
    fn set_header(self, name: &str, value: &str) -> Self {
        let Self(inner) = self;
        Self(inner.set(name, value))
    }
}
/// Gzip-compresses `buffer` into `compressed_buffer` and posts it to `documents_url`.
///
/// The request is sent through `retry` with the `application/x-ndjson` content type, the
/// `gzip` content encoding, the optional `bearer` authorization and, when `task_network`
/// is provided, the network import headers.
///
/// `compressed_buffer` is scratch space: it is cleared on entry so that a payload can never
/// be prefixed by the leftovers of a previous call. On return it holds the bytes that were
/// sent.
///
/// Returns the control flow computed by `handle_response`: `Break` means that the remote
/// asked to stop sending payloads.
///
/// # Errors
///
/// Returns an error when compression fails (converted through `err`) or when the remote
/// answers with an error that `handle_response` does not tolerate.
#[allow(clippy::too_many_arguments)]
fn send_buffer<'a>(
    buffer: &'a [u8],
    mut compressed_buffer: &'a mut Vec<u8>,
    must_stop_processing: &MustStopProcessing,
    agent: &ureq::Agent,
    documents_url: &'a str,
    remote_name: Option<&str>,
    bearer: Option<&'a str>,
    task_network: Option<&(ImportData, Origin, ImportMetadata)>,
    err: &'a impl Fn(milli::Error) -> crate::Error,
) -> Result<ControlFlow<(), ()>> {
    // The encoder appends to its writer: start from an empty buffer rather than relying
    // on every caller remembering to clear it between two payloads.
    compressed_buffer.clear();
    // We compress the documents before sending them
    let mut encoder: GzEncoder<&mut &mut Vec<u8>> =
        GzEncoder::new(&mut compressed_buffer, Compression::default());
    encoder.write_all(buffer).map_err(milli::Error::from).map_err(err)?;
    encoder.finish().map_err(milli::Error::from).map_err(err)?;
    let res = retry(must_stop_processing, || {
        let mut request = agent.post(documents_url);
        request = request.set("Content-Type", "application/x-ndjson");
        request = request.set("Content-Encoding", "gzip");
        if let Some(bearer) = bearer {
            request = request.set("Authorization", bearer);
        }
        if let Some((import_data, origin, metadata)) = task_network {
            request = set_network_ureq_headers(request, import_data, origin, metadata);
        }
        request.send_bytes(compressed_buffer).map_err(into_backoff_error)
    });
    handle_response(remote_name, res)
}
fn handle_response(remote_name: Option<&str>, res: Result<Response>) -> Result<ControlFlow<()>> {
let remote_name = remote_name.unwrap_or("unnamed");
match res {
Ok(_response) => Ok(ControlFlow::Continue(())),
Err(Error::FromRemoteWhenExporting { code, .. })
if code == Code::ImportTaskAlreadyReceived.name() =>
{
Ok(ControlFlow::Continue(()))
}
Err(Error::FromRemoteWhenExporting { code, message, .. })
if code == Code::ImportTaskUnknownRemote.name() =>
{
tracing::warn!("remote `{remote_name}` answered with: {message}");
Ok(ControlFlow::Break(()))
}
// note: there has already been many attempts to get this due to exponential backoff
Err(Error::FromRemoteWhenExporting { code, message, .. })
if code == Code::ImportTaskWithoutNetworkTask.name() =>
{
tracing::warn!("remote `{remote_name}` answered with: {message}");
Ok(ControlFlow::Break(()))
}
Err(e) => {
tracing::warn!("error while exporting: {e}");
Err(e)
}
}
}
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
@@ -374,4 +593,65 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
}
}
// export_one_index arguments
/// Connection details of the instance that receives an export.
pub(super) struct TargetInstance<'a> {
/// Name of the remote, used when logging its answers
/// (`handle_response` falls back to `"unnamed"` when it is `None`).
pub(super) remote_name: Option<&'a str>,
/// Base URL to which the `/indexes/...` and `/network` routes are appended.
pub(super) base_url: &'a str,
/// API key sent as an `Authorization: Bearer ...` header when present.
pub(super) api_key: Option<&'a str>,
}
/// What to export and how, for a single index.
pub(super) struct ExportOptions<'a> {
/// Uid of the exported index; used to build the remote URLs and as the imported index name.
pub(super) index_uid: &'a str,
/// Maximum size of an uncompressed documents payload.
/// `export_one_index` falls back to the scheduler default when it is `None`.
pub(super) payload_size: Option<&'a Byte>,
/// Whether the primary key and settings of an index that already exists remotely are patched.
pub(super) override_settings: bool,
/// Whether the export comes from the export route or from a network balancing operation.
pub(super) export_mode: ExportMode<'a>,
}
impl ExportOptions<'_> {
    /// Builds the network metadata attached to the export requests.
    ///
    /// Returns `None` for `ExportMode::ExportRoute`. For `ExportMode::NetworkBalancing`,
    /// returns the import data (with a document count of `0`), a clone of the origin of the
    /// network change, and the import metadata (without task key) carrying `index_count` and
    /// `total_index_documents`.
    fn task_network(
        &self,
        total_index_documents: u64,
    ) -> Option<(ImportData, Origin, ImportMetadata)> {
        match self.export_mode {
            ExportMode::ExportRoute => None,
            ExportMode::NetworkBalancing {
                index_count,
                export_old_remote_name,
                network_change_origin,
            } => {
                let import_data = ImportData {
                    remote_name: export_old_remote_name.to_string(),
                    index_name: Some(self.index_uid.to_string()),
                    document_count: 0,
                };
                let metadata =
                    ImportMetadata { index_count, task_key: None, total_index_documents };
                Some((import_data, network_change_origin.clone(), metadata))
            }
        }
    }
}
/// Borrowed resources needed to export a single index.
pub(super) struct ExportContext<'a> {
/// The index to export.
pub(super) index: &'a meilisearch_types::milli::Index,
/// Read transaction used to fetch the primary key, the settings and the fields ids map.
/// The worker threads of `export_one_index` open their own read transaction for documents.
pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>,
/// Internal ids of the documents to export.
pub(super) universe: &'a RoaringBitmap,
/// Progress handle updated with the document step of the export.
pub(super) progress: &'a Progress,
/// HTTP agent used for every request to the target.
pub(super) agent: &'a ureq::Agent,
/// Flag handed to `retry` for every request.
pub(super) must_stop_processing: &'a MustStopProcessing,
}
/// The reason why an index is being exported.
pub(super) enum ExportMode<'a> {
/// Plain export: `ExportOptions::task_network` returns `None`, so no network header is sent.
ExportRoute,
/// Export performed as part of a network balancing operation:
/// the requests carry the network import headers built from these fields.
#[cfg_attr(not(feature = "enterprise"), allow(dead_code))]
NetworkBalancing {
/// Forwarded as `ImportMetadata::index_count`.
index_count: u64,
/// Forwarded as `ImportData::remote_name`.
export_old_remote_name: &'a str,
/// Origin of the network change, cloned into the metadata of every request.
network_change_origin: &'a Origin,
},
}
// progress related
/// Enum without variants, so it cannot be instantiated.
// NOTE(review): presumably a type-level marker for export progress reporting — confirm against its trait impls.
enum ExportIndex {}

View File

@@ -8,6 +8,7 @@ use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
use meilisearch_types::network::Network;
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::Index;
@@ -36,6 +37,7 @@ impl IndexScheduler {
operation: IndexOperation,
progress: &Progress,
embedder_stats: Arc<EmbedderStats>,
network: &Network,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now();
@@ -67,8 +69,6 @@ impl IndexScheduler {
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
let network = self.network();
let shards = network.shards();
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
@@ -504,6 +504,7 @@ impl IndexScheduler {
},
progress,
embedder_stats.clone(),
network,
)?;
let (settings_tasks, _congestion) = self.apply_index_operation(
@@ -512,6 +513,7 @@ impl IndexScheduler {
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
progress,
embedder_stats,
network,
)?;
let mut tasks = settings_tasks;

View File

@@ -438,12 +438,15 @@ async fn multipart_stream_to_s3(
db_name: String,
reader: std::io::PipeReader,
) -> Result<(), Error> {
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
use std::collections::VecDeque;
use std::io;
use std::os::fd::OwnedFd;
use std::path::PathBuf;
use bytes::{Bytes, BytesMut};
use reqwest::{Client, Response};
use rusty_s3::S3Action as _;
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
use rusty_s3::actions::CreateMultipartUpload;
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
use tokio::task::JoinHandle;
let reader = OwnedFd::from(reader);
@@ -517,7 +520,6 @@ async fn multipart_stream_to_s3(
while buffer.len() < (s3_multipart_part_size as usize / 2) {
// Wait for the pipe to be readable
use std::io;
reader.readable().await?;
match reader.try_read_buf(&mut buffer) {
@@ -581,15 +583,17 @@ async fn multipart_stream_to_s3(
async move {
match client.post(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
Err(backoff::Error::Permanent(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}))
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
}
}
})
.await
.map_err(Error::S3HttpError)?;
.await?;
let status = resp.status();
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -747,6 +747,7 @@ fn basic_get_stats() {
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"networkTopologyChange": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
@@ -782,6 +783,7 @@ fn basic_get_stats() {
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"networkTopologyChange": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
@@ -824,6 +826,7 @@ fn basic_get_stats() {
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"networkTopologyChange": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
@@ -867,6 +870,7 @@ fn basic_get_stats() {
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"networkTopologyChange": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,

View File

@@ -112,6 +112,7 @@ impl IndexScheduler {
max_number_of_batched_tasks: usize::MAX,
batched_tasks_size_limit: u64::MAX,
instance_features: Default::default(),
export_default_payload_size_bytes: byte_unit::Byte::parse_str("20MiB", false).unwrap(),
auto_upgrade: true, // Don't cost much and will ensure the happy path works
embedding_cache_cap: 10,
experimental_no_snapshot_compaction: false,

View File

@@ -1,89 +1,93 @@
use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use meilisearch_types::versioning;
use time::OffsetDateTime;
use tracing::info;
use crate::queue::TaskQueue;
use crate::versioning::Versioning;
mod v1_29;
mod v1_30;
trait UpgradeIndexScheduler {
fn upgrade(
&self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32);
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
/// Whether the migration should be applied, depending on the initial version of the index scheduler before
/// any migration was applied
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
/// A progress-centric description of the migration
fn description(&self) -> &'static str;
}
/// Upgrade the index scheduler to the binary version.
///
/// # Warning
///
/// The current implementation uses a single wtxn to the index scheduler for the whole duration of the upgrade.
/// If migrations start taking a long time, it might prevent tasks from being registered.
/// If this issue manifests, then it can be mitigated by adding a `fn target_version` to `UpgradeIndexScheduler`,
/// to be able to write intermediate versions and drop the wtxn between applying migrations.
pub fn upgrade_index_scheduler(
env: &Env<WithoutTls>,
versioning: &Versioning,
from: (u32, u32, u32),
to: (u32, u32, u32),
initial_version: (u32, u32, u32),
) -> anyhow::Result<()> {
let current_major = to.0;
let current_minor = to.1;
let current_patch = to.2;
let target_major: u32 = versioning::VERSION_MAJOR;
let target_minor: u32 = versioning::VERSION_MINOR;
let target_patch: u32 = versioning::VERSION_PATCH;
let target_version = (target_major, target_minor, target_patch);
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
];
let start = match from {
(1, 12, _) => 0,
(1, 13, _) => 0,
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(1, 17, _) => 0,
(1, 18, _) => 0,
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(1, 23, _) => 0,
(1, 24, _) => 0,
(1, 25, _) => 0,
(1, 26, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
|| (major == current_major && minor == current_minor && patch > current_patch)
{
bail!(
"Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
);
} else if major < 1 || (major == current_major && minor < 12) {
bail!(
"Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
);
} else {
bail!("Unknown database version: v{major}.{minor}.{patch}");
}
}
};
info!("Upgrading the task queue");
let mut local_from = from;
for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version();
info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
);
let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, local_from)?;
versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?;
local_from = target;
if initial_version == target_version {
return Ok(());
}
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// List all upgrade functions to apply in order here.
&v1_30::MigrateNetwork,
];
let (initial_major, initial_minor, initial_patch) = initial_version;
if initial_version > target_version {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is higher than the Meilisearch version {target_major}.{target_minor}.{target_patch}. Downgrade is not supported",
);
}
if initial_version < (1, 12, 0) {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{initial_major}.{initial_minor}.{initial_patch} and import it in the v{target_major}.{target_minor}.{target_patch}",
);
}
info!("Upgrading the task queue");
let mut wtxn = env.write_txn()?;
let migration_count = upgrade_functions.len();
for (migration_index, upgrade) in upgrade_functions.iter().enumerate() {
if upgrade.must_upgrade(initial_version) {
info!(
"[{migration_index}/{migration_count}]Applying migration: {}",
upgrade.description()
);
upgrade.upgrade(env, &mut wtxn)?;
info!(
"[{}/{migration_count}]Migration applied: {}",
migration_index + 1,
upgrade.description()
)
} else {
info!(
"[{migration_index}/{migration_count}]Skipping unnecessary migration: {}",
upgrade.description()
)
}
}
versioning.set_version(&mut wtxn, target_version)?;
info!("Task queue upgraded, spawning the upgrade database task");
let queue = TaskQueue::new(env, &mut wtxn)?;
let uid = queue.next_task_id(&wtxn)?;
queue.register(
@@ -96,9 +100,9 @@ pub fn upgrade_index_scheduler(
finished_at: None,
error: None,
canceled_by: None,
details: Some(Details::UpgradeDatabase { from, to }),
details: Some(Details::UpgradeDatabase { from: initial_version, to: target_version }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
kind: KindWithContent::UpgradeDatabase { from: initial_version },
network: None,
custom_metadata: None,
},
@@ -107,21 +111,3 @@ pub fn upgrade_index_scheduler(
Ok(())
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade(
&self,
_env: &Env<WithoutTls>,
_wtxn: &mut RwTxn,
_original: (u32, u32, u32),
) -> anyhow::Result<()> {
Ok(())
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}

View File

@@ -0,0 +1,47 @@
use std::collections::BTreeMap;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Env, RoTxn, WithoutTls};
use serde::{Deserialize, Serialize};
use crate::Result;
/// Database const names for the `FeatureData`.
mod db_name {
// Name of the database that `get_network` opens to look up the stored network.
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
}
/// Keys used inside the `experimental-features` database.
mod db_keys {
// Key under which the serialized `Network` value is read by `get_network`.
pub const NETWORK: &str = "network";
}
/// Network configuration in the shape this module reads back from the database.
///
/// The v1.30 migration destructures this type to build its own `Network`.
/// Every field falls back to its `Default` value when missing from the stored JSON.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
/// Name of the local instance; (de)serialized under the `self` key.
#[serde(default, rename = "self")]
pub local: Option<String>,
/// Known remotes, keyed (and therefore ordered) by remote name.
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
/// Sharding flag of the old format; the v1.30 migration derives `leader` from it.
#[serde(default)]
pub sharding: bool,
}
/// A single remote entry of the v1.29-shaped `Network`.
///
/// Field names are (de)serialized in camelCase (`searchApiKey`, `writeApiKey`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
/// URL of the remote; the only field without a serde default.
pub url: String,
/// Optional search API key; `None` when absent from the stored JSON.
#[serde(default)]
pub search_api_key: Option<String>,
/// Optional write API key; `None` when absent from the stored JSON.
#[serde(default)]
pub write_api_key: Option<String>,
}
/// Reads the network stored under the `network` key of the `experimental-features` database.
///
/// Returns `Ok(None)` when the database does not exist or when it holds no `network` entry.
/// Errors from opening the database or from reading/decoding the entry are propagated.
pub fn get_network(env: &Env<WithoutTls>, rtxn: &RoTxn) -> Result<Option<Network>> {
    let features_db = env
        .open_database::<Str, SerdeJson<Network>>(rtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;

    match features_db {
        // The database exists: look the network up, it may still be absent.
        Some(db) => Ok(db.get(rtxn, db_keys::NETWORK)?),
        // The database was never created: there is no network to read.
        None => Ok(None),
    }
}

View File

@@ -0,0 +1,82 @@
use std::collections::BTreeMap;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Network configuration in the shape written by the v1.30 migration.
///
/// Compared to `v1_29::Network`, the `sharding` flag is replaced by `leader` and a `version`
/// is added. Every field falls back to its `Default` value when missing from the stored JSON.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
/// Name of the local instance; (de)serialized under the `self` key.
#[serde(default, rename = "self")]
pub local: Option<String>,
/// Known remotes, keyed (and therefore ordered) by remote name.
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
/// Name of the leader remote, if any.
#[serde(default)]
pub leader: Option<String>,
/// Version of the network; the migration writes `Uuid::nil()`.
#[serde(default)]
pub version: Uuid,
}
/// A single remote entry of the v1.30-shaped `Network`.
///
/// Has the same fields as `v1_29::Remote`; the migration copies them one by one.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
/// URL of the remote; the only field without a serde default.
pub url: String,
/// Optional search API key; `None` when absent from the stored JSON.
#[serde(default)]
pub search_api_key: Option<String>,
/// Optional write API key; `None` when absent from the stored JSON.
#[serde(default)]
pub write_api_key: Option<String>,
}
use super::v1_29;
use crate::Result;
/// Database const names for the `FeatureData`.
mod db_name {
// Name of the database that `set_network` creates (if needed) and writes the network to.
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
}
/// Keys used inside the `experimental-features` database.
mod db_keys {
// Key under which `set_network` stores the serialized `Network` value.
pub const NETWORK: &str = "network";
}
/// Migration rewriting the stored network from the v1.29 shape to the v1.30 shape.
pub struct MigrateNetwork;

impl super::UpgradeIndexScheduler for MigrateNetwork {
    /// Converts the stored `v1_29::Network`, if any, and writes it back in the new shape.
    ///
    /// Does nothing when no network is stored. The written network always carries the
    /// nil UUID as its version.
    fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()> {
        let Some(previous) = v1_29::get_network(env, wtxn)? else {
            return Ok(());
        };
        let v1_29::Network { local, remotes: old_remotes, sharding } = previous;

        // When sharding was enabled, the first remote in key order becomes the leader.
        let leader = sharding.then(|| old_remotes.keys().next().cloned()).flatten();

        // Remotes keep their name and all of their fields, only the type changes.
        let remotes = old_remotes
            .into_iter()
            .map(|(name, old)| {
                let v1_29::Remote { url, search_api_key, write_api_key } = old;
                (name, Remote { url, search_api_key, write_api_key })
            })
            .collect();

        let migrated = Network { local, remotes, leader, version: Uuid::nil() };
        set_network(env, wtxn, &migrated)?;

        Ok(())
    }

    /// The migration applies to any index scheduler created before v1.30.0.
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        initial_version < (1, 30, 0)
    }

    fn description(&self) -> &'static str {
        "updating the network struct"
    }
}
/// Stores `network` under the `network` key of the `experimental-features` database,
/// creating that database first if it does not exist yet.
fn set_network(env: &Env<WithoutTls>, wtxn: &mut RwTxn<'_>, network: &Network) -> Result<()> {
    let features_db_name = Some(db_name::EXPERIMENTAL_FEATURES);
    env.create_database::<Str, SerdeJson<Network>>(wtxn, features_db_name)?
        .put(wtxn, db_keys::NETWORK, network)?;
    Ok(())
}

View File

@@ -4,9 +4,11 @@ use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{CboRoaringBitmapCodec, ChannelCongestion};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{
BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status,
@@ -119,17 +121,8 @@ impl ProcessingBatch {
self.stats.total_nb_tasks = 0;
}
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state or that failed.
task.batch_uid = Some(self.uid);
// Same
task.started_at = Some(self.started_at);
task.finished_at = self.finished_at;
/// Update batch task from a processed task
pub fn update_from_task(&mut self, task: &Task) {
self.statuses.insert(task.status);
// Craft an aggregation of the details of all the tasks encountered in this batch.
@@ -144,6 +137,63 @@ impl ProcessingBatch {
}
}
/// Update the timestamp of the tasks after they're done
pub fn finish_task(&self, task: &mut Task) {
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state or that failed.
task.batch_uid = Some(self.uid);
// Same
task.started_at = Some(self.started_at);
task.finished_at = self.finished_at;
}
/// Fills the batch stats with the progress trace, the write channel congestion and the
/// per-database size changes.
///
/// Only databases present in both size maps and whose size actually changed are reported,
/// as `"<post size> (<sign><difference>)"` under the camelCased database name.
pub fn write_stats(
    &mut self,
    progress: &Progress,
    congestion: Option<ChannelCongestion>,
    pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
    post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
) {
    use std::cmp::Ordering::{Equal, Greater, Less};

    use byte_unit::Byte;
    use byte_unit::UnitType::Binary;

    self.stats.progress_trace = progress
        .accumulated_durations()
        .into_iter()
        .map(|(step, duration)| (step, duration.into()))
        .collect();

    self.stats.write_channel_congestion = congestion.map(|channel| {
        let mut info = serde_json::Map::new();
        info.insert("attempts".into(), channel.attempts.into());
        info.insert("blocking_attempts".into(), channel.blocking_attempts.into());
        info.insert("blocking_ratio".into(), channel.congestion_ratio().into());
        info
    });

    self.stats.internal_database_sizes = pre_commit_dabases_sizes
        .iter()
        .filter_map(|(dbname, pre_size)| {
            // Databases missing from the post-commit sizes are not reported.
            let post_size = post_commit_dabases_sizes.get(dbname)?;

            // Unchanged databases are not reported either.
            let sign = match post_size.cmp(pre_size) {
                Equal => return None,
                Greater => "+",
                Less => "-",
            };

            let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
            let diff =
                Byte::from_u64(post_size.abs_diff(*pre_size) as u64).get_appropriate_unit(Binary);

            Some((dbname.to_case(Case::Camel), format!("{post:#.2} ({sign}{diff:#.2})").into()))
        })
        .collect();
}
pub fn to_batch(&self) -> Batch {
Batch {
uid: self.uid,
@@ -286,6 +336,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
| K::DumpCreation { .. }
| K::Export { .. }
| K::UpgradeDatabase { .. }
| K::NetworkTopologyChange(_)
| K::SnapshotCreation => (),
};
if let Some(Details::IndexSwap { swaps }) = &mut task.details {
@@ -627,6 +678,9 @@ impl crate::IndexScheduler {
} => {
assert_eq!(kind.as_kind(), Kind::IndexCompaction);
}
Details::NetworkTopologyChange { moved_documents: _, message: _ } => {
assert_eq!(kind.as_kind(), Kind::NetworkTopologyChange);
}
}
}

View File

@@ -64,14 +64,7 @@ impl Versioning {
};
wtxn.commit()?;
let bin_major: u32 = versioning::VERSION_MAJOR;
let bin_minor: u32 = versioning::VERSION_MINOR;
let bin_patch: u32 = versioning::VERSION_PATCH;
let to = (bin_major, bin_minor, bin_patch);
if from != to {
upgrade_index_scheduler(env, &this, from, to)?;
}
upgrade_index_scheduler(env, &this, from)?;
// Once we reach this point, the upgrade process (if there was one) is entirely finished:
// we can safely say we reached the latest version of the index scheduler

View File

@@ -15,7 +15,7 @@ license.workspace = true
serde_json = "1.0"
[dev-dependencies]
criterion = "0.6.0"
criterion = "0.7.0"
[[bench]]
name = "depth"

View File

@@ -13,7 +13,7 @@ license.workspace = true
[dependencies]
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0"
md5 = "0.8.0"
once_cell = "1.21"
regex-lite = "0.1.6"
uuid = { version = "1.17.0", features = ["v4"] }
regex-lite = "0.1.8"
uuid = { version = "1.18.1", features = ["v4"] }

View File

@@ -12,15 +12,15 @@ license.workspace = true
[dependencies]
base64 = "0.22.1"
enum-iterator = "2.1.0"
enum-iterator = "2.3.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
sha2 = "0.10.9"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
thiserror = "2.0.17"
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.18.1", features = ["serde", "v4"] }

View File

@@ -11,38 +11,41 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.11.0", default-features = false }
anyhow = "1.0.98"
bumpalo = "3.18.1"
actix-web = { version = "4.12.0", default-features = false }
anyhow = "1.0.100"
base64 = "0.22.1"
bumpalo = "3.19.0"
bumparaw-collections = "0.1.4"
byte-unit = { version = "5.1.6", features = ["serde"] }
convert_case = "0.8.0"
csv = "1.3.1"
deserr = { version = "0.6.3", features = ["actix-web"] }
convert_case = "0.9.0"
csv = "1.4.0"
deserr = { version = "0.6.4", features = ["actix-web"] }
either = { version = "1.15.0", features = ["serde"] }
enum-iterator = "2.1.0"
enum-iterator = "2.3.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
flate2 = "1.1.5"
fst = "0.4.7"
memmap2 = "0.9.7"
itertools = "0.14.0"
memmap2 = "0.9.9"
milli = { path = "../milli" }
roaring = { version = "0.10.12", features = ["serde"] }
rustc-hash = "2.1.1"
serde = { version = "1.0.219", features = ["derive"] }
serde = { version = "1.0.228", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = { version = "1.0.140", features = ["preserve_order"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
tempfile = "3.23.0"
thiserror = "2.0.17"
time = { version = "0.3.44", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.45"
tokio = "1.48"
urlencoding = "2.1.3"
utoipa = { version = "5.4.0", features = ["macros"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.18.1", features = ["serde", "v4"] }
[dev-dependencies]
# fixed version due to format breakages in v1.40
@@ -56,6 +59,9 @@ all-tokenizations = ["milli/all-tokenizations"]
# chinese specialized tokenization
chinese = ["milli/chinese"]
chinese-pinyin = ["milli/chinese-pinyin"]
enterprise = ["milli/enterprise"]
# hebrew specialized tokenization
hebrew = ["milli/hebrew"]
# japanese specialized tokenization

View File

@@ -0,0 +1,16 @@
/// `Network` methods for builds without the `enterprise` feature: sharding is never enabled.
pub mod network {
use milli::update::new::indexer::current_edition::sharding::Shards;
use crate::network::Network;
impl Network {
/// Always returns `None` in this edition.
pub fn shards(&self) -> Option<Shards> {
None
}
/// Whether sharding is enabled for this network.
pub fn sharding(&self) -> bool {
// always false in CE
false
}
}
}

View File

@@ -3,45 +3,23 @@
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use milli::update::new::indexer::enterprise_edition::sharding::Shards;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
#[serde(default)]
pub sharding: bool,
}
use crate::network::Network;
impl Network {
pub fn shards(&self) -> Option<Shards> {
if self.sharding {
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
let others = self
.remotes
.keys()
.filter(|name| name.as_str() != this)
.map(|name| name.to_owned())
.collect();
Some(Shards { own: vec![this.to_owned()], others })
if self.sharding() {
Some(Shards::from_remotes_local(
self.remotes.keys().map(String::as_str),
self.local.as_deref(),
))
} else {
None
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,
#[serde(default)]
pub write_api_key: Option<String>,
pub fn sharding(&self) -> bool {
self.leader.is_some()
}
}

View File

@@ -156,7 +156,7 @@ macro_rules! make_error_codes {
}
/// return error name, used as error code
fn name(&self) -> String {
pub fn name(&self) -> String {
match self {
$(
Code::$code_ident => stringify!($code_ident).to_case(convert_case::Case::Snake)
@@ -214,6 +214,9 @@ ImmutableApiKeyUid , InvalidRequest , BAD_REQU
ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST;
ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST;
ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST;
ImportTaskAlreadyReceived , InvalidRequest , PRECONDITION_FAILED;
ImportTaskUnknownRemote , InvalidRequest , PRECONDITION_FAILED;
ImportTaskWithoutNetworkTask , InvalidRequest , SERVICE_UNAVAILABLE;
IndexAlreadyExists , InvalidRequest , CONFLICT ;
IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR;
IndexNotFound , InvalidRequest , NOT_FOUND;
@@ -270,9 +273,9 @@ InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQU
InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidNetworkLeader , InvalidRequest , BAD_REQUEST ;
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
@@ -377,7 +380,9 @@ MissingPayload , InvalidRequest , BAD_REQU
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NetworkVersionMismatch , InvalidRequest , PRECONDITION_FAILED ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
NotLeader , InvalidRequest , BAD_REQUEST ;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
RemoteBadResponse , System , BAD_GATEWAY ;
RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
@@ -391,6 +396,9 @@ TaskFileNotFound , InvalidRequest , NOT_FOUN
BatchNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnexpectedNetworkPreviousRemotes , InvalidRequest , BAD_REQUEST ;
NetworkVersionTooOld , InvalidRequest , BAD_REQUEST ;
UnprocessedNetworkTask , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
@@ -433,6 +441,7 @@ InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQU
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST ;
RequiresEnterpriseEdition , InvalidRequest , UNAVAILABLE_FOR_LEGAL_REASONS ;
// Webhooks
InvalidWebhooks , InvalidRequest , BAD_REQUEST ;
InvalidWebhookUrl , InvalidRequest , BAD_REQUEST ;

View File

@@ -2,10 +2,17 @@
pub mod batch_view;
pub mod batches;
#[cfg(not(feature = "enterprise"))]
pub mod community_edition;
pub mod compression;
pub mod deserr;
pub mod document_formats;
#[cfg(feature = "enterprise")]
pub mod enterprise_edition;
#[cfg(not(feature = "enterprise"))]
pub use community_edition as current_edition;
#[cfg(feature = "enterprise")]
pub use enterprise_edition as current_edition;
pub mod error;
pub mod facet_values_sort;
pub mod features;
@@ -13,6 +20,7 @@ pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
pub mod locales;
pub mod network;
pub mod settings;
pub mod star_or;
pub mod task_view;

View File

@@ -0,0 +1,27 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Network configuration of an instance.
///
/// Every field falls back to its `Default` value when missing from the deserialized input.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
/// Name of the local instance; (de)serialized under the `self` key.
#[serde(default, rename = "self")]
pub local: Option<String>,
/// Known remotes, keyed (and therefore ordered) by remote name.
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
/// Name of the leader remote, if any.
#[serde(default)]
pub leader: Option<String>,
/// Version of the network.
#[serde(default)]
pub version: Uuid,
}
/// A single remote entry of a `Network`.
///
/// Field names are (de)serialized in camelCase (`searchApiKey`, `writeApiKey`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
/// URL of the remote; the only field without a serde default.
pub url: String,
/// Optional search API key; `None` when absent from the input.
#[serde(default)]
pub search_api_key: Option<String>,
/// Optional write API key; `None` when absent from the input.
#[serde(default)]
pub write_api_key: Option<String>,
}

View File

@@ -9,12 +9,12 @@ use utoipa::ToSchema;
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::network::DbTaskNetwork;
use crate::tasks::{
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
TaskNetwork,
};
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct TaskView {
@@ -54,7 +54,7 @@ pub struct TaskView {
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
pub network: Option<DbTaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
@@ -151,6 +151,11 @@ pub struct DetailsView {
pub pre_compaction_size: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub post_compaction_size: Option<String>,
// network topology change
#[serde(skip_serializing_if = "Option::is_none")]
pub moved_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
}
impl DetailsView {
@@ -161,6 +166,17 @@ impl DetailsView {
(None, Some(doc)) | (Some(doc), None) => Some(doc),
(Some(left), Some(right)) => Some(left + right),
},
moved_documents: match (self.moved_documents, other.moved_documents) {
(None, None) => None,
(None, Some(doc)) | (Some(doc), None) => Some(doc),
(Some(left), Some(right)) => Some(left + right),
},
message: match (&mut self.message, &other.message) {
(None, None) => None,
(None, Some(message)) => Some(message.clone()),
(Some(message), None) => Some(std::mem::take(message)),
(Some(message), Some(_)) => Some(std::mem::take(message)),
},
indexed_documents: match (self.indexed_documents, other.indexed_documents) {
(None, None) => None,
(None, Some(None)) | (Some(None), None) | (Some(None), Some(None)) => Some(None),
@@ -451,6 +467,11 @@ impl From<Details> for DetailsView {
..Default::default()
}
}
Details::NetworkTopologyChange { moved_documents, message } => DetailsView {
moved_documents: Some(moved_documents),
message: Some(message),
..Default::default()
},
}
}
}

View File

@@ -23,6 +23,8 @@ use crate::{versioning, InstanceUid};
pub type TaskId = u32;
pub mod network;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Task {
@@ -44,7 +46,7 @@ pub struct Task {
pub kind: KindWithContent,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
pub network: Option<network::DbTaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
@@ -61,6 +63,7 @@ impl Task {
| TaskDeletion { .. }
| Export { .. }
| UpgradeDatabase { .. }
| NetworkTopologyChange { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -99,6 +102,7 @@ impl Task {
| KindWithContent::SnapshotCreation
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange { .. }
| KindWithContent::IndexCompaction { .. } => None,
}
}
@@ -178,6 +182,7 @@ pub enum KindWithContent {
IndexCompaction {
index_uid: String,
},
NetworkTopologyChange(network::NetworkTopologyChange),
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
@@ -215,6 +220,7 @@ impl KindWithContent {
KindWithContent::Export { .. } => Kind::Export,
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
KindWithContent::NetworkTopologyChange { .. } => Kind::NetworkTopologyChange,
}
}
@@ -227,6 +233,7 @@ impl KindWithContent {
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. }
| NetworkTopologyChange { .. }
| UpgradeDatabase { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -340,6 +347,10 @@ impl KindWithContent {
pre_compaction_size: None,
post_compaction_size: None,
}),
KindWithContent::NetworkTopologyChange { .. } => Some(Details::NetworkTopologyChange {
moved_documents: 0,
message: "processing tasks for previous network versions".into(),
}),
}
}
@@ -392,7 +403,7 @@ impl KindWithContent {
})
}
KindWithContent::IndexSwap { .. } => {
todo!()
unimplemented!("do not call `default_finished_details` for `IndexSwap` tasks")
}
KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation {
matched_tasks: tasks.len(),
@@ -427,6 +438,9 @@ impl KindWithContent {
pre_compaction_size: None,
post_compaction_size: None,
}),
KindWithContent::NetworkTopologyChange(network_topology_change) => {
Some(network_topology_change.to_details())
}
}
}
}
@@ -494,6 +508,9 @@ impl From<&KindWithContent> for Option<Details> {
pre_compaction_size: None,
post_compaction_size: None,
}),
KindWithContent::NetworkTopologyChange(network_topology_change) => {
Some(network_topology_change.to_details())
}
}
}
}
@@ -605,6 +622,7 @@ pub enum Kind {
Export,
UpgradeDatabase,
IndexCompaction,
NetworkTopologyChange,
}
impl Kind {
@@ -624,6 +642,7 @@ impl Kind {
| Kind::DumpCreation
| Kind::Export
| Kind::UpgradeDatabase
| Kind::NetworkTopologyChange
| Kind::SnapshotCreation => false,
}
}
@@ -646,6 +665,7 @@ impl Display for Kind {
Kind::Export => write!(f, "export"),
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
Kind::IndexCompaction => write!(f, "indexCompaction"),
Kind::NetworkTopologyChange => write!(f, "networkTopologyChange"),
}
}
}
@@ -683,6 +703,8 @@ impl FromStr for Kind {
Ok(Kind::UpgradeDatabase)
} else if kind.eq_ignore_ascii_case("indexCompaction") {
Ok(Kind::IndexCompaction)
} else if kind.eq_ignore_ascii_case("networkTopologyChange") {
Ok(Kind::NetworkTopologyChange)
} else {
Err(ParseTaskKindError(kind.to_owned()))
}
@@ -773,36 +795,10 @@ pub enum Details {
pre_compaction_size: Option<Byte>,
post_compaction_size: Option<Byte>,
},
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
pub enum TaskNetwork {
Origin { origin: Origin },
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Origin {
pub remote_name: String,
pub task_uid: usize,
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
#[serde(skip_serializing_if = "Option::is_none")]
task_uid: Option<TaskId>,
error: Option<ResponseError>,
}
impl From<Result<TaskId, ResponseError>> for RemoteTask {
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
match res {
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
}
}
NetworkTopologyChange {
moved_documents: u64,
message: String,
},
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
@@ -845,6 +841,9 @@ impl Details {
| Self::Export { .. }
| Self::UpgradeDatabase { .. }
| Self::IndexSwap { .. } => (),
Self::NetworkTopologyChange { moved_documents: _, message } => {
*message = format!("Failed. Previous status: {}", message);
}
}
details
@@ -900,6 +899,17 @@ pub enum BatchStopReason {
SettingsWithDocumentOperation {
id: TaskId,
},
NetworkTask {
id: TaskId,
},
NetworkTaskOlderTasks {
id: TaskId,
inner_reason: String,
},
NetworkTaskImportTasks {
id: TaskId,
inner_reason: String,
},
}
impl BatchStopReason {
@@ -988,6 +998,24 @@ impl Display for BatchStopReason {
"stopped before task with id {id} because it is a document operation which cannot be batched with settings changes"
)
}
BatchStopReason::NetworkTask { id } => {
write!(
f,
"stopped after task with id {id} because it is a network topology change task"
)
}
BatchStopReason::NetworkTaskOlderTasks { id, inner_reason } => {
write!(
f,
"stopped after batching network task with id {id} and a batch of older tasks: {inner_reason}"
)
}
BatchStopReason::NetworkTaskImportTasks { id, inner_reason } => {
write!(
f,
"stopped after batching network task with id {id} and a batch of import tasks: {inner_reason}"
)
}
}
}
}

View File

@@ -0,0 +1,782 @@
use std::collections::BTreeMap;
use base64::Engine as _;
use itertools::{EitherOrBoth, Itertools as _};
use milli::{CboRoaringBitmapCodec, DocumentId};
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use crate::error::ResponseError;
use crate::network::Network;
use crate::tasks::{Details, TaskId};
#[cfg(not(feature = "enterprise"))]
mod community_edition;
#[cfg(feature = "enterprise")]
mod enterprise_edition;
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
// This type is used in the database, care should be taken when modifying it.
//
// The enum is `untagged`: no variant name is written, so a stored value is matched against
// the variants by the fields it contains.
pub enum DbTaskNetwork {
/// Tasks that were duplicated from `origin`
Origin { origin: Origin },
/// Tasks that were duplicated as `remote_tasks`
Remotes {
remote_tasks: BTreeMap<String, RemoteTask>,
// Falls back to `Uuid::default()` when the stored value has no network version.
#[serde(default)]
network_version: Uuid,
},
/// Document import tasks sent in the context of `network_change`
Import { import_from: ImportData, network_change: Origin },
}
impl DbTaskNetwork {
pub fn network_version(&self) -> Uuid {
match self {
DbTaskNetwork::Origin { origin } => origin.network_version,
DbTaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
DbTaskNetwork::Import { import_from: _, network_change } => {
network_change.network_version
}
}
}
pub fn import_data(&self) -> Option<&ImportData> {
match self {
DbTaskNetwork::Origin { .. } | DbTaskNetwork::Remotes { .. } => None,
DbTaskNetwork::Import { import_from, .. } => Some(import_from),
}
}
pub fn origin(&self) -> Option<&Origin> {
match self {
DbTaskNetwork::Origin { origin } => Some(origin),
DbTaskNetwork::Remotes { .. } => None,
DbTaskNetwork::Import { network_change, .. } => Some(network_change),
}
}
}
/// In-memory counterpart of [`DbTaskNetwork`].
///
/// It has the same variants, except that `Import` additionally carries an [`ImportMetadata`],
/// which is dropped when converting into [`DbTaskNetwork`].
#[derive(Debug, PartialEq, Clone)]
pub enum TaskNetwork {
/// Tasks that were duplicated from `origin`
Origin { origin: Origin },
/// Tasks that were duplicated as `remote_tasks`
Remotes { remote_tasks: BTreeMap<String, RemoteTask>, network_version: Uuid },
/// Document import tasks sent in the context of `network_change`
Import { import_from: ImportData, network_change: Origin, metadata: ImportMetadata },
}
impl TaskNetwork {
pub fn network_version(&self) -> Uuid {
match self {
TaskNetwork::Origin { origin } => origin.network_version,
TaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
TaskNetwork::Import { import_from: _, network_change, metadata: _ } => {
network_change.network_version
}
}
}
}
impl From<TaskNetwork> for DbTaskNetwork {
fn from(value: TaskNetwork) -> Self {
match value {
TaskNetwork::Origin { origin } => DbTaskNetwork::Origin { origin },
TaskNetwork::Remotes { remote_tasks, network_version } => {
DbTaskNetwork::Remotes { remote_tasks, network_version }
}
TaskNetwork::Import { import_from, network_change, metadata: _ } => {
DbTaskNetwork::Import { import_from, network_change }
}
}
}
}
// Identifies the task a duplicated or import task originates from: a remote name, a task uid,
// and the network version that was attached to it.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Origin {
pub remote_name: String,
pub task_uid: u32,
// Falls back to `Uuid::default()` when the field is missing from the serialized data.
#[serde(default)]
pub network_version: Uuid,
}
/// Import data stored in a task
// Carried as the `import_from` field of the `Import` variants of `DbTaskNetwork` and `TaskNetwork`.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ImportData {
/// Remote that this task is imported from
pub remote_name: String,
/// Index relevant to this task
pub index_name: Option<String>,
/// Number of documents in this task
pub document_count: u64,
}
/// Import metadata associated with a task but not stored in the task
///
/// It only exists on [`TaskNetwork::Import`]; the conversion to [`DbTaskNetwork`] discards it.
#[derive(Debug, PartialEq, Clone)]
pub struct ImportMetadata {
/// Total number of indexes to import from this host
pub index_count: u64,
/// Key unique to this (network_change, index, host, key).
///
/// In practice, an internal document id of one of the documents to import.
pub task_key: Option<DocumentId>,
/// Total number of documents to import for this index from this host.
pub total_index_documents: u64,
}
// Outcome of duplicating a task to one remote: either the uid of the task on that remote,
// or the error that was returned instead (see the `From<Result<TaskId, ResponseError>>` impl).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
task_uid: Option<TaskId>,
// Unlike `task_uid`, this field has no `skip_serializing_if` and is always serialized.
error: Option<ResponseError>,
}
impl From<Result<TaskId, ResponseError>> for RemoteTask {
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
match res {
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
}
}
}
/// Contains the full state of a network topology change.
///
/// A network topology change task is unique in that it can be processed in multiple different batches, as its resolution
/// depends on various document additions tasks being processed.
///
/// A network topology task has 4 states:
///
/// 1. Processing any task that was meant for an earlier version of the network. This is necessary to know that we have the right version of
/// documents.
/// 2. Sending all documents that must be moved to other remotes.
/// 3. Processing any task coming from the remotes.
/// 4. Finished.
///
/// Furthermore, it maintains some stats (currently the number of moved documents).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NetworkTopologyChange {
/// Current step of the topology change; a new change starts in `WaitingForOlderTasks`.
state: NetworkTopologyState,
/// Import progress per remote name. Built by `new` from the remotes of both the old and the
/// new network, excluding the local instance; left empty when the new network has no local name.
in_remotes: BTreeMap<String, InRemote>,
/// Network configuration before the change.
old_network: Network,
/// Network configuration after the change.
new_network: Network,
/// Statistics reported through `to_details`.
stats: NetworkTopologyStats,
}
impl NetworkTopologyChange {
pub fn new(old_network: Network, new_network: Network) -> Self {
let in_name = new_network.local.as_deref();
let out_name = old_network.local.as_deref().or(in_name);
let in_remotes = if in_name.is_some() {
old_network
.remotes
.keys()
.chain(new_network.remotes.keys())
// don't await imports from ourselves
.filter(|name| Some(name.as_str()) != out_name)
.cloned()
.map(|name| (name, InRemote::new()))
.collect()
} else {
Default::default()
};
Self {
state: NetworkTopologyState::WaitingForOlderTasks,
in_remotes,
stats: NetworkTopologyStats { moved_documents: 0 },
new_network,
old_network,
}
}
pub fn in_name(&self) -> Option<&str> {
self.new_network.local.as_deref()
}
pub fn out_name(&self) -> Option<&str> {
self.old_network.local.as_deref().or_else(|| self.in_name())
}
pub fn state(&self) -> NetworkTopologyState {
self.state
}
/// Summarizes the topology change as task [`Details`].
///
/// The returned `Details::NetworkTopologyChange` carries the number of moved documents from
/// the stats and a human-readable progress message that depends on the current state.
/// While importing, the message lists the remotes still waiting for their first task, the
/// progress of the ongoing imports, and (only when there is at least one) the number of
/// remotes that finished.
pub fn to_details(&self) -> Details {
    let message = match self.state {
        NetworkTopologyState::WaitingForOlderTasks => {
            "Waiting for tasks enqueued before the network change to finish processing".into()
        }
        NetworkTopologyState::ExportingDocuments => "Exporting documents".into(),
        NetworkTopologyState::ImportingDocuments => {
            let mut finished_count = 0;

            let mut first_ongoing = None;
            let mut ongoing_total_indexes = 0;
            let mut ongoing_processed_documents = 0;
            let mut ongoing_missing_documents = 0;
            let mut ongoing_total_documents = 0;
            let mut other_ongoing_count = 0;

            let mut first_waiting = None;
            let mut other_waiting_count = 0;

            for (remote_name, in_remote) in &self.in_remotes {
                match &in_remote.import_state {
                    ImportState::WaitingForInitialTask => {
                        // Only the first waiting remote is named, the others are counted.
                        first_waiting = match first_waiting {
                            None => Some(remote_name),
                            first_waiting => {
                                other_waiting_count += 1;
                                first_waiting
                            }
                        };
                    }
                    ImportState::Ongoing { import_index_state, total_indexes } => {
                        // Only the first ongoing remote is named, the others are counted.
                        first_ongoing = match first_ongoing {
                            None => {
                                ongoing_total_indexes = *total_indexes;
                                Some(remote_name)
                            }
                            first_ongoing => {
                                other_ongoing_count += 1;
                                first_ongoing
                            }
                        };
                        for import_state in import_index_state.values() {
                            match import_state {
                                ImportIndexState::Ongoing {
                                    total_documents,
                                    processed_documents,
                                    received_documents,
                                    task_keys: _,
                                } => {
                                    ongoing_total_documents += total_documents;
                                    ongoing_processed_documents += processed_documents;
                                    ongoing_missing_documents +=
                                        total_documents.saturating_sub(*received_documents);
                                }
                                ImportIndexState::Finished { total_documents } => {
                                    ongoing_total_documents += total_documents;
                                    ongoing_processed_documents += total_documents;
                                }
                            }
                        }
                    }
                    ImportState::Finished { total_indexes, total_documents } => {
                        finished_count += 1;
                        // NOTE(review): this overwrites the index count taken from the first
                        // ongoing remote; kept as-is, confirm whether it is intended.
                        ongoing_total_indexes = *total_indexes;
                        ongoing_total_documents += *total_documents;
                        ongoing_processed_documents += *total_documents;
                    }
                }
            }

            format!(
                "Importing documents from {total} remotes{waiting}{ongoing}{finished}",
                total = self.in_remotes.len(),
                waiting = if let Some(first_waiting) = first_waiting {
                    format!(
                        ", waiting on first task from `{}`{others}",
                        first_waiting,
                        others = if other_waiting_count > 0 {
                            format!(" and {other_waiting_count} other remotes")
                        } else {
                            "".into()
                        }
                    )
                } else {
                    "".into()
                },
                ongoing = if let Some(first_ongoing) = first_ongoing {
                    format!(
                        ", awaiting {ongoing_missing_documents} and processed {ongoing_processed_documents} out of {ongoing_total_documents} documents in {ongoing_total_indexes} indexes from `{first_ongoing}`{others}",
                        others = if other_ongoing_count > 0 {
                            format!(" and {other_ongoing_count} other remotes")
                        } else {
                            "".into()
                        }
                    )
                } else {
                    "".into()
                },
                // Strictly positive, like the other counters: `>= 0` was always true and
                // made the empty branch unreachable.
                finished = if finished_count > 0 {
                    format!(", {finished_count} remotes finished processing")
                } else {
                    "".into()
                }
            )
        }
        NetworkTopologyState::Finished => "Finished".into(),
    };
    Details::NetworkTopologyChange { moved_documents: self.stats.moved_documents, message }
}
/// Merges `other` into `self`, keeping for each piece of state the most advanced value.
///
/// - `state` and `stats` become the maximum of both sides under their derived `Ord`.
/// - The old and new networks of `self` are left untouched.
/// - For each remote, `WaitingForInitialTask` loses against anything, `Finished` wins against
///   anything, and two `Ongoing` states are merged index by index.
///
/// NOTE(review): remotes are paired by iteration position (`zip`), not by name, so this
/// presumably expects both sides to hold the same set of remotes; confirm against callers.
pub fn merge(&mut self, other: NetworkTopologyChange) {
// The topology change has a guarantee of forward progress, so for each field we're going to keep the "most advanced" values.
let Self { state, new_network: _, old_network: _, in_remotes, stats } = self;
*state = Ord::max(*state, other.state);
*stats = Ord::max(*stats, other.stats);
// `old_value` comes from `other`, `new_value` is the entry of `self` that gets updated.
for (old_value, new_value) in other.in_remotes.into_values().zip(in_remotes.values_mut()) {
// `mem::take` moves the state of `self` out (leaving the default behind) so that both
// sides can be matched by value; the result of the match is written back immediately.
new_value.import_state = match (old_value.import_state, std::mem::take(&mut new_value.import_state)) {
// waiting for initial task is always older
(ImportState::WaitingForInitialTask, newer)
| (newer, ImportState::WaitingForInitialTask)
// finished is always newer
| (_, newer @ ImportState::Finished { .. })
| (newer @ ImportState::Finished { .. }, _) => newer,
(
ImportState::Ongoing { import_index_state: left_import, total_indexes: left_total_indexes },
ImportState::Ongoing { import_index_state: right_import, total_indexes: right_total_indexes },
) => {
// Both maps iterate in key order, so a merge-join on the index name pairs up the
// indexes known to both sides and keeps the ones known to a single side as-is.
let import_index_state = left_import.into_iter().merge_join_by(right_import.into_iter(), |(k,_), (x, _)|k.cmp(x)).map(|eob|
match eob {
EitherOrBoth::Both((name, left), (_, right)) => {
let newer = merge_import_index_state(left, right);
(name, newer)
},
EitherOrBoth::Left(import) |
EitherOrBoth::Right(import) => import,
}
).collect();
ImportState::Ongoing{ import_index_state, total_indexes : u64::max(left_total_indexes, right_total_indexes) }
},
}
}
}
/// Returns the network that applies in the current state: the old network while waiting
/// for older tasks, the new network in every later state.
pub fn network_for_state(&self) -> &Network {
    let still_on_old_network = match self.state {
        NetworkTopologyState::WaitingForOlderTasks => true,
        NetworkTopologyState::ExportingDocuments
        | NetworkTopologyState::ImportingDocuments
        | NetworkTopologyState::Finished => false,
    };
    if still_on_old_network {
        &self.old_network
    } else {
        &self.new_network
    }
}
}
fn merge_import_index_state(left: ImportIndexState, right: ImportIndexState) -> ImportIndexState {
match (left, right) {
(_, newer @ ImportIndexState::Finished { .. }) => newer,
(newer @ ImportIndexState::Finished { .. }, _) => newer,
(
ImportIndexState::Ongoing {
total_documents: left_total_documents,
received_documents: left_received_documents,
processed_documents: left_processed_documents,
task_keys: mut left_task_keys,
},
ImportIndexState::Ongoing {
total_documents: right_total_documents,
received_documents: right_received_documents,
processed_documents: right_processed_documents,
task_keys: right_task_keys,
},
) => {
let total_documents = u64::max(left_total_documents, right_total_documents);
let received_documents = u64::max(left_received_documents, right_received_documents);
let processed_documents = u64::max(left_processed_documents, right_processed_documents);
left_task_keys.0 |= &right_task_keys.0;
let task_keys = left_task_keys;
ImportIndexState::Ongoing {
total_documents,
received_documents,
processed_documents,
task_keys,
}
}
}
}
/// Step a network topology change is currently in.
///
/// The declaration order matters: `NetworkTopologyChange::merge` keeps the maximum of two
/// states under the derived `Ord`, so later variants are considered more advanced.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)]
#[serde(rename_all = "camelCase")]
pub enum NetworkTopologyState {
/// Initial state of a new topology change.
WaitingForOlderTasks,
ExportingDocuments,
ImportingDocuments,
Finished,
}
/// Statistics maintained for a network topology change.
///
/// `NetworkTopologyChange::merge` keeps the maximum of two values under the derived `Ord`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)]
#[serde(rename_all = "camelCase")]
pub struct NetworkTopologyStats {
/// Number of moved documents; deserializes to 0 when the field is absent.
#[serde(default)]
pub moved_documents: u64,
}
/// Import progress tracked for one remote of a network topology change.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct InRemote {
/// Where the import from this remote currently stands.
import_state: ImportState,
}
impl InRemote {
    /// Creates the tracking state of a remote from which no task has been received yet.
    pub fn new() -> Self {
        let import_state = ImportState::WaitingForInitialTask;
        Self { import_state }
    }
}
/// Progress of the import from a single remote.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
enum ImportState {
/// Initially Meilisearch doesn't know how many documents it should expect from a remote.
/// Any task from each remote contains the information of how many indexes will be imported,
/// and the number of documents to import for the index of the task.
#[default]
WaitingForInitialTask,
/// At least one task was received; progress is tracked per index name.
Ongoing {
import_index_state: BTreeMap<String, ImportIndexState>,
total_indexes: u64,
},
/// Nothing more is expected from this remote.
Finished {
total_indexes: u64,
total_documents: u64,
},
}
/// Progress of the import of a single index from a single remote.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
enum ImportIndexState {
/// Documents are still being received or processed for this index.
Ongoing {
/// Number of documents expected for this index.
total_documents: u64,
/// Number of documents announced by the tasks received so far.
received_documents: u64,
/// Number of documents whose tasks have been processed so far.
processed_documents: u64,
/// Keys of the tasks already received for this index.
task_keys: TaskKeys,
},
/// The import of this index is complete.
Finished {
total_documents: u64,
},
}
/// Set of task keys, stored as a roaring bitmap.
///
/// Its serde representation is a string: the bitmap encoded with `CboRoaringBitmapCodec`
/// and then base64-encoded (see the `Serialize` and `Deserialize` impls).
#[derive(Debug, Clone, PartialEq)]
pub struct TaskKeys(pub RoaringBitmap);
/// Serializes the bitmap as a base64 string of its `CboRoaringBitmapCodec` encoding.
impl Serialize for TaskKeys {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let mut buffer = Vec::new();
        CboRoaringBitmapCodec::serialize_into_vec(&self.0, &mut buffer);
        serializer.serialize_str(&base64::prelude::BASE64_STANDARD.encode(&buffer))
    }
}
/// Deserializes task keys from the string produced by the `Serialize` impl.
impl<'de> Deserialize<'de> for TaskKeys {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
// The decoding itself (base64, then roaring bitmap) is done by `TaskKeysVisitor`.
deserializer.deserialize_str(TaskKeysVisitor)
}
}
/// Serde visitor turning a base64 string (or raw bytes) into [`TaskKeys`].
struct TaskKeysVisitor;

impl<'de> serde::de::Visitor<'de> for TaskKeysVisitor {
    type Value = TaskKeys;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("a base64 encoded cbo roaring bitmap")
    }

    /// Decodes the base64 payload, then delegates to `visit_bytes`.
    fn visit_str<E>(self, encoded: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        match base64::prelude::BASE64_STANDARD.decode(encoded) {
            Ok(raw) => self.visit_bytes(&raw),
            Err(_) => {
                Err(E::invalid_value(serde::de::Unexpected::Str(encoded), &"a base64 string"))
            }
        }
    }

    /// Decodes the bytes as a `CboRoaringBitmapCodec`-encoded bitmap.
    fn visit_bytes<E>(self, decoded: &[u8]) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        match CboRoaringBitmapCodec::deserialize_from(decoded) {
            Ok(bitmap) => Ok(TaskKeys(bitmap)),
            Err(_) => Err(E::invalid_value(
                serde::de::Unexpected::Bytes(decoded),
                &"a cbo roaring bitmap",
            )),
        }
    }
}
/// Errors returned by `NetworkTopologyChange::receive_remote_task`.
pub enum ReceiveTaskError {
/// The named remote is not one of the remotes this topology change imports from.
UnknownRemote(String),
/// A task with this key was already received for the same remote and index.
DuplicateTask(DocumentId),
}
pub mod headers {
use std::borrow::Cow;
use std::num::ParseIntError;
use std::string::FromUtf8Error;
use milli::DocumentId;
use uuid::Uuid;
use crate::tasks::TaskId;
/// Implement on response types to extract header values
pub trait GetHeader: Sized {
type Error: std::fmt::Debug + std::fmt::Display;
fn get_header(&self, name: &str) -> Result<Option<&str>, Self::Error>;
fn get_origin_remote(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_REMOTE_HEADER)? else {
return Ok(None);
};
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
inner,
header: PROXY_ORIGIN_REMOTE_HEADER,
})?))
}
fn get_origin_task_uid(&self) -> Result<Option<TaskId>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_TASK_UID_HEADER)? else {
return Ok(None);
};
let decoded = urlencoding::decode(encoded).map_err(|inner| {
DecodeError::UrlDecoding { inner, header: PROXY_ORIGIN_TASK_UID_HEADER }
})?;
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
inner,
header: PROXY_ORIGIN_TASK_UID_HEADER,
})?;
Ok(Some(parsed))
}
fn get_origin_network_version(&self) -> Result<Option<Uuid>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_NETWORK_VERSION_HEADER)?
else {
return Ok(None);
};
let decoded = urlencoding::decode(encoded).map_err(|inner| {
DecodeError::UrlDecoding { inner, header: PROXY_ORIGIN_NETWORK_VERSION_HEADER }
})?;
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseUuid {
inner,
header: PROXY_ORIGIN_NETWORK_VERSION_HEADER,
})?;
Ok(Some(parsed))
}
fn get_import_remote(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_REMOTE_HEADER)? else {
return Ok(None);
};
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
inner,
header: PROXY_IMPORT_REMOTE_HEADER,
})?))
}
fn get_import_index_count(&self) -> Result<Option<u64>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_INDEX_COUNT_HEADER)?
else {
return Ok(None);
};
let decoded = urlencoding::decode(encoded).map_err(|inner| {
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_INDEX_COUNT_HEADER }
})?;
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
inner,
header: PROXY_IMPORT_INDEX_COUNT_HEADER,
})?;
Ok(Some(parsed))
}
fn get_import_index(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_INDEX_HEADER)? else {
return Ok(None);
};
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
inner,
header: PROXY_IMPORT_INDEX_HEADER,
})?))
}
fn get_import_task_key(&self) -> Result<Option<DocumentId>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_TASK_KEY_HEADER)? else {
return Ok(None);
};
let decoded = urlencoding::decode(encoded).map_err(|inner| {
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_TASK_KEY_HEADER }
})?;
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
inner,
header: PROXY_IMPORT_TASK_KEY_HEADER,
})?;
Ok(Some(parsed))
}
fn get_import_docs(&self) -> Result<Option<u64>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_DOCS_HEADER)? else {
return Ok(None);
};
let decoded = urlencoding::decode(encoded).map_err(|inner| {
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_DOCS_HEADER }
})?;
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
inner,
header: PROXY_IMPORT_DOCS_HEADER,
})?;
Ok(Some(parsed))
}
fn get_import_index_docs(&self) -> Result<Option<u64>, DecodeError<Self>> {
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER)?
else {
return Ok(None);
};
let decoded = urlencoding::decode(encoded).map_err(|inner| {
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER }
})?;
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
inner,
header: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
})?;
Ok(Some(parsed))
}
}
/// Implement on query types to set header values
///
/// Every setter url-encodes the value (numbers and UUIDs are rendered with `to_string`
/// first) and writes it under both names of the header, with and without the `X-` prefix.
pub trait SetHeader: Sized {
    /// Sets the header called `name` to `value` and returns the updated query.
    fn set_header(self, name: &str, value: &str) -> Self;

    fn set_origin_remote(self, value: &str) -> Self {
        set_header_and_legacy(self, PROXY_ORIGIN_REMOTE_HEADER, &urlencoding::encode(value))
    }

    fn set_origin_task_uid(self, value: TaskId) -> Self {
        let rendered = value.to_string();
        set_header_and_legacy(self, PROXY_ORIGIN_TASK_UID_HEADER, &urlencoding::encode(&rendered))
    }

    fn set_origin_network_version(self, value: Uuid) -> Self {
        let rendered = value.to_string();
        set_header_and_legacy(
            self,
            PROXY_ORIGIN_NETWORK_VERSION_HEADER,
            &urlencoding::encode(&rendered),
        )
    }

    fn set_import_remote(self, value: &str) -> Self {
        set_header_and_legacy(self, PROXY_IMPORT_REMOTE_HEADER, &urlencoding::encode(value))
    }

    fn set_import_index_count(self, value: u64) -> Self {
        let rendered = value.to_string();
        set_header_and_legacy(
            self,
            PROXY_IMPORT_INDEX_COUNT_HEADER,
            &urlencoding::encode(&rendered),
        )
    }

    fn set_import_index(self, value: &str) -> Self {
        set_header_and_legacy(self, PROXY_IMPORT_INDEX_HEADER, &urlencoding::encode(value))
    }

    fn set_import_task_key(self, value: DocumentId) -> Self {
        let rendered = value.to_string();
        set_header_and_legacy(self, PROXY_IMPORT_TASK_KEY_HEADER, &urlencoding::encode(&rendered))
    }

    fn set_import_docs(self, value: u64) -> Self {
        let rendered = value.to_string();
        set_header_and_legacy(self, PROXY_IMPORT_DOCS_HEADER, &urlencoding::encode(&rendered))
    }

    fn set_import_index_docs(self, value: u64) -> Self {
        let rendered = value.to_string();
        set_header_and_legacy(
            self,
            PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
            &urlencoding::encode(&rendered),
        )
    }
}
/// Error raised while extracting a header value through [`GetHeader`].
///
/// Every variant records the name of the header that was being read in `header`.
#[derive(Debug, thiserror::Error)]
pub enum DecodeError<T: GetHeader> {
/// The underlying `get_header` call failed.
#[error("while getting header: {inner}")]
InResponse { inner: T::Error, header: &'static str },
/// The header value could not be url-decoded.
#[error("while url-decoding: {inner}")]
UrlDecoding { inner: FromUtf8Error, header: &'static str },
/// The decoded value is not a valid integer.
#[error("while parsing as an integer: {inner}")]
ParseInt { inner: ParseIntError, header: &'static str },
/// The decoded value is not a valid UUID.
#[error("while parsing as a UUID: {inner}")]
ParseUuid { inner: uuid::Error, header: &'static str },
}
impl<T: GetHeader> DecodeError<T> {
    /// Name of the header that was being read when the error occurred.
    pub fn header(&self) -> &'static str {
        match self {
            Self::InResponse { header, .. }
            | Self::UrlDecoding { header, .. }
            | Self::ParseInt { header, .. }
            | Self::ParseUuid { header, .. } => *header,
        }
    }
}
// Names of the headers read by `GetHeader` and written by `SetHeader`.
// Each name must start with `X-`: `get_header_and_legacy` and `set_header_and_legacy` strip
// that prefix (and panic if it is missing) to derive the second name of the header.
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "X-Meili-Proxy-Origin-Remote";
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "X-Meili-Proxy-Origin-TaskUid";
pub const PROXY_ORIGIN_NETWORK_VERSION_HEADER: &str = "X-Meili-Proxy-Origin-Network-Version";
pub const PROXY_IMPORT_REMOTE_HEADER: &str = "X-Meili-Proxy-Import-Remote";
pub const PROXY_IMPORT_INDEX_COUNT_HEADER: &str = "X-Meili-Proxy-Import-Index-Count";
pub const PROXY_IMPORT_INDEX_HEADER: &str = "X-Meili-Proxy-Import-Index";
pub const PROXY_IMPORT_TASK_KEY_HEADER: &str = "X-Meili-Proxy-Import-Task-Key";
pub const PROXY_IMPORT_DOCS_HEADER: &str = "X-Meili-Proxy-Import-Docs";
pub const PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER: &str = "X-Meili-Proxy-Import-Total-Index-Docs";
/// Reads `header` from `t`, falling back to the same name without its `X-` prefix.
///
/// Returns `Ok(None)` when neither name is present. A failure of `get_header` is reported
/// as `DecodeError::InResponse` carrying the name that was being looked up.
///
/// # Panics
///
/// Panics if the fallback is needed and `header` does not start with `X-`.
fn get_header_and_legacy<'a, T: GetHeader>(
    t: &'a T,
    header: &'static str,
) -> Result<Option<&'a str>, DecodeError<T>> {
    let prefixed =
        t.get_header(header).map_err(|inner| DecodeError::InResponse { inner, header })?;
    if prefixed.is_some() {
        return Ok(prefixed);
    }

    let header = header.strip_prefix("X-").unwrap();
    t.get_header(header).map_err(|inner| DecodeError::InResponse { inner, header })
}
/// Sets `value` under `name` and then under the same name without its `X-` prefix.
///
/// # Panics
///
/// Panics (after the first header was set) if `name` does not start with `X-`.
fn set_header_and_legacy<T: SetHeader>(t: T, name: &'static str, value: &str) -> T {
    // The receiver is evaluated first, so the prefixed header is set before the prefix is stripped.
    t.set_header(name, value).set_header(name.strip_prefix("X-").unwrap(), value)
}
}

View File

@@ -0,0 +1,52 @@
use std::collections::BTreeMap;
use milli::DocumentId;
use crate::network::Remote;
use crate::tasks::network::{ImportState, InRemote, NetworkTopologyChange, ReceiveTaskError};
/// Community edition implementation: every operation is a no-op stub.
impl NetworkTopologyChange {
/// Always returns `None`: there is never anything to export in this edition.
pub fn export_to_process(&self) -> Option<(&BTreeMap<String, Remote>, &str)> {
None
}
/// Does nothing.
pub fn set_moved(&mut self, _moved_documents: u64) {}
/// Does nothing; the state is left unchanged.
pub fn update_state(&mut self) {}
/// Ignores all of its arguments and always returns `Ok(())`.
pub fn receive_remote_task(
&mut self,
_remote_name: &str,
_index_name: Option<&str>,
_task_key: Option<DocumentId>,
_document_count: u64,
_total_indexes: u64,
_total_index_documents: u64,
) -> Result<(), ReceiveTaskError> {
Ok(())
}
/// Ignores all of its arguments and does nothing.
pub fn process_remote_tasks(
&mut self,
_remote_name: &str,
_index_name: &str,
_document_count: u64,
) {
}
/// Always returns `true`.
pub fn is_import_finished(&self) -> bool {
true
}
}
impl InRemote {
/// Returns `true` when the import from this remote is in the `Finished` state.
pub fn is_finished(&self) -> bool {
matches!(self.import_state, ImportState::Finished { .. })
}
}
/// The default value is the one built by `InRemote::new`.
impl Default for InRemote {
fn default() -> Self {
Self::new()
}
}

View File

@@ -0,0 +1,253 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use milli::update::new::indexer::current_edition::sharding::Shards;
use milli::DocumentId;
use roaring::RoaringBitmap;
use super::TaskKeys;
use crate::network::Remote;
use crate::tasks::network::{
ImportIndexState, ImportState, InRemote, NetworkTopologyChange, NetworkTopologyState,
ReceiveTaskError,
};
impl NetworkTopologyChange {
/// Returns what must be exported in the current state, if anything.
///
/// Yields `None` unless the change is in the `ExportingDocuments` state, the new network
/// has at least one remote, and a local name is known. Otherwise returns the remotes of the
/// new network (skipping the local instance) together with the local name to export as.
pub fn export_to_process(
    &self,
) -> Option<(impl Iterator<Item = (&str, &Remote)> + Clone, &str)> {
    let exporting = self.state == NetworkTopologyState::ExportingDocuments;
    if !exporting || self.new_network.remotes.is_empty() {
        return None;
    }

    let out_name = self.out_name()?;
    let in_name = self.in_name();

    let targets = self
        .new_network
        .remotes
        .iter()
        // don't export to ourselves
        .filter(move |(name, _)| Some(name.as_str()) != in_name)
        .map(|(name, remote)| (name.as_str(), remote));

    Some((targets, out_name))
}
/// Returns the shards of the new network, as computed by `Network::shards`.
pub fn new_shards(&self) -> Option<Shards> {
self.new_network.shards()
}
/// Overwrites the number of moved documents recorded in the stats.
pub fn set_moved(&mut self, moved_documents: u64) {
self.stats.moved_documents = moved_documents;
}
/// Compute the next state from the current state of the task.
pub fn update_state(&mut self) {
self.state = match self.state {
NetworkTopologyState::WaitingForOlderTasks => {
// no more older tasks, so finished waiting
NetworkTopologyState::ExportingDocuments
}
NetworkTopologyState::ExportingDocuments => {
// processed all exported documents
if self.is_import_finished() {
NetworkTopologyState::Finished
} else {
NetworkTopologyState::ImportingDocuments
}
}
NetworkTopologyState::ImportingDocuments => {
if self.is_import_finished() {
NetworkTopologyState::Finished
} else {
NetworkTopologyState::ImportingDocuments
}
}
NetworkTopologyState::Finished => NetworkTopologyState::Finished,
};
}
/// Records that an import task was received from `remote_name`.
///
/// - While waiting for the initial task: a remote announcing zero indexes is immediately
///   finished; otherwise, if the task names an index, tracking starts for that index.
/// - While the import is ongoing: the received document count of the index is increased,
///   or tracking starts for an index seen for the first time. Tasks without an index name
///   and tasks for an already finished index leave the state unchanged.
/// - Once the remote is finished, further tasks are ignored.
///
/// # Errors
///
/// - [`ReceiveTaskError::UnknownRemote`] if `remote_name` is not a remote to import from.
/// - [`ReceiveTaskError::DuplicateTask`] if `task_key` was already received for this index.
///   The recorded import progress is left untouched in that case.
pub fn receive_remote_task(
    &mut self,
    remote_name: &str,
    index_name: Option<&str>,
    task_key: Option<DocumentId>,
    document_count: u64,
    total_indexes: u64,
    total_index_documents: u64,
) -> Result<(), ReceiveTaskError> {
    let remote = self
        .in_remotes
        .get_mut(remote_name)
        .ok_or_else(|| ReceiveTaskError::UnknownRemote(remote_name.to_string()))?;

    // The state is updated in place rather than moved out with `mem::take`: returning an
    // error must not leave the remote reset to `WaitingForInitialTask`.
    match remote.import_state {
        ImportState::WaitingForInitialTask => {
            if total_indexes == 0 {
                remote.import_state = ImportState::Finished { total_indexes, total_documents: 0 };
            } else if let Some(index_name) = index_name {
                let mut task_keys = RoaringBitmap::new();
                if let Some(task_key) = task_key {
                    task_keys.insert(task_key);
                }
                let mut import_index_state = BTreeMap::new();
                import_index_state.insert(
                    index_name.to_owned(),
                    ImportIndexState::Ongoing {
                        total_documents: total_index_documents,
                        received_documents: document_count,
                        task_keys: TaskKeys(task_keys),
                        processed_documents: 0,
                    },
                );
                remote.import_state = ImportState::Ongoing { import_index_state, total_indexes };
            }
        }
        ImportState::Ongoing { ref mut import_index_state, total_indexes: _ } => {
            if let Some(index_name) = index_name {
                match import_index_state.get_mut(index_name) {
                    Some(ImportIndexState::Ongoing { received_documents, task_keys, .. }) => {
                        if let Some(task_key) = task_key {
                            // `insert` returns false when the key was already present
                            if !task_keys.0.insert(task_key) {
                                return Err(ReceiveTaskError::DuplicateTask(task_key));
                            }
                        }
                        *received_documents += document_count;
                    }
                    Some(ImportIndexState::Finished { .. }) => {}
                    None => {
                        let mut task_keys = RoaringBitmap::new();
                        if let Some(task_key) = task_key {
                            task_keys.insert(task_key);
                        }
                        import_index_state.insert(
                            index_name.to_string(),
                            ImportIndexState::Ongoing {
                                total_documents: total_index_documents,
                                received_documents: document_count,
                                processed_documents: 0,
                                task_keys: TaskKeys(task_keys),
                            },
                        );
                    }
                }
            }
        }
        ImportState::Finished { .. } => {}
    }
    Ok(())
}
/// Records that `document_count` documents imported from `remote_name` into `index_name`
/// have been processed.
///
/// The index becomes `Finished` once its processed documents reach its total, and the remote
/// becomes `Finished` once all of its `total_indexes` indexes are known and finished.
/// Calls on an already finished remote or index leave it unchanged.
///
/// # Panics
///
/// - if `remote_name` is not part of the remotes to import from,
/// - if no task was received yet from that remote,
/// - if the import is ongoing but `index_name` is not tracked for that remote.
pub fn process_remote_tasks(
&mut self,
remote_name: &str,
index_name: &str,
document_count: u64,
) {
let remote = self
.in_remotes
.get_mut(remote_name)
.expect("process_remote_tasks called on a remote that is not in `in_remotes`");
// Move the state out (leaving the default behind) to transform it by value; the result of
// the match is written back right away.
remote.import_state = match std::mem::take(&mut remote.import_state) {
ImportState::WaitingForInitialTask => panic!("no task received yet one processed"),
ImportState::Ongoing { mut import_index_state, total_indexes } => {
// Take the entry out of the map, update it, and reinsert it below.
let (index_name, mut index_state) =
import_index_state.remove_entry(index_name).unwrap();
index_state = match index_state {
ImportIndexState::Ongoing {
total_documents,
received_documents,
processed_documents: previously_processed,
task_keys,
} => {
let newly_processed_documents = previously_processed + document_count;
if newly_processed_documents >= total_documents {
ImportIndexState::Finished { total_documents }
} else {
ImportIndexState::Ongoing {
total_documents,
received_documents,
processed_documents: newly_processed_documents,
task_keys,
}
}
}
ImportIndexState::Finished { total_documents } => {
ImportIndexState::Finished { total_documents }
}
};
import_index_state.insert(index_name, index_state);
// The remote is done only when every announced index has been seen and is finished.
if import_index_state.len() as u64 == total_indexes
&& import_index_state.values().all(|index| index.is_finished())
{
let total_documents =
import_index_state.values().map(|index| index.total_documents()).sum();
ImportState::Finished { total_indexes, total_documents }
} else {
ImportState::Ongoing { import_index_state, total_indexes }
}
}
ImportState::Finished { total_indexes, total_documents } => {
ImportState::Finished { total_indexes, total_documents }
}
}
}
/// Returns `true` when every remote to import from is finished (including when there is none).
pub fn is_import_finished(&self) -> bool {
self.in_remotes.values().all(|remote| remote.is_finished())
}
}
impl InRemote {
/// Returns `true` when the import from this remote is in the `Finished` state.
pub fn is_finished(&self) -> bool {
matches!(self.import_state, ImportState::Finished { .. })
}
}
/// The default value is the one built by `InRemote::new`.
impl Default for InRemote {
fn default() -> Self {
Self::new()
}
}
impl ImportIndexState {
pub fn is_finished(&self) -> bool {
matches!(self, ImportIndexState::Finished { .. })
}
fn total_documents(&self) -> u64 {
match *self {
ImportIndexState::Ongoing { total_documents, .. }
| ImportIndexState::Finished { total_documents } => total_documents,
}
}
}

View File

@@ -14,91 +14,91 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.1"
actix-http = { version = "3.11.0", default-features = false, features = [
actix-http = { version = "3.11.2", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_23",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.11.0", default-features = false, features = [
actix-web = { version = "4.12.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_23",
] }
anyhow = { version = "1.0.98", features = ["backtrace"] }
bstr = "1.12.0"
anyhow = { version = "1.0.100", features = ["backtrace"] }
bstr = "1.12.1"
byte-unit = { version = "5.1.6", features = ["serde"] }
bytes = "1.10.1"
bumpalo = "3.18.1"
clap = { version = "4.5.40", features = ["derive", "env"] }
bytes = "1.11.0"
bumpalo = "3.19.0"
clap = { version = "4.5.52", features = ["derive", "env"] }
crossbeam-channel = "0.5.15"
deserr = { version = "0.6.3", features = ["actix-web"] }
deserr = { version = "0.6.4", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.15.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
flate2 = "1.1.5"
fst = "0.4.7"
futures = "0.3.31"
futures-util = "0.3.31"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.9.0", features = ["serde"] }
is-terminal = "0.4.16"
indexmap = { version = "2.12.0", features = ["serde"] }
is-terminal = "0.4.17"
itertools = "0.14.0"
jsonwebtoken = "9.3.1"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.7"
mimalloc = { version = "0.1.47", default-features = false }
memmap2 = "0.9.9"
mimalloc = { version = "0.1.48", default-features = false }
mime = "0.3.17"
num_cpus = "1.17.0"
obkv = "0.3.0"
once_cell = "1.21.3"
ordered-float = "5.0.0"
parking_lot = "0.12.4"
ordered-float = "5.1.0"
parking_lot = "0.12.5"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.16"
platform-dirs = "0.3.0"
prometheus = { version = "0.14.0", features = ["process"] }
rand = "0.8.5"
rayon = "1.10.0"
regex = "1.11.1"
reqwest = { version = "0.12.20", features = [
rayon = "1.11.0"
regex = "1.12.2"
reqwest = { version = "0.12.24", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
rustls = { version = "0.23.35", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.13.0", features = ["alloc"] }
rustls-pemfile = "2.2.0"
segment = { version = "0.2.6" }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
sha2 = "0.10.9"
siphasher = "1.0.1"
slice-group-by = "0.3.1"
static-files = { version = "0.2.5", optional = true }
sysinfo = "0.35.2"
static-files = { version = "0.3.1", optional = true }
sysinfo = "0.37.2"
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
tempfile = "3.23.0"
thiserror = "2.0.17"
time = { version = "0.3.44", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.45.1", features = ["full"] }
toml = "0.8.23"
uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
tokio = { version = "1.48.0", features = ["full"] }
toml = "0.9.8"
uuid = { version = "1.18.1", features = ["serde", "v4", "v7"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }
url = { version = "2.5.7", features = ["serde"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.18"
tracing-actix-web = "0.7.19"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.12"
mopa-maintained = "0.2.3"
@@ -114,35 +114,35 @@ utoipa = { version = "5.4.0", features = [
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
secrecy = "0.10.3"
actix-web-lab = { version = "0.24.1", default-features = false }
actix-web-lab = { version = "0.24.3", default-features = false }
urlencoding = "2.1.3"
backoff = { version = "0.4.0", features = ["tokio"] }
humantime = { version = "2.3.0", default-features = false }
[dev-dependencies]
actix-rt = "2.10.0"
brotli = "8.0.1"
actix-rt = "2.11.0"
brotli = "8.0.2"
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["redactions"] }
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
wiremock = "0.6.3"
wiremock = "0.6.5"
yaup = "0.3.1"
[build-dependencies]
anyhow = { version = "1.0.98", optional = true }
cargo_toml = { version = "0.22.1", optional = true }
anyhow = { version = "1.0.100", optional = true }
cargo_toml = { version = "0.22.3", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.12.20", features = [
reqwest = { version = "0.12.24", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.5", optional = true }
tempfile = { version = "3.20.0", optional = true }
zip = { version = "4.1.0", optional = true }
static-files = { version = "0.3.1", optional = true }
tempfile = { version = "3.23.0", optional = true }
zip = { version = "6.0.0", optional = true }
[features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -160,6 +160,7 @@ mini-dashboard = [
]
chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
enterprise = ["meilisearch-types/enterprise", "index-scheduler/enterprise"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]

View File

@@ -1,7 +1,7 @@
use std::any::TypeId;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -195,7 +195,7 @@ struct Infos {
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
experimental_limit_batched_tasks_total_size: u64,
experimental_limit_batched_tasks_total_size: Option<u64>,
experimental_network: bool,
experimental_multimodal: bool,
experimental_chat_completions: bool,
@@ -344,14 +344,14 @@ impl Infos {
experimental_no_edition_2024_for_dumps,
experimental_vector_store_setting: vector_store_setting,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
db_path: db_path != Path::new("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
dump_dir: dump_dir != Path::new("dumps/"),
ignore_missing_dump,
ignore_dump_if_db_exists,
import_snapshot: import_snapshot.is_some(),
schedule_snapshot,
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
snapshot_dir: snapshot_dir != Path::new("snapshots/"),
uses_s3_snapshots: s3_snapshot_options.is_some(),
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,
@@ -359,7 +359,7 @@ impl Infos {
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size:
experimental_limit_batched_tasks_total_size.into(),
experimental_limit_batched_tasks_total_size.map(|size| size.as_u64()),
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),

View File

@@ -6,10 +6,14 @@ use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli;
use meilisearch_types::milli::OrderBy;
use meilisearch_types::tasks::network::headers::{
PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER,
PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER,
};
use serde_json::Value;
use tokio::task::JoinError;
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
use uuid::Uuid;
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
@@ -93,8 +97,58 @@ pub enum MeilisearchHttpError {
} else { PROXY_ORIGIN_TASK_UID_HEADER }
)]
InconsistentOriginHeaders { is_remote_missing: bool },
#[error("Invalid value for header {header_name}: {msg}")]
#[error("Inconsistent `Import` headers: {remote}: {remote_status}, {index}: {index_status}, {docs}: {docs_status}.\n - Hint: either all three headers should be provided, or none of them",
remote = PROXY_IMPORT_REMOTE_HEADER,
remote_status = if *is_remote_missing { "missing" } else{ "provided" },
index = PROXY_IMPORT_INDEX_HEADER,
index_status = if *is_index_missing { "missing" } else { "provided" },
docs = PROXY_IMPORT_DOCS_HEADER,
docs_status = if *is_docs_missing { "missing" } else { "provided" }
)]
InconsistentImportHeaders {
is_remote_missing: bool,
is_index_missing: bool,
is_docs_missing: bool,
},
#[error("Inconsistent `Import-Metadata` headers: {index_count}: {index_count_status}, {task_key}: {task_key_status}, {total_index_documents}: {total_index_documents_status}.\n - Hint: either all three headers should be provided, or none of them",
index_count = PROXY_IMPORT_INDEX_COUNT_HEADER,
index_count_status = if *is_index_count_missing { "missing" } else { "provided"},
task_key = PROXY_IMPORT_TASK_KEY_HEADER,
task_key_status = if *is_task_key_missing { "missing" } else { "provided"},
total_index_documents = PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
total_index_documents_status = if *is_total_index_documents_missing { "missing" } else { "provided"},
)]
InconsistentImportMetadataHeaders {
is_index_count_missing: bool,
is_task_key_missing: bool,
is_total_index_documents_missing: bool,
},
#[error(
"Inconsistent task network headers: origin headers: {origin_status}, import headers: {import_status}, import metadata: {import_metadata_status}",
origin_status = if *is_missing_origin { "missing"} else { "present" },
import_status = if *is_missing_import { "missing"} else { "present" },
import_metadata_status = if *is_missing_import_metadata { "missing"} else { "present" })]
InconsistentTaskNetworkHeaders {
is_missing_origin: bool,
is_missing_import: bool,
is_missing_import_metadata: bool,
},
#[error("Invalid value for header `{header_name}`: {msg}")]
InvalidHeaderValue { header_name: &'static str, msg: String },
#[error("This remote is not the leader of the network.\n - Note: only the leader `{leader}` can receive new tasks.")]
NotLeader { leader: String },
#[error("Unexpected `previousRemotes` in network call.\n - Note: `previousRemote` is reserved for internal use.")]
UnexpectedNetworkPreviousRemotes,
#[error("The network version in request is too old.\n - Received: {received}\n - Expected at least: {expected_at_least}")]
NetworkVersionTooOld { received: Uuid, expected_at_least: Uuid },
#[error("Remote `{remote}` encountered an error: {error}")]
RemoteIndexScheduler { remote: String, error: index_scheduler::Error },
#[error("{if_remote}Already has a pending network task with uid {task_uid}.\n - Note: No network task can be registered while any previous network task is not done processing.\n - Hint: Wait for task {task_uid} to complete or cancel it.",
if_remote=if let Some(remote) = remote {
format!("Remote `{remote}` encountered an error: ")
} else {"".into()} )]
UnprocessedNetworkTask { remote: Option<String>, task_uid: meilisearch_types::tasks::TaskId },
}
impl MeilisearchHttpError {
@@ -122,6 +176,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::RemoteIndexScheduler { error, .. } => error.error_code(),
MeilisearchHttpError::Milli { error, .. } => error.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
@@ -142,10 +197,19 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
Code::InvalidMultiSearchQueryPersonalization
}
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
MeilisearchHttpError::InconsistentOriginHeaders { .. }
| MeilisearchHttpError::InconsistentImportHeaders { .. }
| MeilisearchHttpError::InconsistentImportMetadataHeaders { .. }
| MeilisearchHttpError::InconsistentTaskNetworkHeaders { .. } => {
Code::InconsistentDocumentChangeHeaders
}
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
MeilisearchHttpError::NotLeader { .. } => Code::NotLeader,
MeilisearchHttpError::UnexpectedNetworkPreviousRemotes => {
Code::UnexpectedNetworkPreviousRemotes
}
MeilisearchHttpError::NetworkVersionTooOld { .. } => Code::NetworkVersionTooOld,
MeilisearchHttpError::UnprocessedNetworkTask { .. } => Code::UnprocessedNetworkTask,
}
}
}
@@ -169,6 +233,14 @@ impl From<aweb::error::PayloadError> for MeilisearchHttpError {
}
}
impl<T: meilisearch_types::tasks::network::headers::GetHeader>
From<meilisearch_types::tasks::network::headers::DecodeError<T>> for MeilisearchHttpError
{
fn from(value: meilisearch_types::tasks::network::headers::DecodeError<T>) -> Self {
Self::InvalidHeaderValue { header_name: value.header(), msg: value.to_string() }
}
}
#[derive(Debug, thiserror::Error)]
pub enum ActixPayloadError {
#[error("The provided payload is incomplete and cannot be parsed")]
@@ -199,12 +271,13 @@ impl ErrorCode for PayloadError {
PayloadError::Payload(e) => match e {
ActixPayloadError::IncompleteError => Code::BadRequest,
ActixPayloadError::OtherError(error) => match error {
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::EncodingCorrupted => Code::BadRequest,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::UnknownLength => Code::BadRequest,
aweb::error::PayloadError::Http2Payload(_) => Code::BadRequest,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
aweb::error::PayloadError::Incomplete(_) => Code::BadRequest,
_ => Code::Internal,
},
},
PayloadError::Json(err) => match err {

View File

@@ -12,6 +12,7 @@ pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod proxy;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -229,8 +230,19 @@ pub fn setup_meilisearch(
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
export_default_payload_size_bytes: almost_as_big_as(opt.http_payload_size_limit),
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
|| {
opt.indexer_options
.max_indexing_memory
// By default, we use half of the available memory to determine the size of batched tasks
.map_or(u64::MAX, |mem| mem.as_u64() / 2)
// And never exceed 10 GiB when we infer the limit
.min(10 * 1024 * 1024 * 1024)
},
|size| size.as_u64(),
),
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(),
@@ -329,6 +341,13 @@ pub fn setup_meilisearch(
Ok((index_scheduler, auth_controller))
}
/// Returns `input` minus a 1 MiB margin, with a floor of 20 MiB.
///
/// When the subtraction cannot be performed (`subtract` yields `None`) or its result
/// is smaller than 20 MiB, 20 MiB is returned instead.
fn almost_as_big_as(input: byte_unit::Byte) -> byte_unit::Byte {
    // Lower bound of the returned value: 20 MiB.
    let floor = byte_unit::Byte::MEBIBYTE.multiply(20).unwrap();
    match input.subtract(byte_unit::Byte::MEBIBYTE) {
        Some(reduced) if reduced > floor => reduced,
        _ => floor,
    }
}
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
fn open_or_create_database_unchecked(
opt: &Opt,

View File

@@ -1,7 +1,8 @@
use lazy_static::lazy_static;
use prometheus::{
opts, register_gauge, register_histogram_vec, register_int_counter_vec, register_int_gauge,
register_int_gauge_vec, Gauge, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
opts, register_gauge, register_gauge_vec, register_histogram_vec, register_int_counter_vec,
register_int_gauge, register_int_gauge_vec, Gauge, GaugeVec, HistogramVec, IntCounterVec,
IntGauge, IntGaugeVec,
};
lazy_static! {
@@ -73,6 +74,20 @@ lazy_static! {
&["kind", "value"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_BATCH_RUNNING_PROGRESS_TRACE: GaugeVec = register_gauge_vec!(
opts!("meilisearch_batch_running_progress_trace", "The currently running progress trace"),
&["batch_uid", "step_name"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS: IntGaugeVec =
register_int_gauge_vec!(
opts!(
"meilisearch_last_finished_batches_progress_trace_ms",
"The last few batches progress trace in milliseconds"
),
&["batch_uid", "step_name"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_LAST_UPDATE: IntGauge =
register_int_gauge!(opts!("meilisearch_last_update", "Meilisearch Last Update"))
.expect("Can't create a metric");

View File

@@ -473,11 +473,14 @@ pub struct Opt {
#[serde(default = "default_limit_batched_tasks")]
pub experimental_max_number_of_batched_tasks: usize,
/// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once,
/// see: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
#[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: Byte,
/// Experimentally controls the maximum total size, in bytes, of tasks that will be processed
/// simultaneously. When unspecified, defaults to half of the maximum indexing memory and
/// clamped to 10 GiB.
///
/// See: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE)]
#[serde(default)]
pub experimental_limit_batched_tasks_total_size: Option<Byte>,
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
/// distinct embedder.
@@ -701,10 +704,12 @@ impl Opt {
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
experimental_max_number_of_batched_tasks.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
experimental_limit_batched_tasks_total_size.to_string(),
);
if let Some(limit) = experimental_limit_batched_tasks_total_size {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
limit.to_string(),
);
}
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
experimental_embedding_cache_entries.to_string(),
@@ -1273,10 +1278,6 @@ fn default_limit_batched_tasks() -> usize {
usize::MAX
}
fn default_limit_batched_tasks_total_size() -> Byte {
Byte::from_u64(u64::MAX)
}
fn default_embedding_cache_entries() -> usize {
0
}

View File

@@ -1,14 +1,14 @@
use crate::search::{Personalize, SearchResult};
use meilisearch_types::{
error::{Code, ErrorCode, ResponseError},
milli::TimeBudget,
};
use std::time::Duration;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::milli::TimeBudget;
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::{debug, info, warn};
use crate::search::{Personalize, SearchResult};
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
const MAX_RETRIES: u32 = 10;

View File

@@ -0,0 +1,43 @@
use std::fs::File;
use meilisearch_types::network::Remote;
/// The body attached to a proxied request.
///
/// `T` is the serializable payload type and `F` is a callback receiving a string, a
/// [`Remote`] and a mutable reference to the payload.
// NOTE(review): the `&str` argument of `F` is presumably the remote's name, and `F` is
// presumably called once per remote before sending — confirm against the proxy code.
pub enum Body<T: serde::Serialize, F: FnMut(&str, &Remote, &mut T)> {
    /// A payload backed by a file (presumably NDJSON documents, going by the name).
    NdJsonPayload(File),
    /// A serializable payload used as provided.
    Inline(T),
    /// An initial payload together with a callback that can mutate it.
    Generated(T, F),
    /// No body at all.
    None,
}
impl Body<(), fn(&str, &Remote, &mut ())> {
pub fn with_ndjson_payload(file: File) -> Self {
Self::NdJsonPayload(file)
}
pub fn none() -> Self {
Self::None
}
}
impl<T> Body<T, fn(&str, &Remote, &mut T)>
where
T: serde::Serialize,
{
pub fn inline(payload: T) -> Self {
Self::Inline(payload)
}
}
impl<T, F> Body<T, F>
where
T: serde::Serialize,
F: FnMut(&str, &Remote, &mut T),
{
pub fn generated(initial: T, f: F) -> Self {
Self::Generated(initial, f)
}
}

View File

@@ -0,0 +1,31 @@
use actix_web::HttpRequest;
use index_scheduler::IndexScheduler;
use meilisearch_types::network::{Network, Remote};
use meilisearch_types::tasks::network::{DbTaskNetwork, TaskNetwork};
use meilisearch_types::tasks::Task;
use crate::error::MeilisearchHttpError;
use crate::proxy::Body;
/// Stub implementation: ignores both arguments and always returns `Ok(None)`.
///
/// No header is read and no leader or version check is performed here.
// NOTE(review): presumably the variant compiled when the `enterprise` feature is
// disabled — confirm against the module's `cfg` gating.
pub fn task_network_and_check_leader_and_version(
    _req: &HttpRequest,
    _network: &Network,
) -> Result<Option<TaskNetwork>, MeilisearchHttpError> {
    let task_network: Option<TaskNetwork> = None;
    Ok(task_network)
}
pub async fn proxy<T, F>(
_index_scheduler: &IndexScheduler,
_index_uid: Option<&str>,
_req: &HttpRequest,
_task_network: DbTaskNetwork,
_network: Network,
_body: Body<T, F>,
task: &Task,
) -> Result<Task, MeilisearchHttpError>
where
T: serde::Serialize,
F: FnMut(&str, &Remote, &mut T),
{
Ok(task.clone())
}

Some files were not shown because too many files have changed in this diff Show More