Compare commits

...

505 Commits

Author SHA1 Message Date
3aef2c9e42 Remove accidentally added db snap 2025-07-17 17:15:22 +02:00
53dbb790bb Fix new indexer 2025-07-17 17:15:22 +02:00
556a9ce9dc Fix in old indexer 2025-07-17 17:15:22 +02:00
b9e4c6b8c2 Add test 2025-07-17 17:15:22 +02:00
fd8b2451d7 Merge pull request #5754 from kametsun/fix/incorrect-stats-doc-count
Fix incorrect document count in stats after clearing all documents
2025-07-17 06:48:51 +00:00
058f9ffda5 Merge pull request #5734 from meilisearch/request-fragments-test
Tests for multimodal
2025-07-16 11:04:00 +00:00
5d363205a5 Merge pull request #5716 from meilisearch/document-sorting
Allow sorting on the /documents route
2025-07-16 10:26:50 +00:00
a683faa882 Apply review suggestions 2025-07-16 11:03:24 +02:00
8887cbdcd5 Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
2025-07-16 07:15:24 +00:00
634865ff53 Merge pull request #5710 from meilisearch/chat-route-support-filters
Introduce filters in the chat completions
2025-07-15 16:10:49 +00:00
36fccf8525 Merge remote-tracking branch 'origin/release-v1.16.0' into fix-threshold-overcounting-bug 2025-07-15 18:01:29 +02:00
d6bd60d569 Apply review suggestions
Co-Authored-By: Louis Dureuil <louis.dureuil@xinra.net>
2025-07-15 18:00:37 +02:00
48ad959fc1 Merge remote-tracking branch 'origin/release-v1.16.0' into document-sorting 2025-07-15 17:41:46 +02:00
1bc30cb4c8 Restore old benchmark names 2025-07-15 17:34:04 +02:00
77138a42d6 Apply review suggestions
Add preconditions

Fix underflow

Remove unwrap

Turn methods to associated functions

Apply review suggestions
2025-07-15 17:31:11 +02:00
0791506124 Fix some proposals 2025-07-15 17:10:45 +02:00
2a015ac3b8 Implement basic few shot prompting to improve the query capabilities 2025-07-15 14:50:10 +02:00
6f248b78a9 Merge pull request #5751 from meilisearch/fix-searchable-attributes-order
Fix: Preserve order of searchable attributes when modified
2025-07-15 10:38:11 +00:00
d694e312ff Update crates/milli/src/update/settings.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-07-15 11:54:59 +02:00
d76dcc8998 Make clippy happy 2025-07-15 11:49:48 +02:00
e654f66223 Support filtering 2025-07-15 11:49:47 +02:00
34f2ab7093 WIP report search errors to the LLM 2025-07-15 11:49:46 +02:00
1a9dbd364e Fix some issues 2025-07-15 11:49:46 +02:00
662c5d9871 Introduce filters in the chat completions 2025-07-15 11:49:45 +02:00
5cd61b50f9 Fix formatting 2025-07-12 18:19:26 +09:00
9a9be76757 add: verify that the statistics are correctly update assert 2025-07-12 11:15:44 +09:00
cfa6ba6c3b Fix stats showing wrong document count after clear all
Update database stats after clearing documents to ensure
/stats endpoint returns correct numberOfDocuments: 0 instead
of stale count.
2025-07-12 11:15:44 +09:00
f4f333dbf6 Merge pull request #5753 from meilisearch/export-fixes
Various fixes on the export route
2025-07-11 19:15:42 +00:00
1ade76ba10 Remove sneaky debug 2025-07-11 12:27:04 +02:00
ae26658913 Use the most appropriate unit in payload_too_large error 2025-07-11 12:27:03 +02:00
aa09edb3fb Fix errors being silently dropped 2025-07-11 12:27:03 +02:00
3f42f1a036 Get rid of bearer 2025-07-11 12:27:03 +02:00
9bdfdd395b Fix document step overflowing 2025-07-11 12:27:03 +02:00
78d0625a91 Decrease default payload size for exports 2025-07-11 12:27:03 +02:00
3f655ea20e compare user defined searchable fields instead of internal searchable fields 2025-07-10 18:24:23 +02:00
50bc1d55f3 Add test reproducing the bug 2025-07-10 18:23:46 +02:00
0a4f2ef891 Leak mock servers 2025-07-08 15:27:35 +02:00
faa1f7c5b7 Merge pull request #5693 from Mubelotix/default-key
Add a Read-Only Admin API Key by default
2025-07-08 12:38:29 +00:00
3cc5d86598 Format 2025-07-08 13:57:17 +02:00
1ae47bec77 Improve composite test 2025-07-08 13:57:07 +02:00
2f1be0ff86 Ignore faulty test (see #5746) 2025-07-08 13:55:07 +02:00
9cee432255 Fix broken tests 2025-07-08 13:36:26 +02:00
ff8d48d2f1 Merge branch 'main' into default-key 2025-07-08 12:21:46 +02:00
a56c036994 Update crates/meilisearch-types/src/keys.rs
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2025-07-08 12:18:52 +02:00
511c48f520 Merge pull request #5737 from meilisearch/request-fragments-dumpless-upgrade
Fix the dumpless upgrade from v1.15 to v1.16 for request fragments
2025-07-08 08:49:38 +00:00
4623691d1f Don't make the type-that-shall-not-be-written serializable
Following tamo's advice

Co-Authored-By: Tamo <tamo@meilisearch.com>
2025-07-08 10:04:33 +02:00
3261aadcf2 Add composite test 2025-07-07 16:50:39 +02:00
073e9f2967 Disable similarity check on composite embedders using fragments 2025-07-07 16:46:16 +02:00
5f8f48ec95 Add new snapshot checking for regenerativeness 2025-07-07 16:43:05 +02:00
ed2fe365a0 Fix existing snaps 2025-07-07 16:42:50 +02:00
f7c8a77f89 Update v1.12.0 DB to contain vectors 2025-07-07 16:01:50 +02:00
a8030850ee Merge pull request #5733 from meilisearch/improve-export-analytics
Improve the analytics of the `/export` route
2025-07-07 12:26:11 +00:00
132065afda Minor improvements 2025-07-07 13:10:16 +02:00
51c298662b Merge branch 'main' into request-fragments-test 2025-07-07 13:00:21 +02:00
70a860a0f0 Merge branch 'main' into fix-threshold-overcounting-bug 2025-07-07 12:26:37 +02:00
a3254d7d7d Implement dumpless upgrade from v1.15 to v1.16 2025-07-07 11:57:08 +02:00
73c9c1ebdc Add compile-time checks for dumpless upgrade 2025-07-07 11:34:18 +02:00
4c7a6e5c1b Do not leak private URLs 2025-07-07 11:07:58 +02:00
ef4c87accf Merge pull request #5732 from meilisearch/chat-route-support-metrics
Add chat-related metrics on the prometheus route
2025-07-07 08:33:31 +00:00
ced7ea4a5c Merge pull request #5731 from meilisearch/chat-route-support-dumps
Export and import chat completions workspace settings in dumps
2025-07-07 08:31:41 +00:00
fa3990daf9 Format 2025-07-04 13:33:49 +02:00
c5993196b3 Add test 2025-07-04 13:32:55 +02:00
16234e1313 Add fragment swapping test 2025-07-04 13:25:42 +02:00
be9f4f96df Add experimental feature test 2025-07-04 13:15:15 +02:00
b274106ad3 Add test 2025-07-04 13:05:52 +02:00
48527761e7 Add test 2025-07-04 12:01:15 +02:00
6792d048b8 Test both fragments and document template 2025-07-04 11:47:38 +02:00
07bfed99e6 Expose the host in the analytics 2025-07-04 11:08:02 +02:00
8dfded2993 Update tests 2025-07-04 10:49:03 +02:00
3714f16696 Fix bug 2025-07-04 10:40:50 +02:00
d0cd3cacec Add a way to reproduce the bug 2025-07-03 18:18:04 +02:00
fef089c7b6 Merge pull request #5596 from meilisearch/request-fragments
Request fragments
2025-07-03 15:01:44 +00:00
d47e1e15de Merge pull request #5730 from meilisearch/update-version-v1.16.0
Update version for the next release (v1.16.0) in Cargo.toml
2025-07-03 14:45:43 +00:00
caccb51814 Add a complex value test 2025-07-03 16:10:23 +02:00
a76a3e8f11 Change the metric name for the search to use a label 2025-07-03 16:01:31 +02:00
32dede35c7 Update snapshots 2025-07-03 15:59:14 +02:00
6397ef12a0 Use three metrics for the three different tokens 2025-07-03 15:56:56 +02:00
cf9b311f71 Format 2025-07-03 15:53:09 +02:00
7423243be0 Add test with multiple embedders 2025-07-03 15:52:18 +02:00
b5e41f0e46 Fix the Mistral uncompatibility with the usage of OpenAI 2025-07-03 15:21:40 +02:00
5690700601 Add fragment addition test 2025-07-03 15:19:31 +02:00
2faad504c6 Add test 2025-07-03 15:12:47 +02:00
2bcd69750f Add fragment modification test 2025-07-03 15:08:27 +02:00
9f0d33ec99 Expose the number of tokens on the chat completions routes 2025-07-03 15:05:15 +02:00
de24e75be8 Update test 2025-07-03 15:00:11 +02:00
a3af9fe057 new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:44:34 +02:00
90683d0e4e add snapshot of get settings 2025-07-03 14:43:06 +02:00
5c79273748 Add TODOs 2025-07-03 14:42:49 +02:00
90e6b6416f new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:35:02 +02:00
2b75072b09 Expose the number of internal chat searches on the /metrics route 2025-07-03 14:04:27 +02:00
6e6fd077d4 Ignore unexisting chat completions settings folder 2025-07-03 13:37:38 +02:00
b45eea0d3e Add test for fragment deletion 2025-07-03 13:26:44 +02:00
a051ab3d9a Support importing chat completions settings 2025-07-03 12:04:40 +02:00
0b89ef1fd7 Make tests use a shared index 2025-07-03 11:32:49 +02:00
65ba7b47af Test search fragments 2025-07-03 11:32:49 +02:00
8af76a65bf Add test_fragment_indexing 2025-07-03 11:32:49 +02:00
6b94033c97 Correctly export the chat completions settings in dumps 2025-07-03 11:30:24 +02:00
dfe0c8664e Add a version of prompt::Context that has no fields 2025-07-03 11:08:31 +02:00
0ca652de28 Extract vector points: remove the { 2025-07-03 10:52:30 +02:00
87f105747f Add documentation to Extractor trait 2025-07-03 10:41:20 +02:00
735634e998 Send owned metadata and clear inputs in case of error 2025-07-03 10:32:57 +02:00
3740755d9c Compare to RawValue::NULL constant rather than explicit "null" 2025-07-03 10:11:07 +02:00
bbcabc47bd Update version for the next release (v1.16.0) in Cargo.toml 2025-07-03 08:06:38 +00:00
a06cb1bfd6 Remove Embed::process_embeddings and have it be an inherent function of the type that uses it 2025-07-03 10:02:16 +02:00
549dc985b8 Old dump import indexer: fix the case where going from Generated to Generated 2025-07-03 09:58:41 +02:00
428463e45c Check indexing fragments as well as search fragments 2025-07-02 16:17:22 +02:00
7113fcf63a New error 2025-07-02 16:17:12 +02:00
aa6855cd4f Vector settings: don't assume which kind of request is asked when looking at a settings update without fragments 2025-07-02 16:12:23 +02:00
895db76a51 Fix snaps 2025-07-02 16:10:05 +02:00
a88146d59e Merge pull request #5728 from meilisearch/bump-minidashboard-v0.2.20
Bump the mini-dashboard to v0.2.20
2025-07-02 11:03:00 +00:00
91e77abf4f Bump the mini-dashboard to v0.2.20 2025-07-02 12:15:11 +02:00
f60814b319 Add benchmark 2025-07-02 12:06:00 +02:00
5a675bcb82 Add benchmarks 2025-07-02 11:50:32 +02:00
82a796aea7 vector settings: fix bug where removed fragments were returned as new 2025-07-02 11:36:50 +02:00
f6287602e9 Improve error message when request contains the wrong type of placeholder 2025-07-02 11:36:50 +02:00
ede456c5b0 New error: rest inconsistent fragments 2025-07-02 11:36:50 +02:00
3f5b5df139 Check consistency of fragments 2025-07-02 11:36:50 +02:00
d72e5f5f69 Hide documentTemplate and documentTemplateMaxBytes when indexing_fragment is defined 2025-07-02 11:29:50 +02:00
aa366d593d Merge pull request #5726 from meilisearch/dependabot/github_actions/Swatinem/rust-cache-2.8.0
Bump Swatinem/rust-cache from 2.7.8 to 2.8.0
2025-07-02 08:09:11 +00:00
205430854d Merge pull request #5727 from meilisearch/dependabot/github_actions/svenstaro/upload-release-action-2.11.1
Bump svenstaro/upload-release-action from 2.7.0 to 2.11.1
2025-07-02 08:05:07 +00:00
be64006211 Fix process export 2025-07-02 09:12:18 +02:00
eda309d562 make sure fragments are ordered 2025-07-02 00:05:13 +02:00
119d618a76 Do not "upgrade" regnerate fragments to regenerate prompt 2025-07-02 00:05:13 +02:00
2b2e6c0b3a Settings changes 2025-07-02 00:05:13 +02:00
e6329e77e1 settings fragment_diffs 2025-07-02 00:05:13 +02:00
b086c51a23 new settings indexer 2025-07-02 00:05:13 +02:00
9ce5598fef parsed vectors: embeddings is None when it is null when read from DB 2025-07-02 00:05:13 +02:00
e30c24b5bf Prompt: relax lifetime constraints 2025-07-02 00:05:13 +02:00
c1a132fa06 multimodal experimental feature 2025-07-02 00:05:13 +02:00
e54fc59248 Fix snaps 2025-07-02 00:05:13 +02:00
11e7c0d75f Fix tests 2025-07-02 00:05:13 +02:00
c593fbe648 Analytics 2025-07-02 00:05:12 +02:00
2b3327ea74 Use media to determine search kind 2025-07-02 00:05:12 +02:00
d14184f4da Add media to search 2025-07-02 00:05:12 +02:00
46bceb91f1 New search errors 2025-07-02 00:05:12 +02:00
cab5e35ff7 Implement in old settings indexer and old dump import indexer 2025-07-02 00:05:12 +02:00
f8232976ed Implement in new document indexer 2025-07-02 00:05:12 +02:00
22d363c05a Clear DB on clear documents 2025-07-02 00:05:12 +02:00
41620d5325 Support indexingFragments and searchFragments in settings 2025-07-02 00:05:12 +02:00
f3d5c74c02 Vector settings to add indexingFragments and searchFragments 2025-07-02 00:05:12 +02:00
d48baece51 New error when too many fragments in settings 2025-07-02 00:05:12 +02:00
c45ede44a8 Add new parameters to openai and rest embedders 2025-07-02 00:05:11 +02:00
4235a82dcf REST embedder supports fragments 2025-07-02 00:05:11 +02:00
e7b9b8f002 Change embedder API 2025-07-02 00:05:11 +02:00
5716ab70f3 EmbeddingConfigs -> RuntimeEmbedders 2025-07-02 00:05:11 +02:00
422a786ffd RuntimeEmbedder and RuntimeFragments 2025-07-02 00:05:11 +02:00
836ae19bec ArroyWrapper changes 2025-07-02 00:05:11 +02:00
0b5bc41b79 Add new vector errors 2025-07-02 00:05:11 +02:00
b45059e8f2 Add vector::session module 2025-07-02 00:05:11 +02:00
c16c60b599 Add vector::extractor module 2025-07-02 00:05:11 +02:00
0114796d2a Index uses the vector::db stuff 2025-07-02 00:05:10 +02:00
17a94c40dc Add vector::db module 2025-07-02 00:05:10 +02:00
76ca44b214 Expand json_template module 2025-07-02 00:05:10 +02:00
d2e4d6dd8a prompt: Publishes some types 2025-07-02 00:04:04 +02:00
879cf85037 Bump svenstaro/upload-release-action from 2.7.0 to 2.11.1
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.7.0 to 2.11.1.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.7.0...2.11.1)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-version: 2.11.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-01 17:23:13 +00:00
c2d5b20a42 Bump Swatinem/rust-cache from 2.7.8 to 2.8.0
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.8 to 2.8.0.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.8...v2.8.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-version: 2.8.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-01 17:23:08 +00:00
600178c5ab Still limit to max hits 2025-07-01 18:33:09 +02:00
b93ca3945e Merge pull request #5723 from meilisearch/fix-flaky-embedder-test
Fix flaky last_error test
2025-07-01 15:14:28 +00:00
8fef48f8ca Merge pull request #5670 from meilisearch/export-and-transfer-route
Introduce a new route to export indexes
2025-07-01 14:37:02 +00:00
dedae94102 Fix #5274 2025-07-01 16:22:25 +02:00
7ae9a4afee Add a test for issue #5274 2025-07-01 15:42:43 +02:00
d2776efb11 Fix flaky last_error test 2025-07-01 15:14:56 +02:00
9211e94c4f Format 2025-07-01 15:03:20 +02:00
b7bebe9bbb Fix export when index already exists 2025-07-01 15:03:04 +02:00
37a692f942 Keep IndexUidPattern 2025-07-01 14:47:43 +02:00
25c19a306b Rename variable
Co-authored-by: Kero <clement@meilisearch.com>
2025-07-01 14:42:44 +02:00
c078efd730 Remove experimental todo 2025-07-01 14:40:59 +02:00
9dac91efe0 Fix utoipa response 2025-07-01 14:40:39 +02:00
074d509d92 Fix expect message 2025-07-01 14:39:52 +02:00
d439a3cb9d Fix progress names 2025-07-01 14:39:24 +02:00
e92b6beb20 Revert making check_sort_criteria usable without a search context 2025-07-01 14:26:55 +02:00
27cc357362 Document code 2025-07-01 14:21:55 +02:00
73dfeefc7c Remove plural form 2025-07-01 14:08:46 +02:00
d85480de89 Move sort code out of facet 2025-07-01 14:05:47 +02:00
9f55708d84 Format 2025-07-01 13:58:56 +02:00
280c3907be Add test to sort the unsortable 2025-07-01 13:58:37 +02:00
8419fd9b3b Ditch usage of check_sort_criteria 2025-07-01 13:42:38 +02:00
283944ea89 Differentiate between document sort error and search sort error 2025-07-01 12:03:50 +02:00
8aacd6374a Optimize geo sort 2025-07-01 11:50:01 +02:00
8326f34ad1 Add analytics 2025-07-01 11:35:28 +02:00
259fc067d3 Count exported documents by index name, not pattern 2025-07-01 11:14:59 +02:00
e8b2bb3ea6 Merge pull request #5709 from meilisearch/analytics-chat-completions
Add analytics to the chat completions
2025-07-01 09:14:47 +00:00
7dfb2071b5 Merge pull request #5683 from meilisearch/fix-recoverable-file-store-error
Make sure to recover from missing update file
2025-07-01 09:08:55 +00:00
9cfbef478e Add override setttings to analytics 2025-07-01 11:04:59 +02:00
efd5fd96cc Add the overrideSettings parameter 2025-07-01 11:02:42 +02:00
f4a908669c Add tests 2025-07-01 10:02:15 +02:00
eb2c2815b6 Fix panic 2025-07-01 10:00:10 +02:00
0ef52941c7 Merge pull request #5687 from meilisearch/settings-indexer-edition-2024
Settings indexer edition 2024
2025-07-01 07:35:21 +00:00
0d85f8fcee Make sure to recover from missing update file 2025-06-30 19:09:30 +02:00
f4bb6cbca8 Better behavior when null indexes 2025-06-30 18:59:16 +02:00
ad03c86c44 Display an accurate number of uploaded documents 2025-06-30 18:46:47 +02:00
85037352b9 Fix most of the easy issues 2025-06-30 18:31:32 +02:00
29e9c74a49 Merge two ifs 2025-06-30 16:17:04 +02:00
1b54c866e1 Link experimental feature discussion 2025-06-30 14:47:39 +02:00
e414284335 Clippy too many arguments 2025-06-30 14:25:28 +02:00
7a204609fe Move document context and identifiers in document.rs 2025-06-30 14:21:46 +02:00
f6803dd7d1 Simplify iterator chaining in facet sort 2025-06-30 14:05:23 +02:00
f86f4f619f Implement geo sort on documents 2025-06-30 13:57:30 +02:00
e35d58b531 Move geosort code out of search 2025-06-30 13:12:00 +02:00
63827bbee0 Move sorting code out of search 2025-06-30 11:59:59 +02:00
6b2b8ed676 Transform experimental_no_edition_2024_for_settings into a config 2025-06-30 11:49:03 +02:00
6db5939f84 Re-integrate embedder stats 2025-06-30 09:52:06 +02:00
d35b2d8d33 minor fixes 2025-06-30 09:52:06 +02:00
0687cf058a Avoid rewritting documents that don't change
Ensure being on a reindex action before getting embedder_category_id

Fix document skip function
2025-06-30 09:52:06 +02:00
340d9e6edc Optimize facet sort
5 to 10x speedup
2025-06-27 14:40:55 +02:00
7219299436 Better handle task abortion 2025-06-27 12:33:32 +02:00
657bbf5d1e Fix more tests 2025-06-27 10:14:26 +02:00
28adbc0d18 Update tests 2025-06-27 09:47:46 +02:00
e3fba62e13 Fix typo 2025-06-27 09:40:59 +02:00
fb9170b8e3 Keep name consistent with others 2025-06-27 09:40:30 +02:00
c15763f910 Improve key description
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-06-27 09:39:24 +02:00
7fa1c41190 Fix some api key errors 2025-06-26 18:25:49 +02:00
77802dabf6 rename DocumentChangeContext into DocumentContext 2025-06-26 18:14:48 +02:00
a685eeafeb wierd snapshot update 2025-06-26 18:14:48 +02:00
f16e6f7c37 Update snapshots 2025-06-26 18:14:48 +02:00
900be0ccad Extract or regenerate vectors related to settings changes 2025-06-26 18:14:48 +02:00
51a087b764 Write back user provided vectors from deleted embedders 2025-06-26 18:14:48 +02:00
31142b3663 Introduce extractor for setting changes 2025-06-26 18:14:48 +02:00
e60b855a54 Delete embedders from arroy 2025-06-26 18:14:48 +02:00
510a4b91be Introduce DatabaseDocument type 2025-06-26 18:14:48 +02:00
e704f4d1ec Reimplement reindexing shell 2025-06-26 18:14:48 +02:00
82fe80b360 Replace the legacy Settings::execute by the new one 2025-06-26 18:14:14 +02:00
0f1dd3614c Update tasks tests 2025-06-26 18:11:12 +02:00
3aa6c3c750 Merge pull request #5707 from Mubelotix/last_embedder_message
Add last embedder error in batches
2025-06-26 15:21:17 +00:00
b956918c11 Fix clippy and more utoipa issues 2025-06-26 16:31:38 +02:00
e3003c1609 Improve OpenAPI schema 2025-06-26 16:05:12 +02:00
bf13268649 Better compute aggragates 2025-06-26 16:03:13 +02:00
0bb7866f1e Remove the skip embeddings boolean in the settings 2025-06-26 15:48:21 +02:00
e6e9a033aa Introduce new analytics to the export route 2025-06-26 15:45:24 +02:00
63031219c5 Add the payload size to the parameters 2025-06-26 13:57:32 +02:00
44d6430bae Rename fields 2025-06-26 12:30:08 +02:00
4d26e9c6f2 Remove my comments 2025-06-26 12:21:34 +02:00
2ff382c023 Remove useless clone 2025-06-26 12:15:09 +02:00
0f6dd133b2 Turn to references 2025-06-26 12:15:09 +02:00
29f6eeff8f Remove lots of Arcs 2025-06-26 12:15:08 +02:00
ef007d547d Remove panics 2025-06-26 12:15:08 +02:00
3fc16c627d Comment the delay 2025-06-26 12:15:08 +02:00
9422b6d654 Update crates/meilisearch/src/lib.rs
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2025-06-26 10:58:27 +02:00
ddba52414a Merge pull request #5702 from Nymuxyzo/fix/5688-reset-typo_tolerance-settings
Fix disableOnNumbers reset
2025-06-26 07:58:47 +00:00
4534dc2cab Create another deserr error 2025-06-25 16:45:32 +02:00
b05cb80803 Take sort criteria from the request 2025-06-25 16:41:08 +02:00
6e0526090a Implement sorting documents 2025-06-25 15:36:12 +02:00
a743da3061 Gzip-compress the content 2025-06-25 15:27:10 +02:00
c6216517c7 Parallelize document upload 2025-06-25 15:27:10 +02:00
2d4f7c635e Make tests happy 2025-06-25 15:27:10 +02:00
ee812b31c4 Support JSON value as filters 2025-06-25 15:27:09 +02:00
3329248a84 Support no pattern when exporting 2025-06-25 15:27:09 +02:00
bc08cd0deb Make clippy happy again 2025-06-25 15:27:09 +02:00
3e2f468213 Support task cancelation 2025-06-25 15:27:09 +02:00
7c448bcc00 Make clippy happy 2025-06-25 15:27:09 +02:00
acb7c0a449 Implement a retry strategy 2025-06-25 15:27:08 +02:00
e8795d2608 Export embeddings 2025-06-25 15:26:47 +02:00
e023ee4b6b Working first implementation 2025-06-25 15:26:47 +02:00
e74c3b692a Introduce a new route to export documents and enqueue the export task 2025-06-25 15:26:46 +02:00
1d3b18f774 Update test to be more reproducible 2025-06-25 14:58:21 +02:00
00bc86e74b Merge pull request #5705 from meilisearch/fix-max-total-size-limit-env-var
Fix the environment variable name of the experimental limit batched tasks total size feature
2025-06-25 12:49:30 +00:00
adc9976615 Simplify the analytics chat completions aggragetor 2025-06-25 11:50:26 +02:00
2090e9ea31 Update test 2025-06-25 10:08:25 +02:00
1c8f1c18f4 Fix constant name and key description 2025-06-25 09:59:34 +02:00
ae8c1461e1 Merge pull request #5708 from meilisearch/unsupport-gemini
Remove Gemini from the LLM-providers list
2025-06-25 06:44:37 +00:00
5f62274f21 Add disableOnNumbers to settings reset 2025-06-24 23:32:50 +02:00
c4a96b40eb Remove KeysGet from AllGet 2025-06-24 17:40:06 +02:00
5f50fc9464 Add new analytics to the chat completions route 2025-06-24 17:05:49 +02:00
89498a2bea Remove Gemini from the LLM-providers list 2025-06-24 15:58:39 +02:00
211c1b753f Fix the env variable name 2025-06-24 15:27:39 +02:00
d08e89ea3d Remove options 2025-06-24 15:10:15 +02:00
695877043a Fix warnings 2025-06-24 14:53:39 +02:00
bc4d1530ee Fix tests 2025-06-24 14:50:23 +02:00
d7721fe607 Format 2025-06-24 12:20:22 +02:00
4a179fb3c0 Improve code quality 2025-06-24 11:38:11 +02:00
59a1c5d9a7 Make test more reproducible 2025-06-24 11:08:06 +02:00
2f82d94502 Fix the test and simplify types 2025-06-23 18:55:23 +02:00
bd2bd0f33b Merge pull request #5697 from martin-g/documents-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in documents::
2025-06-23 16:33:21 +00:00
e02733df4a Merge pull request #5698 from martin-g/index-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in index::
2025-06-23 16:31:40 +00:00
f373ecc96a Merge pull request #5699 from martin-g/settings-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in settings::
2025-06-23 16:30:49 +00:00
748a327271 Merge pull request #5700 from martin-g/search-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in search::
2025-06-23 16:29:53 +00:00
4925b30196 Move embedder stats out of progress 2025-06-23 15:24:14 +02:00
43c4a229b7 Merge pull request #5692 from diksipav/5684-gemini-chat-completions-fix
Fix Gemini base_url when used with OpenAI clients
2025-06-23 09:03:34 +00:00
ca112a8b95 tests: Use Server::wait_task() instead of Index::wait_task() in index::
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:59:29 +03:00
855fa555a3 tests: Use Server::wait_task() instead of Index::wait_task() in search::
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:54:49 +03:00
a237c0797a tests: Use Server::wait_task() instead of Index::wait_task() in settings::
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:32:45 +03:00
5c46dc702a tests: Use Server::wait_task() instead of Index::wait_task()
The code is mostly duplicated.
Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:22:59 +03:00
4cadc8113b Add embedder stats in batches 2025-06-20 12:42:22 +02:00
2d6dc83940 Format the code 2025-06-19 15:55:12 +02:00
ab768f379f Fix comment 2025-06-19 15:49:34 +02:00
705e9a9e5e Make the uuids random again to prevent abuse using rainbow tables 2025-06-19 15:45:09 +02:00
c17031d3de Fix Gemini base_url when used with OpenAI clients 2025-06-19 15:11:37 +02:00
67f2a30d7c Fix test 2025-06-19 13:10:08 +02:00
99732f4084 Fix some tests 2025-06-19 13:04:55 +02:00
5081d837ea Fix AllGet action being included in All 2025-06-19 12:12:30 +02:00
9e1cb792f4 Rename Action::AllRead to AllGet 2025-06-19 11:55:25 +02:00
b6b7ede266 Rename Action *.read to *.get 2025-06-19 11:53:42 +02:00
f50e586a4f Allow management key to read other keys 2025-06-19 11:52:58 +02:00
11fedea788 Set static uuids to keys 2025-06-19 11:42:45 +02:00
032b34c377 Add a default management key 2025-06-19 11:29:32 +02:00
b421c8e7de Add an AllRead key 2025-06-19 11:29:16 +02:00
00eb258a53 Fix comment 2025-06-19 11:16:07 +02:00
fc6cc80705 Merge pull request #5689 from Mubelotix/main
Remove old dependencies
2025-06-19 08:11:55 +00:00
138d20b277 Remove old dependencies 2025-06-18 16:46:20 +02:00
7c1a9113f9 Merge pull request #5686 from meilisearch/upgrade-dependencies-again
Upgrade dependencies
2025-06-18 09:22:18 +00:00
07ae297ffd Merge pull request #5681 from martin-g/faster-settings-prefix_search_settings-it-tests
tests: Faster settings::prefix_search_settings IT tests
2025-06-18 09:20:56 +00:00
4069dbcfca Upgrade incompatible dependencies 2025-06-17 22:23:37 +02:00
03eb50fbac Upgrade dependencies 2025-06-17 22:03:06 +02:00
2616d776f2 Merge pull request #5677 from martin-g/faster-documents-errors-it-tests
tests: Faster document::errors IT tests
2025-06-17 15:53:35 +00:00
3004db95af Merge pull request #5680 from martin-g/faster-similar-mod-it-tests
tests: Faster similar::mod IT tests
2025-06-17 15:51:38 +00:00
9a729bf31d Merge pull request #5682 from martin-g/faster-documents-update_documents-it-tests
tests: Faster documents::update_documents IT tests
2025-06-17 14:36:09 +00:00
8bfa6a7f54 tests: Faster documents::update_documents IT tests
Use a shared server + unique index

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 23:48:59 +03:00
056f18bd02 tests: Faster settings::prefix_search_settings IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 23:20:11 +03:00
fe9866aca8 tests: Faster similar::mod IT tests
Use shared server + unique indexes

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 22:51:07 +03:00
60f105a4a3 tests: Faster document::errors IT tests
* Add a call to .failed() for an awaited task
* Use Server::wait_task() instead of Index::wait_task() - it has better
  error checking

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 16:25:15 +03:00
abb399b802 Merge pull request #5674 from meilisearch/release-v1.15.2
Bring back v1.15.2 to main
2025-06-16 11:36:07 +00:00
aeaac7270e Merge pull request #5603 from martin-g/faster-search-multi-it-tests
tests: Faster search::multi IT tests
2025-06-16 09:43:24 +00:00
f45770a3ce Merge pull request #5672 from martin-g/reuse-bench-data
docs: Recommend using a custom path for the benches' data
2025-06-16 09:35:57 +00:00
0e10ff1aa3 docs: Recommend using a custom path for the benches' data
This reduces the build time of the `benchmarks` crate from ~220secs to
45secs (according to `cargo build --timings`) on my dev machine

Additionally I've introduced a parent folder for the Meili related cache
paths - ~/.cache/meili

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 09:21:47 +03:00
6ee608c2d1 Remove debug leftovers
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 15:45:04 +03:00
95e8a9bef1 Use a unique name for an index in a shared server
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 15:10:48 +03:00
0598320252 Try to debug the problem with the existing "test" index in a shared server
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 14:07:57 +03:00
2269104337 Use unique_index_with_prefix() instead of composing the index names manually with Uuid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 13:35:03 +03:00
e8774ad079 Extract shared indices for movies and batman documents
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-12 13:46:17 +03:00
c3368e6859 Merge pull request #5659 from meilisearch/tmp-release-v1.15.1
Bring back v1.15.0 and v1.15.1 changes
2025-06-12 09:16:56 +00:00
9bda9a9a64 Merge remote-tracking branch 'origin/main' into tmp-release-v1.15.1 2025-06-12 10:21:07 +02:00
aefebdeb8b Merge pull request #5617 from workbackai/workback/patch/5594/FB6ED899-E821-4C88-AA79-8BB975E1937A
fix(milli/search): Cyrillic has different typo tolerance due to byte counting bug
2025-06-12 07:39:19 +00:00
646e44ddf9 Re-use the shared_index_with_score_documents since the settings are as the default
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-12 08:59:19 +03:00
b8845d1015 Sort the imports
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 11:29:33 +03:00
620867d611 Use unique indices for the searches in non-existing indices
By using hardcoded there is a chance that the index could exist

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 11:01:05 +03:00
a73d3c03e9 Make the dynamic assertion for facetsByIndex JSON key more broader
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 09:10:10 +03:00
824f5b12ce Formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 08:54:58 +03:00
bb4baf7fae Remove useless dynamic redactions. They are covered by their .**.xyz counterparts
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 08:52:28 +03:00
0263eb0aec More assertion fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 08:42:35 +03:00
8a916a4e42 More assertion fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 07:54:04 +03:00
6a683975bf More fixes of the tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 16:58:48 +03:00
1824fbd1b5 Introduce Index::unique_index_with_prefix(&str)
It could be used when we want to see the index name in the assertions,
e.g. `movies-[uuid]`

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:49:18 +03:00
34d8a54c4b Fix typos in comments and update assertions
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:48:59 +03:00
8fa6e8670a tests: Faster search::multi IT tests
Use shared server + unique indices where possible

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:10:43 +03:00
170ad87e44 Merge pull request #5622 from martin-g/faster-search-filters-it-tests
tests: Faster search::filters IT tests
2025-06-10 08:17:52 +00:00
8f96724adf Set max_attempts to 400 for Server::wait_task()
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-09 14:03:49 +03:00
01e5b0effa Merge pull request #5611 from martin-g/faster-stats-mod-it-tests
tests: Faster stats::mod IT tests
2025-06-09 11:02:12 +00:00
2ec9664878 chore: Fix English grammar in SearchQueue's comments
No functional changes!

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-09 12:05:36 +02:00
10028515ac Use a unique server for the summarized dump creation test
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:52:05 +03:00
63ccd19ab1 Use Server::wait_task() instead of Index::wait_task() for tasks IT tests
Revert the debugging helper that dumped the thread stack traces.
Try with 400 max attempts for the task success/failure (200 secs)

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:16:50 +03:00
1b4d344e18 Increase the wait time in the tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:32 +03:00
89c0cf9b12 temporary: Dump the threads stack traces when .wait_task() times out
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:32 +03:00
3770e70581 Optimize the imports
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:31 +03:00
e497008161 Add cattos to the shared_index_with_nested_documents() as a filterable attribute
This allows to make some more search::filters IT tests using shared
server + unique/shared indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:31 +03:00
a15ebb283f Remove unused import
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:30 +03:00
3f256a7959 Use the shared index with DOCUMENTS where possible
Remove useless assertion that is covered by the earlier call of
.succeeded()

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:30 +03:00
b41af0d0f6 Formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:30 +03:00
3ebff65ef3 tests: Faster search::filters IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:29 +03:00
666680bd87 test(meilisearch/search/locales.rs): updates snapshot
Used `cargo insta test`
Reviewed with `cargo insta review`
2025-06-04 14:18:20 +01:00
27527849bb test(meilisearch/search/locales.rs): updates snapshot
Used `cargo insta test`
Reviewed with `cargo insta review`
2025-06-04 14:17:10 +01:00
1d02efeab9 Merge pull request #5615 from martin-g/faster-tasks-mod-it-tests
tests: Faster tasks::mod IT tests
2025-06-04 12:38:39 +00:00
53fc98d3b0 Merge pull request #5632 from martin-g/db-change-label
ci: Use `GITHUB_TOKEN` secret for the `db change check` workflow
2025-06-04 12:23:01 +00:00
263300b3a3 style(milli): linting 2025-06-04 12:19:00 +01:00
ab3d92d163 chore(parse_query): delete println and move test inside tests module 2025-06-04 12:19:00 +01:00
ef9fc6c854 fix(parse_query): cyrillic bug 2025-06-04 12:19:00 +01:00
61b0f50d4d Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:37:42 +03:00
0557a4dd2f Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:08:13 +03:00
930d5a09a8 Use unique server + its own index for #stats() test
Using a shared server will make this test fragile

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:08:13 +03:00
8b0c4291ae tests: Faster stats::mod IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:08:13 +03:00
c9efdf8c88 Render details.dumpUid as [dump_uid] in Value's Display
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:00:47 +03:00
72736c0ea9 Merge pull request #5627 from meilisearch/skip_remote_test
ignore flaky test
2025-06-04 08:28:24 +00:00
49317bbee4 Merge pull request #5625 from martin-g/faster-search-hybrid-it-tests
tests: Faster search::hybrid IT tests
2025-06-03 13:54:38 +00:00
af54c8381e Use ${{ github.repository }} instead of hardcoding the repo/owner
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:46:16 +03:00
693fcd5752 Try with GITHUB_TOKEN
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:40:40 +03:00
733175359a Update the new test case to use the new signature of index_with_documents_user_provided()
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:29:45 +03:00
7c6162f0bf Fix clippy error
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:26:21 +03:00
d6ae39bf0f tests: Faster search::hybrid IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:26:21 +03:00
e416bbc1de Merge pull request #5623 from martin-g/faster-search-geo-it-tests
tests: Faster search::geo IT tests
2025-06-03 12:25:48 +00:00
2cfd363dc6 Merge pull request #5619 from martin-g/faster-documents-delete_documents-it-tests
tests: Faster documents::delete_documents IT tests
2025-06-03 12:06:07 +00:00
70aa78a2c2 Remove unused import
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 14:04:15 +03:00
96c81762ed Apply suggestions from code review
Do not use redactions for the snapshot assertions

Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 14:00:38 +03:00
0b1f634afa Remove useless code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:52:55 +03:00
d3d5015854 Use the cancelled task uid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:50:04 +03:00
f95f29c492 Use unique server+index for list_tasks_type_filtered() test case
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 13:45:46 +03:00
a50b69b868 Use unique server+index for list_tasks_status_filtered() test case
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 13:45:17 +03:00
3668f5f021 Use unique server+index for list_tasks() test case
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 13:44:38 +03:00
54fdf379bb Use shared_does_not_exists_index() index for delete_one_document_unexisting_index() test case
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:41:13 +03:00
41b1cd5a73 Extract GEO_DOCUMENTS static variable and shared index with these docs
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:08:12 +03:00
5c14a25d5a Merge pull request #5624 from martin-g/faster-documents-get_documents-it-tests
tests: Faster documents::get_documents IT tests
2025-06-03 09:37:07 +00:00
fda2843135 Merge pull request #5621 from martin-g/faster-similar-errors-it-tests
tests: Faster similar::errors IT tests
2025-06-03 09:27:27 +00:00
9347330f3a Merge pull request #5620 from martin-g/faster-search-distinct-it-tests
tests: Faster search::distinct IT tests
2025-06-03 09:24:39 +00:00
56c9190dab Merge pull request #5618 from martin-g/faster-vector-binary_quantized-it-tests
tests: Faster vector::binary_quantized IT tests
2025-06-03 09:20:08 +00:00
6b986dceaf Merge pull request #5607 from martin-g/faster-settings-get_settings-it-tests
tests: Faster settings::get_settings IT tests
2025-06-03 08:53:17 +00:00
ea6bb4df1d Merge pull request #5614 from meilisearch/fix-hybrid-distinct
Fix distinct for hybrid search
2025-06-03 07:20:55 +00:00
a3d2f64725 tests: Faster search::distinct IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 08:23:26 +03:00
d5526cffff Merge pull request #5527 from nnethercott/all-cpus-in-import-dump
Use all CPUs during an import dump
2025-06-02 15:24:59 +00:00
5cb75d1f2a ignore flaky test 2025-06-02 17:06:53 +02:00
921e3c4ffe tests: Faster documents::get_documents IT tests
Use shared server + unique index

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:36:08 +03:00
52591761af tests: Faster search::geo IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:32:32 +03:00
f80182f0a9 tests: Faster similar::errors IT tests
Use shared server + unique indices

Related to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:20:17 +03:00
3b30b6a57a tests: Faster documents::delete_documents IT tests
Use shared server + unique indices
Assert .succeeded()/.failed() for the waited tasks

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:04:48 +03:00
5efc78db55 tests: Faster vector::binary_quantized IT tests
Use shared server + unique indices where possible

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 14:47:18 +03:00
cffbe3fcb6 Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 14:17:19 +03:00
8d8fcb9846 Revert to unique server + named index for some tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 11:44:21 +03:00
20049669c9 Merge pull request #5600 from martin-g/faster-search-facet_search-it-tests
tests: Faster search::facet_search IT tests
2025-06-02 08:39:30 +00:00
db28d13cb1 Remove useless assertion.
.succeeded() does the same

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 10:59:46 +03:00
5a7cfc57fd tests: Faster tasks::mod IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 10:56:43 +03:00
790621dc29 Remove useless assert
Co-authored-by: Many the fish <many@meilisearch.com>
2025-06-02 10:55:28 +03:00
1d577ae98b Merge pull request #5610 from martin-g/faster-settings-tokenizer_customization-it-tests
tests: Faster settings::tokenizer_customization IT tests
2025-06-02 07:09:41 +00:00
88e9a55d44 Merge pull request #5609 from martin-g/faster-settings-proximity_settings-it-tests
tests: Faster settings::proximity_settings IT tests
2025-06-02 07:09:06 +00:00
dbe551cf99 Merge pull request #5606 from martin-g/faster-settings-distinct-it-tests
tests: Faster settings::distinct IT tests
2025-06-02 07:07:23 +00:00
a299fbd33b Merge pull request #5605 from martin-g/faster-search-restricted_searchable-it-tests
tests: Faster search::restricted_searchable IT tests
2025-06-02 07:06:50 +00:00
193119acb9 Merge pull request #5604 from martin-g/search-pagination-it-tests
tests: search::pagination IT tests
2025-06-02 07:05:52 +00:00
4c71118699 Merge pull request #5602 from martin-g/faster-search-matching_strategy-it-tests
tests: Faster search::matching_strategy IT tests
2025-06-02 07:04:43 +00:00
5fe2943d3c Merge pull request #5601 from martin-g/faster-search-locales-it-tests
tests: Faster search::locales IT tests
2025-06-02 07:02:28 +00:00
86ff502327 Merge pull request #5599 from martin-g/faster-index-search-errors-tests
tests: Faster search::errors IT tests
2025-06-02 06:54:32 +00:00
6b1a345dce tests: Faster settings::tokenizer_customization IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 08:23:09 +03:00
b54ece690b tests: Faster settings::proximity_settings IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 08:20:05 +03:00
3ea167bade tests: Faster settings::get_settings IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 16:33:27 +03:00
1158d6689f tests: Faster settings::distinct IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 15:41:31 +03:00
d9b0463a0b tests: Faster search::restricted_searchable IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 15:37:27 +03:00
ae9899f179 tests: search::pagination IT tests
Minor cleanup.

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 15:26:55 +03:00
308fd7128e Fix clippy errors
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 11:36:56 +03:00
27e7c00622 Add dynamic redactions for taskUid and enqueuedAt properties
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 11:33:10 +03:00
58207da934 Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 10:56:33 +03:00
fb8b832192 Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 10:54:31 +03:00
17207b5405 tests: Faster search::matching_strategy IT tests
Use shared server + unique indices for all tests

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 09:09:02 +03:00
bd95503eba tests: Faster search::locales IT tests
Use a shared server + unique indices where possible

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 09:03:23 +03:00
8b8b0d802c tests: Faster search::facet_search IT tests
Use shared server + unique indices where possible.
Assert .succeeded() for the waited tasks.
Drop usage of dbg!() in the assertions. It caused noise in the logs

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 08:53:10 +03:00
d329e86250 tests: Use shared server + unique server where possible
Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 08:42:10 +03:00
d416b3b390 Merge pull request #5592 from nnethercott/extract-geo-facets-seperately
Decouple geo facet extraction from rest of document
2025-05-28 16:22:10 +00:00
54f5e74744 Support distinct in hybrid search 2025-05-28 17:58:58 +02:00
fd4b192a39 Add distinct_fid function and expose distinct_single_docid 2025-05-28 17:58:58 +02:00
3c13feebf7 Test that distinct is applied for hybrid search 2025-05-28 17:58:58 +02:00
1811168b96 remove duplicated check on geo field changes 2025-05-28 15:45:13 +02:00
b06cc1e0a2 Update crates/milli/src/update/new/extract/faceted/extract_facets.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2025-05-28 15:38:23 +02:00
44f812c36d Update crates/milli/src/update/new/extract/faceted/extract_facets.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2025-05-28 15:38:12 +02:00
c8e77b5f25 Merge pull request #5574 from martin-g/faster-add_documents-it-tests
perf: Faster integration tests for add_documents.rs
2025-05-28 13:13:38 +00:00
283f516e15 Merge pull request #5579 from martin-g/faster-index-update_index-it-tests
perf: Faster index::update_index IT tests
2025-05-28 13:11:56 +00:00
b4ca0a8c98 Update the tests related to updating indices
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:41 +03:00
b658e38acd Fix formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:41 +03:00
f87e46cc16 Ignore the result from #wait_task()
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:41 +03:00
65354b414a Update crates/meilisearch/tests/index/update_index.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 15:02:40 +03:00
025df397c0 Update crates/meilisearch/tests/index/update_index.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 15:02:40 +03:00
f77abc9dc8 Update crates/meilisearch/tests/index/update_index.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 15:02:40 +03:00
7e9909ee45 perf: Faster index::update_index IT tests
Use a shared server where possible.
Assert succeeded/failed task waits.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:40 +03:00
43ec97fe45 format the code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:01:04 +03:00
02929e241b Update the status code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:36:13 +03:00
c13efde042 uuid is a production dependency of meili-snap
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:35:50 +03:00
36f0a1492c Apply suggestions from code review
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 14:22:04 +03:00
ce65ad213b Add dynamic redactions for uid, batchUid and taskUid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:22:04 +03:00
3e0de6cb83 Wait for the batched tasks by their real uid.
Some of them succeed, others fail.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:22:04 +03:00
f3d691667d Use a Regex in insta dynamic redaction to replace Uuids with [uuid]
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:22:01 +03:00
ce9c930d10 Fix clippy and fmt
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:21:25 +03:00
fc88b003b4 Use shared server and unique indices for add_documents IT tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:20:07 +03:00
cf5d26124a Call .succeeded() or .failed() on the waited task
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:18:34 +03:00
38b1c57fa8 Faster IT tests for add_documents.rs
Use Shared server where possible

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:18:33 +03:00
25c525b057 Merge pull request #5589 from mcmah309/typo_fix
Typo fix
2025-05-28 11:02:22 +00:00
83cd28b60b Merge pull request #5584 from martin-g/faster-index-search-mod-tests
tests: Faster index::search::mod IT tests
2025-05-28 08:40:37 +00:00
48cad4132a Fix clippy - ignore code variable
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-27 16:44:57 +03:00
4897ad99d0 Wait for the add_documents task
Format the code

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-27 14:26:29 +03:00
46ff78b4ec Update the regex to replace all occurrences of uuids in the redaction
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-27 11:47:02 +03:00
9ad43b6841 rename has_changed to has_changed_for_facets 2025-05-26 18:37:20 +02:00
c9ec502ed9 refactor for readability 2025-05-26 18:32:59 +02:00
18aed75d3b fix logic 2025-05-26 18:20:55 +02:00
6738a4f6ee feat: mettre a jour the insta snapshots 2025-05-26 16:36:36 +02:00
d2948adea3 Migrate more tests to assert with "[uuid]" instead of real Uuid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-26 14:31:58 +03:00
f54b57e5be Use a Regex in insta dynamic redaction to replace Uuids with [uuid]
(cherry picked from commit f8b8c6ab71a28052cf9b271ca8aa5d4175f9e8f9)
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-26 14:03:48 +03:00
95821d0bde refactor: update macro 2025-05-26 10:07:13 +02:00
f690fa0686 feat: add macro_rules to factorize 2025-05-26 09:46:14 +02:00
24e94b28c1 feat: uncouple geo extraction from full doc 2025-05-26 09:22:20 +02:00
34d58f35c8 Print [uuid] instead of the Uuid index name for MeilisearchHttpError::Milli errors
This way the tests' assertions/snapshots for unique indices would be stable

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-25 15:48:55 +03:00
1d5265caf4 Fix typo in method name 2025-05-22 14:25:04 +00:00
97aeb6db4d Merge pull request #5548 from lblack00/attributes-to-search-on-nested-fields
Added support for nested wildcards to attributes_to_search_on
2025-05-22 13:58:23 +00:00
f888f87635 Updated formatting using RustFmt 2025-05-21 02:07:25 -07:00
8c8d98eeaa Use shared server and unique indices for all tests where possible
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-21 10:48:20 +03:00
c5ae43cac6 Updated all additional test cases 2025-05-20 09:03:26 -07:00
57eecd6197 Remove an empty line
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-20 14:37:45 +03:00
2fe5c78cb6 tests: Faster index::search::mod IT tests
* Use shared index where possible.
* Call .succeeded/.failed when waiting for a task.
* Use newer format_args syntax
* Do not use fully qualified name for meili_snap:: functions. The
  functions are already imported in scope

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-20 14:26:26 +03:00
8047cfe438 Merge pull request #5580 from martin-g/better-assertions-index-delete_index-it-tests
tests: Assert succeeded/failed for the index::delete_index IT tests
2025-05-20 08:49:24 +00:00
5717e5c1af Merge pull request #5578 from martin-g/faster-index-get_index-it-tests
perf: Faster index::get_index IT tests
2025-05-20 08:41:11 +00:00
bb07038c31 tests: Assert succeeded/failed for the index::delete_index IT tests
Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:57:53 +03:00
d1a088ea0b Format the code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:52:43 +03:00
b68e22c0e6 Revert the improvements for get_and_paginate_indexes()
Because they won't work in multi-threaded execution of the tests

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:36:45 +03:00
03a36f116e 1. Use a unique Server for no_index_return_empty_list test
... because a Shared one could see indices created by other tests

2. List at least 1000 indices to make sure we get the newly created ones
   in list_multiple_indexes()

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:20:16 +03:00
8a0bf24ed5 Merge pull request #5572 from martin-g/faster-stats-it-tests
perf: Faster IT tests - stats.rs
2025-05-19 12:44:08 +00:00
e2763471e5 Faster index::get_index IT tests
Use shared server for all tests in get_index.rs

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 15:36:25 +03:00
b2f2c5d69f Remove an assertion of a task uid.
It differs for every run of the IT test suite.

Format the imports

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 14:44:08 +03:00
1594c54e23 Provide more information about resulting documents on test case 2025-05-19 02:37:23 -07:00
13b607bd68 Removed matches_wildcard_pattern() and integrated match_pattern() into attributes_to_search_on(), updated test cases 2025-05-18 20:24:52 -07:00
3d130d31c8 Do not hard code the non-existing index name/uid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-16 15:49:50 +03:00
4cda584b0c Fix the build of stats.rs
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-16 15:45:25 +03:00
248c90bad5 removing .await 2025-05-16 15:29:24 +03:00
0e9040e605 remove warnings 2025-05-16 15:29:23 +03:00
3e3c00f44c fix for test failure 2025-05-16 15:29:23 +03:00
d986a3bbaf Changes to index and expected_response as per feedback 2025-05-16 15:29:22 +03:00
c2ceb8e41b Improve Integration tests in the file stats.rs 2025-05-16 15:29:18 +03:00
79db2e67fb refactor: prefer helper over explicit pool construction
Co-authored-by: Many the fish <many@meilisearch.com>
2025-05-15 11:24:34 +02:00
865f24cfef refactor: helper methods for pool and max threads 2025-05-14 23:45:24 +02:00
3fbe1df770 Updated nested_search_all_details_with_deep_wildcard() to test deeply nested attributes 2025-05-14 00:18:30 -07:00
150d1db86b Implemented integration tests for restrict_searchable.rs on nested wildcard attributes 2025-05-13 21:44:24 -07:00
806e983aa5 fix: lazy computation in thread default
Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
2025-05-13 14:14:48 +02:00
e96c1d4b0f style: change fmt from empty str to "unlimited" 2025-05-13 12:16:34 +02:00
15cdc6924b refactor: remove runtime cfg!(test) check
Won't work in integration tests and consequently all threads would be
used. To remedy this we make explicit `max_threads=Some(1)` in the
IndexerConfig::default
2025-05-13 09:18:19 +02:00
677e8b122c Merge pull request #5551 from meilisearch/dont-intern-without-typo
Only intern in case of single-typo when looking for single typos
2025-05-12 20:23:39 +00:00
75a7e40a27 Merge branch 'main' into all-cpus-in-import-dump 2025-05-12 21:48:12 +02:00
c8939944c6 Add test 2025-05-12 12:40:55 +02:00
4e6252fb03 Only intern in case of single-typo when looking for single typos 2025-05-12 11:59:21 +02:00
8bd8e744f3 Attributes to search on supports nested wildcards 2025-05-09 02:42:48 -07:00
53f32a7dd7 refactor: change thread_pool from Option<ThreadPoolNoAbort> to
ThreadPoolNoAbort
2025-05-07 17:00:08 +02:00
47a7ed93d3 feat: Make MaxThreads None by default 2025-05-06 09:11:55 +02:00
2ac826edca Apply suggested changes
Co-authored-by: Clément Renault <renault.cle@gmail.com>

Update crates/meilisearch/src/lib.rs

Co-authored-by: Clément Renault <renault.cle@gmail.com>
2025-05-01 16:12:06 +02:00
89aff2081c Fix clippy warnings 2025-04-30 14:17:32 +02:00
3b773b3416 Revert thread_pool type back to Option in config 2025-04-28 11:56:37 +02:00
648b2876f6 Create temp threadpool with all CPUs in dump 2025-04-27 00:52:10 +02:00
260 changed files with 17923 additions and 7020 deletions

View File

@ -4,22 +4,22 @@ on:
pull_request:
types: [opened, synchronize, reopened, labeled, unlabeled]
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
check-labels:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Check db change labels
id: check_labels
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
URL=/repos/meilisearch/meilisearch/pulls/${{ github.event.pull_request.number }}/labels
echo ${{ github.event.pull_request.number }}
echo $URL
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/meilisearch/meilisearch/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
echo "Labels: $LABELS"
if [[ ! "$LABELS" =~ "db change" && ! "$LABELS" =~ "no db change" ]]; then
echo "::error::Pull request must contain either the 'db change' or 'no db change' label."
exit 1

View File

@ -32,7 +32,7 @@ jobs:
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb

View File

@ -51,7 +51,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
@ -81,7 +81,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
@ -113,7 +113,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@ -178,7 +178,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@ -29,7 +29,7 @@ jobs:
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.85
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.8
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -51,7 +51,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.8
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.85
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
@ -155,7 +155,7 @@ jobs:
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.8
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@ -172,7 +172,7 @@ jobs:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.8
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@ -191,7 +191,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.8
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

11
.gitignore vendored
View File

@ -11,12 +11,21 @@
/bench
/_xtask_benchmark.ms
/benchmarks
.DS_Store
# Snapshots
## ... large
*.full.snap
## ... unreviewed
## ... unreviewed
*.snap.new
## ... pending
*.pending-snap
# Tmp files
.tmp*
# Database snapshot
crates/meilisearch/db.snapshot
# Fuzzcheck data for the facet indexing fuzz test
crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/

View File

@ -57,9 +57,17 @@ This command will be triggered to each PR as a requirement for merging it.
You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
It'll store some built artifacts in the directory of your choice.
We recommend using the standard `$HOME/.cache/lindera` directory:
We recommend using the `$HOME/.cache/meili/lindera` directory:
```sh
export LINDERA_CACHE=$HOME/.cache/lindera
export LINDERA_CACHE=$HOME/.cache/meili/lindera
```
You can set the `MILLI_BENCH_DATASETS_PATH` environment variable to further speed up your builds.
It'll store some big files used for the benchmarks in the directory of your choice.
We recommend using the `$HOME/.cache/meili/benches` directory:
```sh
export MILLI_BENCH_DATASETS_PATH=$HOME/.cache/meili/benches
```
Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable.

668
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.15.2"
version = "1.16.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.95"
bumpalo = "3.16.0"
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.5"
milli = { path = "../milli" }
mimalloc = { version = "0.1.43", default-features = false }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
criterion = { version = "0.6.0", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.10"
roaring = "0.10.12"
[build-dependencies]
anyhow = "1.0.95"
bytes = "1.9.0"
convert_case = "0.6.0"
flate2 = "1.0.35"
reqwest = { version = "0.12.15", features = ["blocking", "rustls-tls"], default-features = false }
anyhow = "1.0.98"
bytes = "1.10.1"
convert_case = "0.8.0"
flate2 = "1.1.2"
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]
@ -51,3 +51,8 @@ harness = false
[[bench]]
name = "indexing"
harness = false
[[bench]]
name = "sort"
harness = false

View File

@ -11,7 +11,7 @@ use milli::heed::{EnvOpenOptions, RwTxn};
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::vector::RuntimeEmbedders;
use milli::{FilterableAttributesRule, Index};
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
@ -65,7 +65,7 @@ fn setup_settings<'t>(
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields);
builder.execute(|_| (), || false).unwrap();
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
}
fn setup_index_with_settings(
@ -166,9 +166,10 @@ fn indexing_songs_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -232,9 +233,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -276,9 +278,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -344,9 +347,10 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -420,9 +424,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -464,9 +469,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -504,9 +510,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -571,9 +578,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -637,9 +645,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -703,9 +712,10 @@ fn indexing_wiki(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -768,9 +778,10 @@ fn reindexing_wiki(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -812,9 +823,10 @@ fn reindexing_wiki(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -879,9 +891,10 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -955,9 +968,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1000,9 +1014,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1041,9 +1056,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1107,9 +1123,10 @@ fn indexing_movies_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1172,9 +1189,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1216,9 +1234,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1283,9 +1302,10 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1331,9 +1351,10 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
new_fields_ids_map,
Some(primary_key),
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1395,9 +1416,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1439,9 +1461,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1479,9 +1502,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1568,9 +1592,10 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1658,9 +1683,10 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1740,9 +1766,10 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1806,9 +1833,10 @@ fn indexing_geo(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1871,9 +1899,10 @@ fn reindexing_geo(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1915,9 +1944,10 @@ fn reindexing_geo(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -1982,9 +2012,10 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();

View File

@ -0,0 +1,114 @@
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
//!
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Applies the settings shared by every sort benchmark: the fields returned with
/// each document and the fields documents may be sorted on.
fn base_conf(builder: &mut Settings) {
    const DISPLAYED: [&str; 6] =
        ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"];
    const SORTABLE: [&str; 6] =
        ["_geo", "name", "population", "elevation", "timezone", "modification-date"];

    // The target collection type of each `collect` is inferred from the setter it feeds.
    builder.set_displayed_fields(DISPLAYED.iter().map(|field| (*field).to_owned()).collect());
    builder.set_sortable_fields(SORTABLE.iter().map(|field| (*field).to_owned()).collect());
}
/// Configuration that every benchmark group of this file starts from.
///
/// It runs a single empty query against the `SMOL_ALL_COUNTRIES` dataset, once per
/// `(offset, limit)` window listed in `offsets`, and sets `get_documents` so the same
/// windows are also benchmarked when fetching documents without querying.
/// Each group only overrides `group_name`, `sort` and, for some, `sample_size`.
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
configure: base_conf,
primary_key: Some("geonameid"),
queries: &[""],
offsets: &[
Some((0, 20)), // The most common query in the real world
Some((0, 500)), // A query that ranges over many documents
Some((980, 20)), // The worst query that could happen in the real world
Some((800_000, 20)) // The worst query
],
get_documents: true,
..Conf::BASE
};
/// Registers one benchmark group per sort scenario and runs them all.
///
/// Every scenario starts from `BASE_CONF` and only changes the group name, the sort
/// criteria and, for the slower geo scenarios, the criterion sample size.
fn bench_sort(c: &mut criterion::Criterion) {
    #[rustfmt::skip]
    let confs = &[
        utils::Conf {
            group_name: "without sort",
            sort: None,
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different values",
            sort: Some(vec!["name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar values",
            sort: Some(vec!["timezone:desc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar then different values",
            sort: Some(vec!["timezone:desc", "name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different then similar values",
            // `name` (many different values) must come first here; the previous
            // criteria duplicated the "similar then different" group above.
            sort: Some(vec!["name:asc", "timezone:desc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "geo sort",
            sample_size: Some(10),
            sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar values then geo sort",
            sample_size: Some(50),
            sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different values then geo sort",
            sample_size: Some(50),
            sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many fields",
            sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
            ..BASE_CONF
        },
    ];

    utils::run_benches(c, confs);
}
criterion_group!(benches, bench_sort);
criterion_main!(benches);

View File

@ -9,11 +9,12 @@ use anyhow::Context;
use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::documents::sort::recursive_sort;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::vector::RuntimeEmbedders;
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value;
@ -35,6 +36,12 @@ pub struct Conf<'a> {
pub configure: fn(&mut Settings),
pub filter: Option<&'a str>,
pub sort: Option<Vec<&'a str>>,
/// set to skip documents (offset, limit)
pub offsets: &'a [Option<(usize, usize)>],
/// enable if you want to bench getting documents without querying
pub get_documents: bool,
/// configure the benchmark sample size
pub sample_size: Option<usize>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key, if there is None we'll auto-generate docids for every documents
@ -52,6 +59,9 @@ impl Conf<'_> {
configure: |_| (),
filter: None,
sort: None,
offsets: &[None],
get_documents: false,
sample_size: None,
optional_words: true,
primary_key: None,
};
@ -90,7 +100,7 @@ pub fn base_setup(conf: &Conf) -> Index {
(conf.configure)(&mut builder);
builder.execute(|_| (), || false).unwrap();
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
wtxn.commit().unwrap();
let config = IndexerConfig::default();
@ -125,9 +135,10 @@ pub fn base_setup(conf: &Conf) -> Index {
new_fields_ids_map,
primary_key,
&document_changes,
EmbeddingConfigs::default(),
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
@ -144,25 +155,79 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
let name = format!("{}: {}", file_name, conf.group_name);
let mut group = c.benchmark_group(&name);
if let Some(sample_size) = conf.sample_size {
group.sample_size(sample_size);
}
for &query in conf.queries {
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
let _ids = search.execute().unwrap();
});
});
for offset in conf.offsets {
let parameter = match offset {
None => query.to_string(),
Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
};
group.bench_with_input(
BenchmarkId::from_parameter(parameter),
&query,
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
if let Some((offset, limit)) = offset {
search.offset(*offset).limit(*limit);
}
let _ids = search.execute().unwrap();
});
},
);
}
}
if conf.get_documents {
for offset in conf.offsets {
let parameter = match offset {
None => String::from("get_documents"),
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
};
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
let all_docs = index.documents_ids(&rtxn).unwrap();
let facet_sort =
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
let iter = facet_sort.iter().unwrap();
if let Some((offset, limit)) = offset {
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = iter.collect::<Vec<_>>();
}
} else {
let all_docs = index.documents_ids(&rtxn).unwrap();
if let Some((offset, limit)) = offset {
let _results =
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = all_docs.iter().collect::<Vec<_>>();
}
}
});
});
}
}
group.finish();
index.prepare_for_closing().wait();

View File

@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
writeln!(
&mut manifest_paths_file,
r#"pub const {}: &str = {:?};"#,
dataset.to_case(Case::ScreamingSnake),
dataset.to_case(Case::UpperSnake),
out_file.display(),
)?;

View File

@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.37", features = ["parsing"] }
time = { version = "0.3.41", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.95"
vergen-git2 = "1.0.2"
anyhow = "1.0.98"
vergen-git2 = "1.0.7"

View File

@ -11,21 +11,21 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.95"
flate2 = "1.0.35"
http = "1.2.0"
anyhow = "1.0.98"
flate2 = "1.1.2"
http = "1.3.1"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.20.2"
once_cell = "1.21.3"
regex = "1.11.1"
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -1,12 +1,17 @@
#![allow(clippy::type_complexity)]
#![allow(clippy::wrong_self_convention)]
use std::collections::BTreeMap;
use meilisearch_types::batches::BatchId;
use meilisearch_types::byte_unit::Byte;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
@ -141,6 +146,12 @@ pub enum KindDump {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<String, ExportIndexSettings>,
},
UpgradeDatabase {
from: (u32, u32, u32),
},
@ -213,6 +224,15 @@ impl From<KindWithContent> for KindDump {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| (pattern.to_string(), settings))
.collect(),
},
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
@ -329,6 +349,7 @@ pub(crate) mod test {
write_channel_congestion: None,
internal_database_sizes: Default::default(),
},
embedder_stats: Default::default(),
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),

View File

@ -116,6 +116,15 @@ impl DumpReader {
}
}
pub fn chat_completions_settings(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(String, v6::ChatCompletionSettings)>> + '_>> {
match self {
DumpReader::Current(current) => current.chat_completions_settings(),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
match self {
DumpReader::Current(current) => Ok(current.features()),

View File

@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
@ -21,6 +22,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
@ -192,6 +194,34 @@ impl V6Reader {
)
}
pub fn chat_completions_settings(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(String, ChatCompletionSettings)>> + '_>> {
let entries = match fs::read_dir(self.dump.path().join("chat-completions-settings")) {
Ok(entries) => entries,
Err(e) if e.kind() == ErrorKind::NotFound => return Ok(Box::new(std::iter::empty())),
Err(e) => return Err(e.into()),
};
Ok(Box::new(
entries
.map(|entry| -> Result<Option<_>> {
let entry = entry?;
let file_name = entry.file_name();
let path = Path::new(&file_name);
if entry.file_type()?.is_file() && path.extension() == Some(OsStr::new("json"))
{
let name = path.file_stem().unwrap().to_str().unwrap().to_string();
let file = File::open(entry.path())?;
let settings = serde_json::from_reader(file)?;
Ok(Some((name, settings)))
} else {
Ok(None)
}
})
.filter_map(|entry| entry.transpose()),
))
}
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}

View File

@ -5,7 +5,7 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
@ -51,6 +51,10 @@ impl DumpWriter {
KeyWriter::new(self.dir.path().to_path_buf())
}
/// Creates the writer that stores chat completions settings in the
/// `chat-completions-settings` directory of the dump.
pub fn create_chat_completions_settings(&self) -> Result<ChatCompletionsSettingsWriter> {
    let settings_dir = self.dir.path().join("chat-completions-settings");
    ChatCompletionsSettingsWriter::new(settings_dir)
}
pub fn create_tasks_queue(&self) -> Result<TaskWriter> {
TaskWriter::new(self.dir.path().join("tasks"))
}
@ -104,6 +108,24 @@ impl KeyWriter {
}
}
pub struct ChatCompletionsSettingsWriter {
path: PathBuf,
}
impl ChatCompletionsSettingsWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
Ok(ChatCompletionsSettingsWriter { path })
}
pub fn push_settings(&mut self, name: &str, settings: &ChatCompletionSettings) -> Result<()> {
let mut settings_file = File::create(self.path.join(name).with_extension("json"))?;
serde_json::to_writer(&mut settings_file, &settings)?;
settings_file.flush()?;
Ok(())
}
}
pub struct TaskWriter {
queue: BufWriter<File>,
update_files: PathBuf,

View File

@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.15.0"
thiserror = "2.0.9"
tempfile = "3.20.0"
thiserror = "2.0.12"
tracing = "0.1.41"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }

View File

@ -14,7 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.5"
unescaper = "0.1.6"
[dev-dependencies]
# fixed version due to format breakages in v1.40

View File

@ -16,7 +16,7 @@ license.workspace = true
serde_json = "1.0"
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
criterion = { version = "0.6.0", features = ["html_reports"] }
[[bench]]
name = "benchmarks"

View File

@ -12,11 +12,11 @@ license.workspace = true
[dependencies]
arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.16.0"
clap = { version = "4.5.24", features = ["derive"] }
either = "1.13.0"
bumpalo = "3.18.1"
clap = { version = "4.5.40", features = ["derive"] }
either = "1.15.0"
fastrand = "2.3.0"
milli = { path = "../milli" }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"

View File

@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::IndexerConfig;
use milli::vector::EmbeddingConfigs;
use milli::vector::RuntimeEmbedders;
use milli::Index;
use serde_json::Value;
use tempfile::TempDir;
@ -89,7 +89,7 @@ fn main() {
let mut new_fields_ids_map = db_fields_ids_map.clone();
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let embedders = RuntimeEmbedders::default();
let mut indexer = indexer::DocumentOperation::new();
let mut operations = Vec::new();
@ -144,6 +144,7 @@ fn main() {
embedders,
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();

View File

@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.95"
anyhow = "1.0.98"
bincode = "1.3.3"
byte-unit = "5.1.6"
bumpalo = "3.16.0"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0"
convert_case = "0.8.0"
csv = "1.3.1"
derive_builder = "0.20.2"
dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.35"
indexmap = "2.7.0"
flate2 = "1.1.2"
indexmap = "2.9.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.138", features = ["preserve_order"] }
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
"serde-well-known",
"formatting",
"parsing",
@ -43,7 +43,8 @@ time = { version = "0.3.37", features = [
] }
tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
backoff = "0.4.0"
[dev-dependencies]
big_s = "1.0.2"

View File

@ -4,6 +4,7 @@ use std::io;
use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
@ -211,6 +212,23 @@ impl<'a> Dump<'a> {
KindWithContent::DumpCreation { keys, instance_uid }
}
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
KindDump::Export { url, api_key, payload_size, indexes } => {
KindWithContent::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| {
Ok((
IndexUidPattern::try_from(pattern)
.map_err(|_| Error::CorruptedDump)?,
settings,
))
})
.collect::<Result<_, Error>>()?,
}
}
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
},
};

View File

@ -151,6 +151,10 @@ pub enum Error {
CorruptedTaskQueue,
#[error(transparent)]
DatabaseUpgrade(Box<Self>),
#[error(transparent)]
Export(Box<Self>),
#[error("Failed to export documents to remote server {code} ({type}): {message} <{link}>")]
FromRemoteWhenExporting { message: String, code: String, r#type: String, link: String },
#[error("Failed to rollback for index `{index}`: {rollback_outcome} ")]
RollbackFailed { index: String, rollback_outcome: RollbackOutcome },
#[error(transparent)]
@ -212,6 +216,7 @@ impl Error {
| Error::BatchNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::FromRemoteWhenExporting { .. }
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
@ -221,6 +226,7 @@ impl Error {
| Error::IoError(_)
| Error::Persist(_)
| Error::FeatureNotEnabled(_)
| Error::Export(_)
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
@ -282,6 +288,7 @@ impl ErrorCode for Error {
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::FromRemoteWhenExporting { .. } => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
@ -294,6 +301,7 @@ impl ErrorCode for Error {
Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal,
Error::DatabaseUpgrade(_) => Code::Internal,
Error::Export(_) => Code::Internal,
Error::RollbackFailed { .. } => Code::Internal,
Error::UnrecoverableError(_) => Code::Internal,
Error::IndexSchedulerVersionMismatch { .. } => Code::Internal,

View File

@ -144,6 +144,19 @@ impl RoFeatures {
.into())
}
}
pub fn check_multimodal(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.multimodal {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "multimodal",
issue_link: "https://github.com/orgs/meilisearch/discussions/846",
}
.into())
}
}
}
impl FeatureData {

View File

@ -289,6 +289,9 @@ fn snapshot_details(d: &Details) -> String {
Details::IndexSwap { swaps } => {
format!("{{ swaps: {swaps:?} }}")
}
Details::Export { url, api_key, payload_size, indexes } => {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
@ -343,6 +346,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
uid,
details,
stats,
embedder_stats,
started_at,
finished_at,
progress: _,
@ -366,6 +370,12 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() {
snap.push_str(&format!(
"embedder stats: {}, ",
serde_json::to_string(&embedder_stats).unwrap()
));
}
snap.push_str(&format!("stop reason: {}, ", serde_json::to_string(&stop_reason).unwrap()));
snap.push('}');
snap

View File

@ -57,12 +57,15 @@ use meilisearch_types::features::{
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::vector::json_template::JsonTemplate;
use meilisearch_types::milli::vector::{
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
};
use meilisearch_types::milli::{self, Index};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
use queue::Queue;
@ -851,29 +854,42 @@ impl IndexScheduler {
&self,
index_uid: String,
embedding_configs: Vec<IndexEmbeddingConfig>,
) -> Result<EmbeddingConfigs> {
) -> Result<RuntimeEmbedders> {
let res: Result<_> = embedding_configs
.into_iter()
.map(
|IndexEmbeddingConfig {
name,
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
..
}| {
let prompt = Arc::new(
prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
fragments,
}|
-> Result<(String, Arc<RuntimeEmbedder>)> {
let document_template = prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let fragments = fragments
.into_inner()
.into_iter()
.map(|fragment| {
let value = embedder_options.fragment(&fragment.name).unwrap();
let template = JsonTemplate::new(value.clone()).unwrap();
RuntimeFragment { name: fragment.name, id: fragment.id, template }
})
.collect();
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((
name,
(embedder.clone(), prompt, quantized.unwrap_or_default()),
let runtime = Arc::new(RuntimeEmbedder::new(
embedder.clone(),
document_template,
fragments,
quantized.unwrap_or_default(),
));
return Ok((name, runtime));
}
}
@ -889,11 +905,19 @@ impl IndexScheduler {
let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone());
}
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
let runtime = Arc::new(RuntimeEmbedder::new(
embedder.clone(),
document_template,
fragments,
quantized.unwrap_or_default(),
));
Ok((name, runtime))
},
)
.collect();
res.map(EmbeddingConfigs::new)
res.map(RuntimeEmbedders::new)
}
pub fn chat_settings(&self, uid: &str) -> Result<Option<ChatCompletionSettings>> {

View File

@ -103,6 +103,7 @@ make_enum_progress! {
pub enum DumpCreationProgress {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheChatCompletionSettings,
DumpTheTasks,
DumpTheBatches,
DumpTheIndexes,
@ -175,8 +176,17 @@ make_enum_progress! {
}
}
// Named progress steps for the export feature, declared through `make_enum_progress!`.
// NOTE(review): the variants are not referenced by the export code visible in this
// file view — confirm where (or whether) they are reported.
make_enum_progress! {
pub enum Export {
EnsuringCorrectnessOfTheTarget,
ExportingTheSettings,
ExportingTheDocuments,
}
}
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Index alias AtomicIndexStep => "index" );
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );

View File

@ -179,6 +179,7 @@ impl BatchQueue {
progress: None,
details: batch.details,
stats: batch.stats,
embedder_stats: batch.embedder_stats.as_ref().into(),
started_at: batch.started_at,
finished_at: batch.finished_at,
enqueued_at: batch.enqueued_at,

View File

@ -71,6 +71,7 @@ impl From<KindWithContent> for AutobatchKind {
KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")

View File

@ -1,4 +1,5 @@
use std::fmt;
use std::io::ErrorKind;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@ -47,6 +48,9 @@ pub(crate) enum Batch {
IndexSwap {
task: Task,
},
Export {
task: Task,
},
UpgradeDatabase {
tasks: Vec<Task>,
},
@ -103,6 +107,7 @@ impl Batch {
Batch::TaskCancelation { task, .. }
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::Export { task }
| Batch::IndexUpdate { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
@ -142,6 +147,7 @@ impl Batch {
| TaskDeletions(_)
| SnapshotCreation(_)
| Dump(_)
| Export { .. }
| UpgradeDatabase { .. }
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
@ -167,6 +173,7 @@ impl fmt::Display for Batch {
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::Export { .. } => f.write_str("Export")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
};
match index_uid {
@ -426,9 +433,10 @@ impl IndexScheduler {
/// 0. We get the *last* task to cancel.
/// 1. We get the tasks to upgrade.
/// 2. We get the *next* task to delete.
/// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
/// 3. We get the *next* export to process.
/// 4. We get the *next* snapshot to process.
/// 5. We get the *next* dump to process.
/// 6. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(
&self,
@ -500,7 +508,17 @@ impl IndexScheduler {
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
}
// 3. we batch the snapshot.
// 3. we batch the export.
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
if !to_export.is_empty() {
let task_id = to_export.iter().next().expect("There must be at least one export task");
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
current_batch.processing([&mut task]);
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
return Ok(Some((Batch::Export { task }, current_batch)));
}
// 4. we batch the snapshot.
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
@ -510,7 +528,7 @@ impl IndexScheduler {
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
}
// 4. we batch the dumps.
// 5. we batch the dumps.
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() {
let mut task =
@ -523,7 +541,7 @@ impl IndexScheduler {
return Ok(Some((Batch::Dump(task), current_batch)));
}
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@ -577,7 +595,11 @@ impl IndexScheduler {
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
if let Some(uuid) = task.content_uuid() {
let content_size = self.queue.file_store.compute_size(uuid)?;
let content_size = match self.queue.file_store.compute_size(uuid) {
Ok(content_size) => content_size,
Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
Err(otherwise) => return Err(otherwise.into()),
};
total_size = total_size.saturating_add(content_size);
}

View File

@ -4,6 +4,7 @@ mod autobatcher_test;
mod create_batch;
mod process_batch;
mod process_dump_creation;
mod process_export;
mod process_index_operation;
mod process_snapshot_creation;
mod process_upgrade;

View File

@ -162,8 +162,13 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
let (tasks, congestion) = self.apply_index_operation(
&mut index_wtxn,
&index,
op,
&progress,
current_batch.embedder_stats.clone(),
)?;
{
progress.update_progress(FinalizingIndexStep::Committing);
@ -238,10 +243,12 @@ impl IndexScheduler {
);
builder.set_primary_key(primary_key);
let must_stop_processing = self.scheduler.must_stop_processing.clone();
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
&|| must_stop_processing.get(),
&progress,
current_batch.embedder_stats.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
index_wtxn.commit()?;
@ -361,6 +368,46 @@ impl IndexScheduler {
task.status = Status::Succeeded;
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::Export { mut task } => {
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
else {
unreachable!()
};
let ret = catch_unwind(AssertUnwindSafe(|| {
self.process_export(
url,
api_key.as_deref(),
payload_size.as_ref(),
indexes,
progress,
)
}));
let stats = match ret {
Ok(Ok(stats)) => stats,
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
Ok(Err(e)) => return Err(Error::Export(Box::new(e))),
Err(e) => {
let msg = match e.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match e.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
msg.to_string(),
))));
}
};
task.status = Status::Succeeded;
if let Some(Details::Export { indexes, .. }) = task.details.as_mut() {
*indexes = stats;
}
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::UpgradeDatabase { mut tasks } => {
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
unreachable!();
@ -708,9 +755,11 @@ impl IndexScheduler {
from.1,
from.2
);
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let ret = catch_unwind(std::panic::AssertUnwindSafe(|| {
self.process_rollback(from, progress)
})) {
}));
match ret {
Ok(Ok(())) => {}
Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))),
Err(e) => {

View File

@ -43,7 +43,16 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?;
// 2. dump the tasks
// 2. dump the chat completion settings
// TODO should I skip the export if the chat completion has been disabled?
progress.update_progress(DumpCreationProgress::DumpTheChatCompletionSettings);
let mut dump_chat_completion_settings = dump.create_chat_completions_settings()?;
for result in self.chat_settings.iter(&rtxn)? {
let (name, chat_settings) = result?;
dump_chat_completion_settings.push_settings(name, &chat_settings)?;
}
// 3. dump the tasks
progress.update_progress(DumpCreationProgress::DumpTheTasks);
let mut dump_tasks = dump.create_tasks_queue()?;
@ -81,7 +90,7 @@ impl IndexScheduler {
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
@ -105,7 +114,7 @@ impl IndexScheduler {
}
dump_tasks.flush()?;
// 3. dump the batches
// 4. dump the batches
progress.update_progress(DumpCreationProgress::DumpTheBatches);
let mut dump_batches = dump.create_batches_queue()?;
@ -138,7 +147,7 @@ impl IndexScheduler {
}
dump_batches.flush()?;
// 4. Dump the indexes
// 5. Dump the indexes
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
@ -165,9 +174,6 @@ impl IndexScheduler {
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index
.embedding_configs(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let nb_documents = index
.number_of_documents(&rtxn)
@ -178,7 +184,7 @@ impl IndexScheduler {
let documents = index
.all_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// 4.1. Dump the documents
// 5.1. Dump the documents
for ret in documents {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
@ -221,16 +227,12 @@ impl IndexScheduler {
return Err(Error::from_milli(user_err, Some(uid.to_string())));
};
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(id));
for (embedder_name, (embeddings, regenerate)) in embeddings {
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: !user_provided,
regenerate,
};
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
@ -240,7 +242,7 @@ impl IndexScheduler {
atomic.fetch_add(1, Ordering::Relaxed);
}
// 4.2. Dump the settings
// 5.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
@ -251,7 +253,7 @@ impl IndexScheduler {
Ok(())
})?;
// 5. Dump experimental feature settings
// 6. Dump experimental feature settings
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;

View File

@ -0,0 +1,367 @@
use std::collections::BTreeMap;
use std::io::{self, Write as _};
use std::sync::atomic;
use std::time::Duration;
use backoff::ExponentialBackoff;
use byte_unit::Byte;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
use meilisearch_types::settings::{self, SecretPolicy};
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
use serde::Deserialize;
use ureq::{json, Response};
use super::MustStopProcessing;
use crate::processing::AtomicDocumentStep;
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
pub(super) fn process_export(
&self,
base_url: &str,
api_key: Option<&str>,
payload_size: Option<&Byte>,
indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
progress: Progress,
) -> Result<BTreeMap<IndexUidPattern, DetailsExportIndexSettings>> {
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?;
let indexes: Vec<_> = self
.index_names()?
.into_iter()
.flat_map(|uid| {
indexes
.iter()
.find(|(pattern, _)| pattern.matches_str(&uid))
.map(|(pattern, settings)| (pattern, uid, settings))
})
.collect();
let mut output = BTreeMap::new();
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let must_stop_processing = self.scheduler.must_stop_processing.clone();
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}
progress.update_progress(VariableNameStep::<ExportIndex>::new(
format!("Exporting index `{uid}`"),
i as u32,
indexes.len() as u32,
));
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
});
let index_exists = match response {
Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
false
}
Err(e) => return Err(e),
};
let primary_key = index
.primary_key(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Create the index
if !index_exists {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Patch the index primary key
if index_exists && *override_settings {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Send the index settings
if !index_exists || *override_settings {
let mut settings =
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet;
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer);
}
request.send_json(settings.clone()).map_err(into_backoff_error)
})?;
}
let filter = filter
.as_ref()
.map(Filter::from_json)
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
.flatten();
let filter_universe = filter
.map(|f| f.evaluate(&index_rtxn, &index))
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let whole_universe = index
.documents_ids(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let universe = filter_universe.unwrap_or(whole_universe);
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// We don't need to keep this one alive as we will
// spawn many threads to process the documents
drop(index_rtxn);
let total_documents = universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
output.insert(
IndexUidPattern::new_unchecked(uid.clone()),
DetailsExportIndexSettings {
settings: (*export_settings).clone(),
matched_documents: Some(total_documents as u64),
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new();
for (i, docid) in universe.iter().enumerate() {
if i % ctx.num_threads() != ctx.index() {
continue;
}
let document = index
.document(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// TODO definitely factorize this code
'inject_vectors: {
let embeddings = index
.embeddings(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(Error::from_milli(
milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(
&index_rtxn,
std::iter::once(docid),
)
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={docid}")
}
},
value: vectors.clone(),
},
),
Some(uid.to_string()),
));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate,
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
}
}
tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
// We compress the documents before sending them
let mut encoder =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder
.write_all(&buffer)
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
encoder
.finish()
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
buffer.clear();
compressed_buffer.clear();
}
buffer.extend_from_slice(&tmp_buffer);
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
Ok(())
})
.map_err(|e| {
Error::from_milli(
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}
Ok(output)
}
}
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
where
F: Fn() -> Result<ureq::Response, backoff::Error<ureq::Error>>,
{
match backoff::retry(ExponentialBackoff::default(), || {
if must_stop_processing.get() {
return Err(backoff::Error::Permanent(ureq::Error::Status(
u16::MAX,
// 444: Connection Closed Without Response
Response::new(444, "Abort", "Aborted task").unwrap(),
)));
}
send_request()
}) {
Ok(response) => Ok(response),
Err(backoff::Error::Permanent(e)) => Err(ureq_error_into_error(e)),
Err(backoff::Error::Transient { err, retry_after: _ }) => Err(ureq_error_into_error(err)),
}
}
fn into_backoff_error(err: ureq::Error) -> backoff::Error<ureq::Error> {
match err {
// Those code status must trigger an automatic retry
// <https://www.restapitutorial.com/advanced/responses/retries>
ureq::Error::Status(408 | 429 | 500 | 502 | 503 | 504, _) => {
backoff::Error::Transient { err, retry_after: None }
}
ureq::Error::Status(_, _) => backoff::Error::Permanent(err),
ureq::Error::Transport(_) => backoff::Error::Transient { err, retry_after: None },
}
}
/// Converts a `ureq::Error` into an `Error`.
fn ureq_error_into_error(error: ureq::Error) -> Error {
#[derive(Deserialize)]
struct MeiliError {
message: String,
code: String,
r#type: String,
link: String,
}
match error {
// This is a workaround to handle task abortion - the error propagation path
// makes it difficult to cleanly surface the abortion at this level.
ureq::Error::Status(u16::MAX, _) => Error::AbortedTask,
ureq::Error::Status(_, response) => match response.into_json() {
Ok(MeiliError { message, code, r#type, link }) => {
Error::FromRemoteWhenExporting { message, code, r#type, link }
}
Err(e) => e.into(),
},
ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
}
}
/// Uninhabited marker type: it is never instantiated and only serves as the type
/// parameter of `VariableNameStep` for the per-index progress step of an export.
enum ExportIndex {}

View File

@ -1,11 +1,13 @@
use std::sync::Arc;
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::Index;
@ -24,7 +26,7 @@ impl IndexScheduler {
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index, progress),
skip(self, index_wtxn, index, progress, embedder_stats),
target = "indexing::scheduler"
)]
pub(crate) fn apply_index_operation<'i>(
@ -33,6 +35,7 @@ impl IndexScheduler {
index: &'i Index,
operation: IndexOperation,
progress: &Progress,
embedder_stats: Arc<EmbedderStats>,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now();
@ -86,8 +89,9 @@ impl IndexScheduler {
let mut content_files_iter = content_files.iter();
let mut indexer = indexer::DocumentOperation::new();
let embedders = index
.embedding_configs()
.embedding_configs(index_wtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
.map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
for operation in operations {
match operation {
@ -113,18 +117,8 @@ impl IndexScheduler {
}
}
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
let pool = &indexer_config.thread_pool;
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
let (document_changes, operation_stats, primary_key) = indexer
@ -187,6 +181,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
progress,
&embedder_stats,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
);
@ -266,18 +261,8 @@ impl IndexScheduler {
let mut congestion = None;
if task.error.is_none() {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
let pool = &indexer_config.thread_pool;
let candidates_count = candidates.len();
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
@ -290,8 +275,9 @@ impl IndexScheduler {
})
.unwrap()?;
let embedders = index
.embedding_configs()
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
progress.update_progress(DocumentEditionProgress::Indexing);
@ -308,6 +294,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
progress,
&embedder_stats,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
@ -429,18 +416,8 @@ impl IndexScheduler {
let mut congestion = None;
if !tasks.iter().all(|res| res.error.is_some()) {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
let pool = &indexer_config.thread_pool;
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
let mut indexer = indexer::DocumentDeletion::new();
@ -448,8 +425,9 @@ impl IndexScheduler {
indexer.delete_documents_by_docids(to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
.embedding_configs()
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
progress.update_progress(DocumentDeletionProgress::Indexing);
@ -466,6 +444,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
progress,
&embedder_stats,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
@ -498,14 +477,11 @@ impl IndexScheduler {
}
progress.update_progress(SettingsProgress::ApplyTheSettings);
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)
let congestion = builder
.execute(&|| must_stop_processing.get(), progress, embedder_stats)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
Ok((tasks, None))
Ok((tasks, congestion))
}
IndexOperation::DocumentClearAndSetting {
index_uid,
@ -521,6 +497,7 @@ impl IndexScheduler {
tasks: cleared_tasks,
},
progress,
embedder_stats.clone(),
)?;
let (settings_tasks, _congestion) = self.apply_index_operation(
@ -528,6 +505,7 @@ impl IndexScheduler {
index,
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
progress,
embedder_stats,
)?;
let mut tasks = settings_tasks;

View File

@ -0,0 +1,17 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
expression: config.embedder_options
---
{
"Rest": {
"api_key": "My super secret",
"distribution": null,
"dimensions": 4,
"url": "http://localhost:7777",
"request": "{{text}}",
"search_fragments": {},
"indexing_fragments": {},
"response": "{{embedding}}",
"headers": {}
}
}

View File

@ -0,0 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: simple_hf_config.embedder_options
---
{
"HuggingFace": {
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"distribution": null,
"pooling": "useModel"
}
}

View File

@ -0,0 +1,15 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: doc
---
{
"doggo": "Intel",
"breed": "beagle",
"_vectors": {
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@ -0,0 +1,15 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: doc
---
{
"doggo": "kefir",
"breed": "patou",
"_vectors": {
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@ -1,12 +1,17 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: simple_hf_config.embedder_options
expression: fakerest_config.embedder_options
---
{
"HuggingFace": {
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"Rest": {
"api_key": "My super secret",
"distribution": null,
"pooling": "useModel"
"dimensions": 384,
"url": "http://localhost:7777",
"request": "{{text}}",
"search_fragments": {},
"indexing_fragments": {},
"response": "{{embedding}}",
"headers": {}
}
}

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, 
search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: 
NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: 
NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, 
search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: 
NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: 
NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: 
NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: 
NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, 
distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: 
NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: 
NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@ -3,11 +3,11 @@ use std::collections::BTreeMap;
use big_s::S;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::SettingEmbeddingSettings;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use milli::vector::db::IndexEmbeddingConfig;
use roaring::RoaringBitmap;
use crate::insta_snapshot::snapshot_index_scheduler;
@ -690,11 +690,20 @@ fn test_settings_update() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"default");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(config.embedder_options);
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
}
#[test]
@ -732,6 +741,7 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@ -765,6 +775,7 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@ -805,6 +816,7 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@ -846,6 +858,7 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,

View File

@ -3,13 +3,14 @@ use std::collections::BTreeMap;
use big_s::S;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::milli::vector::SearchQuery;
use meilisearch_types::milli::{self, obkv_to_json};
use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use milli::update::IndexDocumentsMethod::*;
use milli::vector::db::IndexEmbeddingConfig;
use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::read_json;
@ -85,28 +86,51 @@ fn import_vectors() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
let IndexEmbeddingConfig { name, config: fakerest_config, fragments } =
configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone();
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } =
configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
let hf_runtime = configs.get(&simple_hf_name).unwrap();
let hf_embedder = &hf_runtime.embedder;
let beagle_embed = hf_embedder
.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None)
.unwrap();
let lab_embed =
hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder
.embed_search(SearchQuery::Text("kefir the patou best doggo"), None)
.unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
};
@ -166,22 +190,38 @@ fn import_vectors() {
let rtxn = index.read_txn().unwrap();
// Ensure the document has been inserted into the relevant bitmap
let configs = index.embedding_configs(&rtxn).unwrap();
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -239,25 +279,41 @@ fn import_vectors() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let embedders = index.embedding_configs();
// Ensure the document has been inserted into the relevant bitmap
let configs = index.embedding_configs(&rtxn).unwrap();
let configs = embedders.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou because set to regenerate
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -399,8 +455,8 @@ fn import_vectors_first_and_embedder_later() {
.collect::<Vec<_>>();
// all the vectors linked to the newly specified embedder have been removed
// Only the unknown embedders stays in the document DB
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
// even though we specified the vector for the ID 3, it shouldn't be marked
// as user provided since we explicitly marked it as NOT user provided.
snapshot!(format!("{conf:#?}"), @r###"
@ -426,19 +482,28 @@ fn import_vectors_first_and_embedder_later() {
},
quantized: None,
},
user_provided: RoaringBitmap<[1, 2]>,
fragments: FragmentConfigs(
[],
),
},
]
"###);
let info =
index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>");
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embeddings = &embeddings["my_doggo_embedder"];
let (embeddings, _) = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@ -493,7 +558,7 @@ fn import_vectors_first_and_embedder_later() {
"###);
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@ -501,7 +566,7 @@ fn import_vectors_first_and_embedder_later() {
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
}
@ -603,33 +668,35 @@ fn delete_document_containing_vector() {
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
quantized: None,
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
user_provided: RoaringBitmap<[0]>,
quantized: None,
},
]
"###);
fragments: FragmentConfigs(
[],
),
},
]
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["manual"];
let (embedding, _) = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");
index_scheduler
@ -647,30 +714,32 @@ fn delete_document_containing_vector() {
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
quantized: None,
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
user_provided: RoaringBitmap<[]>,
quantized: None,
},
]
"###);
fragments: FragmentConfigs(
[],
),
},
]
"###);
}
#[test]
@ -800,7 +869,7 @@ fn delete_embedder_with_user_provided_vectors() {
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"regenerate":false,"embeddings":[[0.0,0.0,0.0]]}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"regenerate":false,"embeddings":[[1.0,1.0,1.0]]}}}]"###);
}
{
@ -835,6 +904,6 @@ fn delete_embedder_with_user_provided_vectors() {
.collect::<Vec<_>>();
// FIXME: redaction
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[0.0,0.0,0.0]]},\"my_doggo_embedder\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]]}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0]]}}}]""###);
}
}

View File

@ -37,6 +37,7 @@ pub(crate) enum FailureLocation {
InsideCreateBatch,
InsideProcessBatch,
PanicInsideProcessBatch,
ProcessExport,
ProcessUpgrade,
AcquiringWtxn,
UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },

View File

@ -1,7 +1,9 @@
//! Utility functions on the DBs. Mainly getter and setters.
use crate::milli::progress::EmbedderStats;
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use std::sync::Arc;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
@ -27,6 +29,7 @@ pub struct ProcessingBatch {
pub uid: BatchId,
pub details: DetailsView,
pub stats: BatchStats,
pub embedder_stats: Arc<EmbedderStats>,
pub statuses: HashSet<Status>,
pub kinds: HashSet<Kind>,
@ -48,6 +51,7 @@ impl ProcessingBatch {
uid,
details: DetailsView::default(),
stats: BatchStats::default(),
embedder_stats: Default::default(),
statuses,
kinds: HashSet::default(),
@ -146,6 +150,7 @@ impl ProcessingBatch {
progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
embedder_stats: self.embedder_stats.as_ref().into(),
started_at: self.started_at,
finished_at: self.finished_at,
enqueued_at: self.enqueued_at,
@ -273,6 +278,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
K::TaskCancelation { .. }
| K::TaskDeletion { .. }
| K::DumpCreation { .. }
| K::Export { .. }
| K::UpgradeDatabase { .. }
| K::SnapshotCreation => (),
};
@ -600,6 +606,9 @@ impl crate::IndexScheduler {
Details::Dump { dump_uid: _ } => {
assert_eq!(kind.as_kind(), Kind::DumpCreation);
}
Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => {
assert_eq!(kind.as_kind(), Kind::Export);
}
Details::UpgradeDatabase { from: _, to: _ } => {
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
}

View File

@ -15,7 +15,7 @@ license.workspace = true
serde_json = "1.0"
[dev-dependencies]
criterion = "0.5.1"
criterion = "0.6.0"
[[bench]]
name = "depth"

View File

@ -14,4 +14,6 @@ license.workspace = true
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.20"
once_cell = "1.21"
regex-lite = "0.1.6"
uuid = { version = "1.17.0", features = ["v4"] }

View File

@ -4,9 +4,16 @@ use std::path::{Path, PathBuf};
use std::sync::Mutex;
pub use insta;
use insta::internals::{Content, ContentPath};
use once_cell::sync::Lazy;
use regex_lite::Regex;
static SNAPSHOT_NAMES: Lazy<Mutex<HashMap<PathBuf, usize>>> = Lazy::new(Mutex::default);
/// A lazily-compiled regex matching hyphenated UUIDs embedded anywhere in a string.
///
/// The pattern accepts any 8-4-4-4-12 group of hexadecimal digits (upper or lower
/// case). It does not inspect the version or variant digits, so it matches UUID v4
/// values as well as any other UUID-shaped text.
static UUID_IN_MESSAGE_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
.unwrap()
});
/// Return the md5 hash of the given string
pub fn hash_snapshot(snap: &str) -> String {
@ -26,6 +33,39 @@ pub fn default_snapshot_settings_for_test<'a>(
let filename = path.file_name().unwrap().to_str().unwrap();
settings.set_omit_expression(true);
/// Dynamic redaction replacing every UUID-shaped substring of a string value
/// with the literal `[uuid]`.
///
/// Non-string content is handed back untouched. The content path is unused.
fn uuid_in_message_redaction(content: Content, _content_path: ContentPath) -> Content {
    if let Content::String(text) = &content {
        let redacted = UUID_IN_MESSAGE_RE.replace_all(text, "[uuid]");
        return Content::String(redacted.into_owned());
    }
    content
}
/// Dynamic redaction replacing every UUID-shaped substring found in the *keys*
/// of a map with the literal `[uuid]`.
///
/// Only string keys are rewritten; non-string keys and all values are kept as
/// they are. Content that is not a map is handed back untouched. The content
/// path is unused.
fn uuid_in_json_key_redaction(content: Content, _content_path: ContentPath) -> Content {
    match content {
        Content::Map(entries) => {
            // `entries` is owned here, so keys and values are moved into the
            // new map instead of being cloned one by one.
            let redacted = entries
                .into_iter()
                .map(|(key, value)| match key {
                    Content::String(text) => {
                        let replaced = UUID_IN_MESSAGE_RE.replace_all(&text, "[uuid]");
                        (Content::String(replaced.into_owned()), value)
                    }
                    other => (other, value),
                })
                .collect();
            Content::Map(redacted)
        }
        other => other,
    }
}
settings.add_dynamic_redaction(".**.message", uuid_in_message_redaction);
settings.add_dynamic_redaction(".**.indexUid", uuid_in_message_redaction);
settings.add_dynamic_redaction(".**.facetsByIndex", uuid_in_json_key_redaction);
let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name);
let test_name = test_name.rsplit("::").next().unwrap().to_owned();
@ -232,6 +272,9 @@ macro_rules! json_string {
#[cfg(test)]
mod tests {
use crate as meili_snap;
use crate::UUID_IN_MESSAGE_RE;
use uuid::Uuid;
#[test]
fn snap() {
snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820");
@ -279,4 +322,14 @@ mod tests {
// snapshot_hash!("", name: "", @"d41d8cd98f00b204e9800998ecf8427e");
}
}
#[test]
fn uuid_in_message_regex() {
    // Three freshly generated UUIDs, separated by plain digits that must
    // survive the replacement unchanged.
    let [first, second, third] = [Uuid::new_v4(), Uuid::new_v4(), Uuid::new_v4()];
    let input = format!("1 {first} 2 {second} 3 {third} 4");
    let redacted = UUID_IN_MESSAGE_RE.replace_all(&input, "[uuid]");
    assert_eq!(redacted, "1 [uuid] 2 [uuid] 3 [uuid] 4");
}
}

View File

@ -17,10 +17,10 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
sha2 = "0.10.9"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }

View File

@ -158,7 +158,7 @@ impl AuthController {
self.store.delete_all_keys()
}
/// Delete all the keys in the DB.
/// Insert a key directly into the store.
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@ -351,6 +351,7 @@ pub struct IndexSearchRules {
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_chat())?;
store.put_api_key(Key::default_read_only_admin())?;
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;

View File

@ -88,7 +88,13 @@ impl HeedAuthStore {
let mut actions = HashSet::new();
for action in &key.actions {
match action {
Action::All => actions.extend(enum_iterator::all::<Action>()),
Action::All => {
actions.extend(enum_iterator::all::<Action>());
actions.remove(&Action::AllGet);
}
Action::AllGet => {
actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
}
Action::DocumentsAll => {
actions.extend(
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]

View File

@ -11,37 +11,38 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.9.0", default-features = false }
anyhow = "1.0.95"
bumpalo = "3.16.0"
actix-web = { version = "4.11.0", default-features = false }
anyhow = "1.0.98"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0"
byte-unit = { version = "5.1.6", features = ["serde"] }
convert_case = "0.8.0"
csv = "1.3.1"
deserr = { version = "0.6.3", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] }
either = { version = "1.15.0", features = ["serde"] }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.35"
flate2 = "1.1.2"
fst = "0.4.7"
memmap2 = "0.9.5"
milli = { path = "../milli" }
roaring = { version = "0.10.10", features = ["serde"] }
rustc-hash = "2.1.0"
serde = { version = "1.0.217", features = ["derive"] }
roaring = { version = "0.10.12", features = ["serde"] }
rustc-hash = "2.1.1"
serde = { version = "1.0.219", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.43"
utoipa = { version = "5.3.1", features = ["macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }
tokio = "1.45"
utoipa = { version = "5.4.0", features = ["macros"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
[dev-dependencies]
# fixed version due to format breakages in v1.40

View File

@ -3,7 +3,7 @@ use serde::Serialize;
use time::{Duration, OffsetDateTime};
use utoipa::ToSchema;
use crate::batches::{Batch, BatchId, BatchStats};
use crate::batches::{Batch, BatchId, BatchStats, EmbedderStatsView};
use crate::task_view::DetailsView;
use crate::tasks::serialize_duration;
@ -14,7 +14,7 @@ pub struct BatchView {
pub uid: BatchId,
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
pub stats: BatchStatsView,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339", default)]
@ -25,13 +25,26 @@ pub struct BatchView {
pub batch_strategy: String,
}
/// Serialized form of a batch's statistics as exposed through `BatchView`.
///
/// Wraps the stored `BatchStats` together with the embedder statistics so both
/// are emitted as one object under the view's `stats` field.
#[derive(Debug, Clone, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct BatchStatsView {
// Flattened: the fields of `BatchStats` are serialized directly into this
// object rather than under a nested `stats` key.
#[serde(flatten)]
pub stats: BatchStats,
// Left out of the serialized output whenever
// `EmbedderStatsView::skip_serializing` returns true for the value.
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
pub embedder_requests: EmbedderStatsView,
}
impl BatchView {
pub fn from_batch(batch: &Batch) -> Self {
Self {
uid: batch.uid,
progress: batch.progress.clone(),
details: batch.details.clone(),
stats: batch.stats.clone(),
stats: BatchStatsView {
stats: batch.stats.clone(),
embedder_requests: batch.embedder_stats.clone(),
},
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
started_at: batch.started_at,
finished_at: batch.finished_at,

View File

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use milli::progress::ProgressView;
use milli::progress::{EmbedderStats, ProgressView};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use utoipa::ToSchema;
@ -19,6 +19,8 @@ pub struct Batch {
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
pub embedder_stats: EmbedderStatsView,
#[serde(with = "time::serde::rfc3339")]
pub started_at: OffsetDateTime,
@ -43,6 +45,7 @@ impl PartialEq for Batch {
progress,
details,
stats,
embedder_stats,
started_at,
finished_at,
enqueued_at,
@ -53,6 +56,7 @@ impl PartialEq for Batch {
&& progress.is_none() == other.progress.is_none()
&& details == &other.details
&& stats == &other.stats
&& embedder_stats == &other.embedder_stats
&& started_at == &other.started_at
&& finished_at == &other.finished_at
&& enqueued_at == &other.enqueued_at
@ -83,3 +87,30 @@ pub struct BatchStats {
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
}
/// Plain, (de)serializable snapshot of embedder statistics.
///
/// Field names are serialized in camelCase. A value can be built from a
/// `milli::progress::EmbedderStats` through the `From<&EmbedderStats>` impl.
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct EmbedderStatsView {
// Total count — presumably the number of embedder requests; confirm against
// `EmbedderStats::total_count`.
pub total: usize,
// Failure count — presumably how many of those requests failed.
pub failed: usize,
// Most recent error message, if any; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub last_error: Option<String>,
}
impl From<&EmbedderStats> for EmbedderStatsView {
    /// Takes a point-in-time copy of the shared embedder statistics.
    ///
    /// A poisoned `errors` lock is not treated as fatal: the inner value is
    /// recovered and read anyway.
    fn from(stats: &EmbedderStats) -> Self {
        use std::sync::atomic::Ordering;

        let guard = match stats.errors.read() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        };
        let total = stats.total_count.load(Ordering::Relaxed);
        let failed = guard.1 as usize;
        let last_error = guard.0.clone();
        Self { total, failed, last_error }
    }
}
impl EmbedderStatsView {
    /// Returns `true` when the view carries no information at all (both
    /// counters are zero and no error is recorded), which is the condition
    /// used by `skip_serializing_if` to drop the field from the output.
    pub fn skip_serializing(&self) -> bool {
        matches!(self, Self { total: 0, failed: 0, last_error: None })
    }
}

View File

@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@ -301,6 +302,7 @@ InvalidFacetSearchQuery , InvalidRequest , BAD_REQU
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchMedia , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
@ -308,6 +310,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
@ -389,6 +392,13 @@ InvalidDocumentEditionContext , InvalidRequest , BAD_REQU
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ;
// Export
InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexOverrideSettings , InvalidRequest , BAD_REQUEST ;
// Experimental features - Chat Completions
UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ;
UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;
@ -406,6 +416,7 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
}
@ -457,6 +468,7 @@ impl ErrorCode for milli::Error {
| UserError::MissingSourceForNested { .. }
| UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyFragments(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
@ -466,7 +478,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
UserError::InvalidSearchableAttribute { .. } => {
Code::InvalidSearchAttributesToSearchOn
}
@ -482,7 +495,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}

View File

@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError};
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
"Search the database for relevant JSON documents using an optional query.";
"Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@ -21,6 +22,7 @@ pub struct RuntimeTogglableFeatures {
pub get_task_documents_route: bool,
pub composite_embedders: bool,
pub chat_completions: bool,
pub multimodal: bool,
}
#[derive(Default, Debug, Clone, Copy)]
@ -114,7 +116,6 @@ pub enum ChatCompletionSource {
OpenAi,
AzureOpenAi,
Mistral,
Gemini,
VLlm,
}
@ -134,7 +135,6 @@ impl ChatCompletionSource {
AzureOpenAi if Self::old_openai_model(model) => System,
AzureOpenAi => Developer,
Mistral => System,
Gemini => System,
VLlm => System,
}
}
@ -154,7 +154,6 @@ impl ChatCompletionSource {
match self {
OpenAi => Some("https://api.openai.com/v1/"),
Mistral => Some("https://api.mistral.ai/v1/"),
Gemini => Some("https://generativelanguage.googleapis.com/v1beta/openai/"),
AzureOpenAi | VLlm => None,
}
}
@ -166,6 +165,7 @@ pub struct ChatCompletionPrompts {
pub system: String,
pub search_description: String,
pub search_q_param: String,
pub search_filter_param: String,
pub search_index_uid_param: String,
}
@ -175,6 +175,7 @@ impl Default for ChatCompletionPrompts {
system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
}
}

View File

@ -12,7 +12,7 @@ use crate::index_uid::{IndexUid, IndexUidFormatError};
/// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long and optionally ending with a *.
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)]
pub struct IndexUidPattern(String);

View File

@ -144,6 +144,21 @@ impl Key {
}
}
/// Builds the default read-only admin API key.
///
/// The key receives a freshly generated v4 uid, targets `IndexUidPattern::all()`, never
/// expires and carries the `Action::AllGet` and `Action::KeysGet` actions. `created_at` and
/// `updated_at` are set to the same instant.
pub fn default_read_only_admin() -> Self {
    let created_at = OffsetDateTime::now_utc();
    let name = "Default Read-Only Admin API Key";
    let description = "Use it to read information across the whole database. Caution! Do not expose this key on a public frontend";
    Self {
        name: Some(name.to_string()),
        description: Some(description.to_string()),
        uid: Uuid::new_v4(),
        actions: vec![Action::AllGet, Action::KeysGet],
        indexes: vec![IndexUidPattern::all()],
        expires_at: None,
        created_at,
        updated_at: created_at,
    }
}
pub fn default_search() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
@ -218,6 +233,9 @@ pub enum Action {
#[serde(rename = "*")]
#[deserr(rename = "*")]
All = 0,
#[serde(rename = "*.get")]
#[deserr(rename = "*.get")]
AllGet,
#[serde(rename = "search")]
#[deserr(rename = "search")]
Search,
@ -317,6 +335,9 @@ pub enum Action {
#[serde(rename = "experimental.update")]
#[deserr(rename = "experimental.update")]
ExperimentalFeaturesUpdate,
#[serde(rename = "export")]
#[deserr(rename = "export")]
Export,
#[serde(rename = "network.get")]
#[deserr(rename = "network.get")]
NetworkGet,
@ -396,6 +417,52 @@ impl Action {
}
}
/// Whether the action should be included in [Action::AllGet].
///
/// Returns `true` for the individual actions classified as reads below, and `false` for
/// every write action, for every action that itself expands to other actions, and for the
/// two reads that are deliberately excluded (`KeysGet` and `ChatCompletions`).
pub fn is_read(&self) -> bool {
use Action::*;
// It's using an exhaustive match to force the addition of new actions.
match self {
// Any action that expands to others must return false, as it wouldn't be able to expand recursively.
All | AllGet | DocumentsAll | IndexesAll | ChatsAll | TasksAll | SettingsAll
| StatsAll | MetricsAll | DumpsAll | SnapshotsAll | ChatsSettingsAll => false,
Search => true,
DocumentsAdd => false,
DocumentsGet => true,
DocumentsDelete => false,
Export => true,
IndexesAdd => false,
IndexesGet => true,
IndexesUpdate => false,
IndexesDelete => false,
IndexesSwap => false,
TasksCancel => false,
TasksDelete => false,
TasksGet => true,
SettingsGet => true,
SettingsUpdate => false,
StatsGet => true,
MetricsGet => true,
DumpsCreate => false,
SnapshotsCreate => false,
Version => true,
KeysAdd => false,
KeysGet => false, // Disabled in order to prevent privilege escalation
KeysUpdate => false,
KeysDelete => false,
ExperimentalFeaturesGet => true,
ExperimentalFeaturesUpdate => false,
NetworkGet => true,
NetworkUpdate => false,
ChatCompletions => false, // Disabled because it might trigger generation of new chats
ChatsGet => true,
ChatsDelete => false,
ChatsSettingsGet => true,
ChatsSettingsUpdate => false,
}
}
pub const fn repr(&self) -> u8 {
*self as u8
}
@ -405,6 +472,7 @@ pub mod actions {
use super::Action::*;
pub(crate) const ALL: u8 = All.repr();
pub const ALL_GET: u8 = AllGet.repr();
pub const SEARCH: u8 = Search.repr();
pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
@ -438,6 +506,8 @@ pub mod actions {
pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
pub const EXPORT: u8 = Export.repr();
pub const NETWORK_GET: u8 = NetworkGet.repr();
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();

View File

@ -18,7 +18,7 @@ pub mod versioning;
pub use milli::{heed, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;
pub use {milli, serde_cs};
pub use {byte_unit, milli, serde_cs};
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type InstanceUid = Uuid;

View File

@ -9,10 +9,11 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::disabled_typos_terms::DisabledTyposTerms;
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
use milli::index::PrefixSearch;
use milli::proximity::ProximityPrecision;
pub use milli::update::ChatSettings;
use milli::update::Setting;
use milli::vector::db::IndexEmbeddingConfig;
use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
use utoipa::ToSchema;
@ -500,8 +501,11 @@ impl Settings<Unchecked> {
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
for (name, config) in configs.iter_mut() {
let config_to_check = std::mem::take(config);
let checked_config =
milli::update::validate_embedding_settings(config_to_check.inner, name)?;
let checked_config = milli::update::validate_embedding_settings(
config_to_check.inner,
name,
milli::vector::settings::EmbeddingValidationContext::SettingsPartialUpdate,
)?;
*config = SettingEmbeddingSettings { inner: checked_config };
}
self.embedders = Setting::Set(configs);
@ -697,7 +701,7 @@ pub fn apply_settings_to_builder(
match typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
Setting::Set(val) => builder.set_autorize_typos(val),
Setting::Set(val) => builder.set_authorize_typos(val),
Setting::Reset => builder.reset_authorize_typos(),
Setting::NotSet => (),
}
@ -751,6 +755,7 @@ pub fn apply_settings_to_builder(
builder.reset_min_word_len_two_typos();
builder.reset_exact_words();
builder.reset_exact_attributes();
builder.reset_disable_on_numbers();
}
Setting::NotSet => (),
}
@ -910,6 +915,7 @@ pub fn settings(
};
let embedders: BTreeMap<_, _> = index
.embedding_configs()
.embedding_configs(rtxn)?
.into_iter()
.map(|IndexEmbeddingConfig { name, config, .. }| {
@ -968,6 +974,7 @@ pub fn settings(
if let SecretPolicy::HideSecrets = secret_policy {
settings.hide_secrets()
}
Ok(settings)
}

View File

@ -1,3 +1,6 @@
use std::collections::BTreeMap;
use byte_unit::UnitType;
use milli::Object;
use serde::{Deserialize, Serialize};
use time::{Duration, OffsetDateTime};
@ -6,7 +9,9 @@ use utoipa::ToSchema;
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
use crate::tasks::{
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
};
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
@ -118,6 +123,15 @@ pub struct DetailsView {
pub upgrade_from: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub upgrade_to: Option<String>,
// exporting
#[serde(skip_serializing_if = "Option::is_none")]
pub url: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub api_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub payload_size: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexes: Option<BTreeMap<String, DetailsExportIndexSettings>>,
}
impl DetailsView {
@ -238,6 +252,34 @@ impl DetailsView {
Some(left)
}
},
url: match (self.url.clone(), other.url.clone()) {
(None, None) => None,
(None, Some(url)) | (Some(url), None) => Some(url),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
api_key: match (self.api_key.clone(), other.api_key.clone()) {
(None, None) => None,
(None, Some(key)) | (Some(key), None) => Some(key),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
payload_size: match (self.payload_size.clone(), other.payload_size.clone()) {
(None, None) => None,
(None, Some(size)) | (Some(size), None) => Some(size),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
indexes: match (self.indexes.clone(), other.indexes.clone()) {
(None, None) => None,
(None, Some(indexes)) | (Some(indexes), None) => Some(indexes),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
// We want the earliest version
upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
(None, None) => None,
@ -327,6 +369,22 @@ impl From<Details> for DetailsView {
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
Details::Export { url, api_key, payload_size, indexes } => DetailsView {
url: Some(url),
api_key: api_key.map(|mut api_key| {
hide_secret(&mut api_key);
api_key
}),
payload_size: payload_size
.map(|ps| ps.get_appropriate_unit(UnitType::Both).to_string()),
indexes: Some(
indexes
.into_iter()
.map(|(pattern, settings)| (pattern.to_string(), settings))
.collect(),
),
..Default::default()
},
Details::UpgradeDatabase { from, to } => DetailsView {
upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
@ -335,3 +393,21 @@ impl From<Details> for DetailsView {
}
}
}
// We definitely need to factorize the code to hide the secret key
/// Masks `secret` in place so that it can be displayed without leaking it.
///
/// How much of the prefix survives depends on the byte length of the secret:
/// - fewer than 10 bytes: nothing is kept, the secret becomes `XXX...`;
/// - fewer than 20 bytes: at most 2 bytes are kept, followed by `XXXX...`;
/// - fewer than 30 bytes: at most 3 bytes are kept, followed by `XXXXX...`;
/// - 30 bytes or more: at most 5 bytes are kept, followed by `XXXXXX...`.
///
/// When the cut point falls inside a multi-byte UTF-8 character it is moved back to the
/// start of that character, so fewer bytes than listed above may be kept but the function
/// never panics on non-ASCII secrets.
fn hide_secret(secret: &mut String) {
    let (kept, mask) = match secret.len() {
        x if x < 10 => (0, "XXX..."),
        x if x < 20 => (2, "XXXX..."),
        x if x < 30 => (3, "XXXXX..."),
        _ => (5, "XXXXXX..."),
    };

    // `String::replace_range` panics if the range start is not on a char boundary.
    // Index 0 is always a boundary, so walking backwards is guaranteed to stop.
    let mut start = kept;
    while !secret.is_char_boundary(start) {
        start -= 1;
    }

    secret.replace_range(start.., mask);
}

View File

@ -1,19 +1,22 @@
use core::fmt;
use std::collections::HashSet;
use std::collections::{BTreeMap, HashSet};
use std::fmt::{Display, Write};
use std::str::FromStr;
use byte_unit::Byte;
use enum_iterator::Sequence;
use milli::update::IndexDocumentsMethod;
use milli::Object;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize, Serializer};
use serde_json::Value;
use time::{Duration, OffsetDateTime};
use utoipa::ToSchema;
use utoipa::{schema, ToSchema};
use uuid::Uuid;
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::index_uid_pattern::IndexUidPattern;
use crate::keys::Key;
use crate::settings::{Settings, Unchecked};
use crate::{versioning, InstanceUid};
@ -50,6 +53,7 @@ impl Task {
| SnapshotCreation
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. }
| UpgradeDatabase { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
@ -86,6 +90,7 @@ impl Task {
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::SnapshotCreation
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. } => None,
}
}
@ -108,11 +113,11 @@ pub enum KindWithContent {
},
DocumentDeletionByFilter {
index_uid: String,
filter_expr: serde_json::Value,
filter_expr: Value,
},
DocumentEdition {
index_uid: String,
filter_expr: Option<serde_json::Value>,
filter_expr: Option<Value>,
context: Option<milli::Object>,
function: String,
},
@ -152,6 +157,12 @@ pub enum KindWithContent {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<IndexUidPattern, ExportIndexSettings>,
},
UpgradeDatabase {
from: (u32, u32, u32),
},
@ -163,6 +174,13 @@ pub struct IndexSwap {
pub indexes: (String, String),
}
/// Per-index-pattern options of an export task, stored as the values of the `indexes` map of
/// `KindWithContent::Export`.
///
/// Serialized with camelCase field names (`overrideSettings`).
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ExportIndexSettings {
/// Optional raw JSON filter (presumably restricts which documents of the matching indexes
/// are exported; confirm against the export implementation).
pub filter: Option<Value>,
/// Defaults to `false` (presumably controls whether the settings of the target index are
/// overridden; confirm against the export implementation).
pub override_settings: bool,
}
impl KindWithContent {
pub fn as_kind(&self) -> Kind {
match self {
@ -180,6 +198,7 @@ impl KindWithContent {
KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion,
KindWithContent::DumpCreation { .. } => Kind::DumpCreation,
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
KindWithContent::Export { .. } => Kind::Export,
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
}
}
@ -192,6 +211,7 @@ impl KindWithContent {
| SnapshotCreation
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. }
| UpgradeDatabase { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@ -269,6 +289,14 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, payload_size, indexes } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
payload_size: *payload_size,
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: (from.0, from.1, from.2),
to: (
@ -335,6 +363,14 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, payload_size, indexes } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
payload_size: *payload_size,
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
@ -383,6 +419,14 @@ impl From<&KindWithContent> for Option<Details> {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, payload_size, indexes } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
payload_size: *payload_size,
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
@ -499,6 +543,7 @@ pub enum Kind {
TaskDeletion,
DumpCreation,
SnapshotCreation,
Export,
UpgradeDatabase,
}
@ -516,6 +561,7 @@ impl Kind {
| Kind::TaskCancelation
| Kind::TaskDeletion
| Kind::DumpCreation
| Kind::Export
| Kind::UpgradeDatabase
| Kind::SnapshotCreation => false,
}
@ -536,6 +582,7 @@ impl Display for Kind {
Kind::TaskDeletion => write!(f, "taskDeletion"),
Kind::DumpCreation => write!(f, "dumpCreation"),
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
Kind::Export => write!(f, "export"),
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
}
}
@ -568,6 +615,8 @@ impl FromStr for Kind {
Ok(Kind::DumpCreation)
} else if kind.eq_ignore_ascii_case("snapshotCreation") {
Ok(Kind::SnapshotCreation)
} else if kind.eq_ignore_ascii_case("export") {
Ok(Kind::Export)
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
Ok(Kind::UpgradeDatabase)
} else {
@ -643,12 +692,33 @@ pub enum Details {
IndexSwap {
swaps: Vec<IndexSwap>,
},
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<IndexUidPattern, DetailsExportIndexSettings>,
},
UpgradeDatabase {
from: (u32, u32, u32),
to: (u32, u32, u32),
},
}
/// Export settings of one index pattern as reported in `Details::Export`: the requested
/// `ExportIndexSettings` plus an optional document count.
// NOTE(review): only `#[schema(rename_all = "camelCase")]` is present, there is no matching
// `#[serde(rename_all = ...)]`, so the serialized key for `matched_documents` may differ from
// the one advertised in the schema. Confirm this is intended before relying on either name.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[schema(rename_all = "camelCase")]
pub struct DetailsExportIndexSettings {
/// The original settings, flattened into this object when (de)serialized.
#[serde(flatten)]
pub settings: ExportIndexSettings,
/// `None` when built from an `ExportIndexSettings`; omitted from the output while `None`
/// (presumably filled in once the export has counted the matching documents; TODO confirm).
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_documents: Option<u64>,
}
impl From<ExportIndexSettings> for DetailsExportIndexSettings {
fn from(settings: ExportIndexSettings) -> Self {
DetailsExportIndexSettings { settings, matched_documents: None }
}
}
impl Details {
pub fn to_failed(&self) -> Self {
let mut details = self.clone();
@ -667,6 +737,7 @@ impl Details {
Self::SettingsUpdate { .. }
| Self::IndexInfo { .. }
| Self::Dump { .. }
| Self::Export { .. }
| Self::UpgradeDatabase { .. }
| Self::IndexSwap { .. } => (),
}

View File

@ -13,51 +13,50 @@ license.workspace = true
default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.9.0", default-features = false, features = [
actix-cors = "0.7.1"
actix-http = { version = "3.11.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_23",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.9.0", default-features = false, features = [
actix-web = { version = "4.11.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_23",
] }
anyhow = { version = "1.0.95", features = ["backtrace"] }
async-trait = "0.1.85"
bstr = "1.11.3"
anyhow = { version = "1.0.98", features = ["backtrace"] }
bstr = "1.12.0"
byte-unit = { version = "5.1.6", features = ["serde"] }
bytes = "1.9.0"
bumpalo = "3.16.0"
clap = { version = "4.5.24", features = ["derive", "env"] }
bytes = "1.10.1"
bumpalo = "3.18.1"
clap = { version = "4.5.40", features = ["derive", "env"] }
crossbeam-channel = "0.5.15"
deserr = { version = "0.6.3", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.13.0"
either = "1.15.0"
file-store = { path = "../file-store" }
flate2 = "1.0.35"
flate2 = "1.1.2"
fst = "0.4.7"
futures = "0.3.31"
futures-util = "0.3.31"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.7.0", features = ["serde"] }
is-terminal = "0.4.13"
indexmap = { version = "2.9.0", features = ["serde"] }
is-terminal = "0.4.16"
itertools = "0.14.0"
jsonwebtoken = "9.3.0"
jsonwebtoken = "9.3.1"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mimalloc = { version = "0.1.47", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
num_cpus = "1.17.0"
obkv = "0.3.0"
once_cell = "1.20.2"
ordered-float = "4.6.0"
parking_lot = "0.12.3"
once_cell = "1.21.3"
ordered-float = "5.0.0"
parking_lot = "0.12.4"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.16"
platform-dirs = "0.3.0"
@ -65,44 +64,44 @@ prometheus = { version = "0.14.0", features = ["process"] }
rand = "0.8.5"
rayon = "1.10.0"
regex = "1.11.1"
reqwest = { version = "0.12.12", features = [
reqwest = { version = "0.12.20", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = { version = "0.23.20", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
rustls-pemfile = "2.2.0"
segment = { version = "0.2.5" }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8"
segment = { version = "0.2.6" }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
sha2 = "0.10.9"
siphasher = "1.0.1"
slice-group-by = "0.3.1"
static-files = { version = "0.2.4", optional = true }
sysinfo = "0.33.1"
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
static-files = { version = "0.2.5", optional = true }
sysinfo = "0.35.2"
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.43.1", features = ["full"] }
toml = "0.8.19"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
tokio = { version = "1.45.1", features = ["full"] }
toml = "0.8.23"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.15"
tracing-actix-web = "0.7.18"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.10"
roaring = "0.10.12"
mopa-maintained = "0.2.3"
utoipa = { version = "5.3.1", features = [
utoipa = { version = "5.4.0", features = [
"actix_extras",
"macros",
"non_strict_integers",
@ -118,29 +117,29 @@ actix-web-lab = { version = "0.24.1", default-features = false }
[dev-dependencies]
actix-rt = "2.10.0"
brotli = "6.0.0"
brotli = "8.0.1"
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
insta = { version = "=1.39.0", features = ["redactions"] }
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
wiremock = "0.6.2"
wiremock = "0.6.3"
yaup = "0.3.1"
[build-dependencies]
anyhow = { version = "1.0.95", optional = true }
cargo_toml = { version = "0.21.0", optional = true }
anyhow = { version = "1.0.98", optional = true }
cargo_toml = { version = "0.22.1", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.12.12", features = [
reqwest = { version = "0.12.20", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.3.0", optional = true }
static-files = { version = "0.2.5", optional = true }
tempfile = { version = "3.20.0", optional = true }
zip = { version = "4.1.0", optional = true }
[features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@ -170,5 +169,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip"
sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59"

View File

@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View File

@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
pub trait Aggregate: 'static + mopa::Any + Send {
/// The name of the event that will be sent to segment.

View File

@ -197,11 +197,13 @@ struct Infos {
experimental_max_number_of_batched_tasks: usize,
experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool,
experimental_multimodal: bool,
experimental_chat_completions: bool,
experimental_get_task_documents_route: bool,
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
experimental_no_snapshot_compaction: bool,
experimental_no_edition_2024_for_settings: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@ -286,8 +288,12 @@ impl Infos {
ScheduleSnapshot::Enabled(interval) => Some(interval),
};
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
indexer_options;
let IndexerOpts {
max_indexing_memory,
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
} = indexer_options;
let RuntimeTogglableFeatures {
metrics,
@ -298,6 +304,7 @@ impl Infos {
get_task_documents_route,
composite_embedders,
chat_completions,
multimodal,
} = features;
// We're going to override every piece of sensitive information.
@ -317,6 +324,7 @@ impl Infos {
experimental_reduce_indexing_memory_usage,
experimental_network: network,
experimental_chat_completions: chat_completions,
experimental_multimodal: multimodal,
experimental_get_task_documents_route: get_task_documents_route,
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
@ -350,6 +358,7 @@ impl Infos {
ssl_require_auth,
ssl_resumption,
ssl_tickets,
experimental_no_edition_2024_for_settings,
}
}
}

View File

@ -49,7 +49,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(if *.0 % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal }))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
@ -76,8 +76,10 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
#[error("Invalid request: missing `hybrid` parameter when `vector` or `media` are present.")]
MissingSearchHybrid,
#[error("Invalid request: both `media` and `vector` parameters are present.")]
MediaAndVector,
}
impl MeilisearchHttpError {
@ -111,6 +113,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
MeilisearchHttpError::MediaAndVector => Code::InvalidSearchMediaAndVector,
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
Code::InvalidMultiSearchFederationOptions
}

View File

@ -37,7 +37,10 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::{
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{
@ -461,6 +464,7 @@ fn import_dump(
index_scheduler: &mut IndexScheduler,
auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
let progress = Progress::default();
let reader = File::open(dump_path)?;
let mut dump_reader = dump::DumpReader::open(reader)?;
@ -494,19 +498,37 @@ fn import_dump(
keys.push(key);
}
// 3. Import the runtime features and network
// 3. Import the `ChatCompletionSettings`s.
for result in dump_reader.chat_completions_settings()? {
let (name, settings) = result?;
index_scheduler.put_chat_settings(&name, &settings)?;
}
// 4. Import the runtime features and network
let features = dump_reader.features()?.unwrap_or_default();
index_scheduler.put_runtime_features(features)?;
let network = dump_reader.network()?.cloned().unwrap_or_default();
index_scheduler.put_network(network)?;
let indexer_config = index_scheduler.indexer_config();
// 4.1 Use all cpus to process dump if `max_indexing_threads` not configured
let backup_config;
let base_config = index_scheduler.indexer_config();
let indexer_config = if base_config.max_threads.is_none() {
let (thread_pool, _) = default_thread_pool_and_threads();
let _config = IndexerConfig { thread_pool, ..*base_config };
backup_config = _config;
&backup_config
} else {
base_config
};
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
// try to process tasks while we're trying to import the indexes.
// 4. Import the indexes.
// 5. Import the indexes.
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
@ -519,20 +541,20 @@ fn import_dump(
let mut wtxn = index.write_txn()?;
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
// 4.1 Import the primary key if there is one.
// 5.1 Import the primary key if there is one.
if let Some(ref primary_key) = metadata.primary_key {
builder.set_primary_key(primary_key.to_string());
}
// 4.2 Import the settings.
// 5.2 Import the settings.
tracing::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
let embedder_stats: Arc<EmbedderStats> = Default::default();
builder.execute(&|| false, &progress, embedder_stats.clone())?;
// 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
// 5.3 Import the documents.
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
@ -543,11 +565,11 @@ fn import_dump(
// This flushes the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 4.3.2 We feed it to the milli index.
// 5.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs(&wtxn)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
@ -560,6 +582,7 @@ fn import_dump(
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
@ -574,15 +597,15 @@ fn import_dump(
index_scheduler.refresh_index_stats(&uid)?;
}
// 5. Import the queue
// 6. Import the queue
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
// 5.1. Import the batches
// 6.1. Import the batches
for ret in dump_reader.batches()? {
let batch = ret?;
index_scheduler_dump.register_dumped_batch(batch)?;
}
// 5.2. Import the tasks
// 6.2. Import the tasks
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler_dump.register_dumped_task(task, file)?;

View File

@ -15,6 +15,33 @@ lazy_static! {
"Meilisearch number of degraded search requests"
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!(
opts!(
"meilisearch_chat_search_requests",
"Meilisearch number of search requests performed by the chat route itself"
),
&["type"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"),
&["workspace", "model"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec =
register_int_counter_vec!(
opts!(
"meilisearch_chat_completion_tokens_usage",
"Meilisearch Chat Completion Tokens Usage"
),
&["workspace", "model"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"),
&["workspace", "model"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
.expect("Can't create a metric");

View File

@ -53,6 +53,8 @@ const MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE: &str = "MEILI_EXPERIMENTAL_DUMPLESS_U
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@ -62,7 +64,7 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE";
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
@ -749,22 +751,43 @@ pub struct IndexerOpts {
#[clap(skip)]
#[serde(skip)]
pub skip_index_budget: bool,
/// Experimental no edition 2024 for settings feature. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/847>
///
/// Enables the experimental no edition 2024 for settings feature.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
#[serde(default)]
pub experimental_no_edition_2024_for_settings: bool,
}
impl IndexerOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
let IndexerOpts {
max_indexing_memory,
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
max_indexing_memory.to_string(),
);
}
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
max_indexing_threads.0.to_string(),
);
if let Some(max_indexing_threads) = max_indexing_threads.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
max_indexing_threads.to_string(),
);
}
if experimental_no_edition_2024_for_settings {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS,
experimental_no_edition_2024_for_settings.to_string(),
);
}
}
}
@ -772,18 +795,23 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|index| format!("indexing-thread:{index}"))
.num_threads(*other.max_indexing_threads)
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
.num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
.build()?;
Ok(Self {
thread_pool,
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
thread_pool: Some(thread_pool),
max_threads: *other.max_indexing_threads,
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
experimental_no_edition_2024_for_settings: other
.experimental_no_edition_2024_for_settings,
chunk_compression_type: Default::default(),
chunk_compression_level: Default::default(),
documents_chunk_size: Default::default(),
max_nb_chunks: Default::default(),
})
}
}
@ -843,31 +871,31 @@ fn total_memory_bytes() -> Option<u64> {
}
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxThreads(usize);
#[derive(Default, Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxThreads(Option<usize>);
impl FromStr for MaxThreads {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
usize::from_str(s).map(Self)
}
}
impl Default for MaxThreads {
fn default() -> Self {
MaxThreads(num_cpus::get() / 2)
fn from_str(s: &str) -> Result<MaxThreads, Self::Err> {
if s.is_empty() || s == "unlimited" {
return Ok(MaxThreads::default());
}
usize::from_str(s).map(Some).map(MaxThreads)
}
}
impl fmt::Display for MaxThreads {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
match self.0 {
Some(threads) => write!(f, "{}", threads),
None => write!(f, "unlimited"),
}
}
}
impl Deref for MaxThreads {
type Target = usize;
type Target = Option<usize>;
fn deref(&self) -> &Self::Target {
&self.0

View File

@ -0,0 +1,135 @@
use std::collections::BinaryHeap;
use serde_json::{json, Value};
use crate::analytics::Aggregate;
/// Analytics aggregator for chat completion requests.
///
/// An instance describing a single request is built with
/// [`ChatCompletionAggregator::from_request`]; a successful request is then
/// recorded with [`ChatCompletionAggregator::succeed`].
#[derive(Default)]
pub struct ChatCompletionAggregator {
    // requests
    /// Number of requests received.
    total_received: usize,
    /// Number of requests for which `succeed` was called.
    total_succeeded: usize,
    /// Durations, in milliseconds, recorded by `succeed`.
    time_spent: BinaryHeap<usize>,

    // chat completion specific metrics
    /// Total number of messages carried by the received requests.
    total_messages: usize,
    /// Number of requests that asked for a streamed response.
    total_streamed_requests: usize,
    /// Number of requests that asked for a non-streamed response.
    total_non_streamed_requests: usize,

    // model usage tracking
    /// Number of requests per model name.
    models_used: std::collections::HashMap<String, usize>,
}

impl ChatCompletionAggregator {
    /// Creates the aggregator describing one received request.
    ///
    /// `model` is counted once in the model usage map, `message_count` becomes
    /// the message total and `is_stream` selects which of the streamed /
    /// non-streamed counters is set to one.
    pub fn from_request(model: &str, message_count: usize, is_stream: bool) -> Self {
        let models_used = std::collections::HashMap::from([(model.to_string(), 1)]);

        Self {
            total_received: 1,
            total_succeeded: 0,
            time_spent: BinaryHeap::new(),
            total_messages: message_count,
            total_streamed_requests: usize::from(is_stream),
            total_non_streamed_requests: usize::from(!is_stream),
            models_used,
        }
    }

    /// Records one successful request that took `time_spent`.
    ///
    /// The duration is stored in milliseconds. A duration that does not fit in
    /// a `usize` is clamped to `usize::MAX` instead of being silently truncated.
    pub fn succeed(&mut self, time_spent: std::time::Duration) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        let millis = usize::try_from(time_spent.as_millis()).unwrap_or(usize::MAX);
        self.time_spent.push(millis);
    }
}
impl Aggregate for ChatCompletionAggregator {
    fn event_name(&self) -> &'static str {
        "Chat Completion POST"
    }

    /// Merges `new` into `self` and returns the merged aggregator.
    ///
    /// Every counter, including the per-model counts, is added with saturating
    /// arithmetic so that merging can never overflow.
    fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
        // Exhaustive destructuring: adding a field to the struct must force a
        // decision here on how to merge it.
        let Self {
            total_received,
            total_succeeded,
            mut time_spent,
            total_messages,
            total_streamed_requests,
            total_non_streamed_requests,
            models_used,
        } = *new;

        // Aggregate time spent
        self.time_spent.append(&mut time_spent);

        // Aggregate counters
        self.total_received = self.total_received.saturating_add(total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
        self.total_messages = self.total_messages.saturating_add(total_messages);
        self.total_streamed_requests =
            self.total_streamed_requests.saturating_add(total_streamed_requests);
        self.total_non_streamed_requests =
            self.total_non_streamed_requests.saturating_add(total_non_streamed_requests);

        // Aggregate model usage
        for (model, count) in models_used {
            let total = self.models_used.entry(model).or_insert(0);
            *total = total.saturating_add(count);
        }

        self
    }

    /// Converts the aggregated data into the JSON payload of the event.
    ///
    /// Time statistics are expressed in milliseconds and are all `0` when no
    /// duration was recorded. The ratios are `0.0` when no request was received.
    fn into_event(self: Box<Self>) -> Value {
        let Self {
            total_received,
            total_succeeded,
            time_spent,
            total_messages,
            total_streamed_requests,
            total_non_streamed_requests,
            models_used,
        } = *self;

        // Compute time statistics; the vector is sorted in ascending order, so
        // the first element is the minimum and the last one the maximum.
        let time_spent: Vec<usize> = time_spent.into_sorted_vec();
        let (max_time, min_time, avg_time) = match (time_spent.first(), time_spent.last()) {
            (Some(&min_time), Some(&max_time)) => {
                // Saturate rather than overflow on pathological durations.
                let sum = time_spent.iter().fold(0usize, |acc, &time| acc.saturating_add(time));
                (max_time, min_time, sum / time_spent.len())
            }
            _ => (0, 0, 0),
        };

        // Compute average messages per request
        let avg_messages_per_request =
            if total_received > 0 { total_messages as f64 / total_received as f64 } else { 0.0 };

        // Compute streaming vs non-streaming proportions
        let streaming_ratio = if total_received > 0 {
            total_streamed_requests as f64 / total_received as f64
        } else {
            0.0
        };

        json!({
            "total_received": total_received,
            "total_succeeded": total_succeeded,
            "time_spent": {
                "max": max_time,
                "min": min_time,
                "avg": avg_time
            },
            "total_messages": total_messages,
            "avg_messages_per_request": avg_messages_per_request,
            "total_streamed_requests": total_streamed_requests,
            "total_non_streamed_requests": total_non_streamed_requests,
            "streaming_ratio": streaming_ratio,
            "models_used": models_used,
        })
    }
}

View File

@ -13,9 +13,9 @@ use async_openai::types::{
ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage,
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
CreateChatCompletionRequest, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
FunctionCallStream, FunctionObjectArgs,
ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs,
ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse,
FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs,
};
use async_openai::Client;
use bumpalo::Bump;
@ -27,15 +27,17 @@ use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts,
ChatCompletionSource as DbChatCompletionSource, SystemRole,
};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget};
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
use tokio::runtime::Handle;
use tokio::sync::mpsc::error::SendError;
use super::chat_completion_analytics::ChatCompletionAggregator;
use super::config::Config;
use super::errors::{MistralError, OpenAiOutsideError, StreamErrorEvent};
use super::utils::format_documents;
@ -43,10 +45,15 @@ use super::{
ChatsParam, MEILI_APPEND_CONVERSATION_MESSAGE_NAME, MEILI_SEARCH_IN_INDEX_FUNCTION_NAME,
MEILI_SEARCH_PROGRESS_NAME, MEILI_SEARCH_SOURCES_NAME,
};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::metrics::{
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE,
MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE,
MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
};
use crate::routes::chats::utils::SseEventSender;
use crate::routes::indexes::search::search_kind;
use crate::search::{add_search_rules, prepare_search, search_from_kind, SearchQuery};
@ -64,6 +71,7 @@ async fn chat(
req: HttpRequest,
search_queue: web::Data<SearchQueue>,
web::Json(chat_completion): web::Json<CreateChatCompletionRequest>,
analytics: web::Data<Analytics>,
) -> impl Responder {
let ChatsParam { workspace_uid } = chats_param.into_inner();
@ -76,6 +84,7 @@ async fn chat(
&workspace_uid,
req,
chat_completion,
analytics,
)
.await,
)
@ -88,6 +97,7 @@ async fn chat(
&workspace_uid,
req,
chat_completion,
analytics,
)
.await,
)
@ -160,6 +170,7 @@ fn setup_search_tool(
let mut index_uids = Vec::new();
let mut function_description = prompts.search_description.clone();
let mut filter_description = prompts.search_filter_param.clone();
index_scheduler.try_for_each_index::<_, ()>(|name, index| {
// Make sure to skip unauthorized indexes
if !filters.is_index_authorized(name) {
@ -171,16 +182,22 @@ fn setup_search_tool(
let index_description = chat_config.description;
let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
index_uids.push(name.to_string());
let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap
let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
let _ = writeln!(&mut filter_description, "{facet_distributions}");
Ok(())
})?;
tracing::debug!("LLM function description: {function_description}");
tracing::debug!("LLM filter description: {filter_description}");
let tool = ChatCompletionToolArgs::default()
.r#type(ChatCompletionToolType::Function)
.function(
FunctionObjectArgs::default()
.name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
.description(&function_description)
.description(function_description)
.parameters(json!({
"type": "object",
"properties": {
@ -194,9 +211,13 @@ fn setup_search_tool(
// "type": ["string", "null"],
"type": "string",
"description": prompts.search_q_param,
},
"filter": {
"type": "string",
"description": filter_description,
}
},
"required": ["index_uid", "q"],
"required": ["index_uid", "q", "filter"],
"additionalProperties": false,
}))
.strict(true)
@ -238,11 +259,19 @@ async fn process_search_request(
auth_token: &str,
index_uid: String,
q: Option<String>,
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) };
let mut query = SearchQuery {
q,
filter: filter.map(serde_json::Value::from),
..SearchQuery::from(search_parameters)
};
tracing::debug!("LLM query: {:?}", query);
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
auth_ctrl,
auth_token,
@ -271,17 +300,26 @@ async fn process_search_request(
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
search_from_kind(index_uid, search_kind, search)
.map(|(search_results, _)| (rtxn, search_results))
.map_err(ResponseError::from)
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
Err(MeilisearchHttpError::Milli {
error: meilisearch_types::milli::Error::UserError(user_error),
index_name: _,
}) => Ok((rtxn, Err(user_error))),
Err(err) => Err(ResponseError::from(err)),
}
})
.await;
permit.drop().await;
let output = output?;
let output = match output? {
Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)),
Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())),
Err(err) => Err(err),
};
let mut documents = Vec::new();
if let Ok((ref rtxn, ref search_result)) = output {
// aggregate.succeed(search_result);
MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
if search_result.degraded {
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
}
@ -315,9 +353,18 @@ async fn non_streamed_chat(
workspace_uid: &str,
req: HttpRequest,
chat_completion: CreateChatCompletionRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
// Create analytics aggregator
let aggregate = ChatCompletionAggregator::from_request(
&chat_completion.model,
chat_completion.messages.len(),
false, // non_streamed_chat is not streaming
);
let start_time = std::time::Instant::now();
if let Some(n) = chat_completion.n.filter(|&n| n != 1) {
return Err(ResponseError::from_msg(
format!("You tried to specify n = {n} but only single choices are supported (n = 1)."),
@ -377,16 +424,19 @@ async fn non_streamed_chat(
for call in meili_calls {
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
)
.await
.map_err(|e| e.to_string()),
Ok(SearchInIndexParameters { index_uid, q, filter }) => {
process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
.map_err(|e| e.to_string())
}
Err(err) => Err(err.to_string()),
};
@ -414,6 +464,11 @@ async fn non_streamed_chat(
}
}
// Record success in analytics
let mut aggregate = aggregate;
aggregate.succeed(start_time.elapsed());
analytics.publish(aggregate, &req);
Ok(HttpResponse::Ok().json(response))
}
@ -424,6 +479,7 @@ async fn streamed_chat(
workspace_uid: &str,
req: HttpRequest,
mut chat_completion: CreateChatCompletionRequest,
analytics: web::Data<Analytics>,
) -> Result<impl Responder, ResponseError> {
index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
let filters = index_scheduler.filters();
@ -445,6 +501,14 @@ async fn streamed_chat(
}
};
// Create analytics aggregator
let mut aggregate = ChatCompletionAggregator::from_request(
&chat_completion.model,
chat_completion.messages.len(),
true, // streamed_chat is always streaming
);
let start_time = std::time::Instant::now();
let config = Config::new(&chat_settings);
let auth_token = extract_token_from_request(&req)?.unwrap().to_string();
let system_role = chat_settings.source.system_role(&chat_completion.model);
@ -460,6 +524,7 @@ async fn streamed_chat(
let (tx, rx) = tokio::sync::mpsc::channel(10);
let tx = SseEventSender::new(tx);
let workspace_uid = workspace_uid.to_string();
let _join_handle = Handle::current().spawn(async move {
let client = Client::with_config(config.clone());
let mut global_tool_calls = HashMap::<u32, Call>::new();
@ -469,6 +534,7 @@ async fn streamed_chat(
let output = run_conversation(
&index_scheduler,
&auth_ctrl,
&workspace_uid,
&search_queue,
&auth_token,
&client,
@ -490,6 +556,10 @@ async fn streamed_chat(
let _ = tx.stop().await;
});
// Record success in analytics after the stream is set up
aggregate.succeed(start_time.elapsed());
analytics.publish(aggregate, &req);
Ok(Sse::from_infallible_receiver(rx).with_retry_duration(Duration::from_secs(10)))
}
@ -502,6 +572,7 @@ async fn run_conversation<C: async_openai::config::Config>(
Data<IndexScheduler>,
>,
auth_ctrl: &web::Data<AuthController>,
workspace_uid: &str,
search_queue: &web::Data<SearchQueue>,
auth_token: &str,
client: &Client<C>,
@ -511,13 +582,34 @@ async fn run_conversation<C: async_openai::config::Config>(
global_tool_calls: &mut HashMap<u32, Call>,
function_support: FunctionSupport,
) -> Result<ControlFlow<Option<FinishReason>, ()>, SendError<Event>> {
use DbChatCompletionSource::*;
let mut finish_reason = None;
chat_completion.stream_options = match source {
OpenAi | AzureOpenAi => Some(ChatCompletionStreamOptions { include_usage: true }),
Mistral | VLlm => None,
};
// safety: unwrap: can only happen if `stream` was set to `false`
let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
while let Some(result) = response.next().await {
match result {
Ok(resp) => {
let choice = &resp.choices[0];
if let Some(usage) = resp.usage.as_ref() {
MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE
.with_label_values(&[workspace_uid, &chat_completion.model])
.inc_by(usage.prompt_tokens as u64);
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE
.with_label_values(&[workspace_uid, &chat_completion.model])
.inc_by(usage.completion_tokens as u64);
MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE
.with_label_values(&[workspace_uid, &chat_completion.model])
.inc_by(usage.total_tokens as u64);
}
let choice = match resp.choices.first() {
Some(choice) => choice,
None => break,
};
finish_reason = choice.finish_reason;
let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;
@ -659,13 +751,14 @@ async fn handle_meili_tools(
let mut error = None;
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request(
index_scheduler,
auth_ctrl.clone(),
search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
{
@ -741,4 +834,42 @@ struct SearchInIndexParameters {
index_uid: String,
/// The query parameter to use.
q: Option<String>,
/// The filter parameter to use.
filter: Option<String>,
}
/// Renders the facet distributions of the filterable fields of `index` as text.
///
/// The distribution is computed over all the documents of the index, keeping at
/// most `max_values_per_facet` values per facet, ordered by count. Each facet
/// produces one line of the form `name: value1, value2, value3.`; the counts
/// themselves are not written. A facet without any value yields `name: `
/// followed by a newline.
fn format_facet_distributions(
    index: &Index,
    rtxn: &RoTxn,
    max_values_per_facet: usize,
) -> meilisearch_types::milli::Result<String> {
    let candidates = index.documents_ids(rtxn)?;
    let filterable_rules = index.filterable_attributes_rules(rtxn)?;
    let fields_ids_map = index.fields_ids_map(rtxn)?;

    // Only the known fields matched by at least one filterable rule are requested.
    let facets = fields_ids_map
        .names()
        .filter(|name| {
            filterable_rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match))
        })
        .map(|name| (name, OrderBy::Count));

    let distribution = index
        .facets_distribution(rtxn)
        .max_values_per_facet(max_values_per_facet)
        .candidates(candidates)
        .facets(facets)
        .execute()?;

    let mut rendered = String::new();
    for (facet_name, entries) in distribution {
        let _ = write!(&mut rendered, "{}: ", facet_name);

        // Values are comma-separated and the last one is closed by a period.
        let mut values = entries.into_iter().map(|(value, _count)| value).peekable();
        while let Some(value) = values.next() {
            let _ = match values.peek() {
                Some(_) => write!(&mut rendered, "{value}, "),
                None => write!(&mut rendered, "{value}."),
            };
        }

        let _ = writeln!(&mut rendered);
    }

    Ok(rendered)
}

View File

@ -13,7 +13,7 @@ impl Config {
pub fn new(chat_settings: &DbChatSettings) -> Self {
use meilisearch_types::features::ChatCompletionSource::*;
match chat_settings.source {
OpenAi | Mistral | Gemini | VLlm => {
OpenAi | Mistral | VLlm => {
let mut config = OpenAIConfig::default();
if let Some(org_id) = chat_settings.org_id.as_ref() {
config = config.with_org_id(org_id);

View File

@ -19,6 +19,7 @@ use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::routes::PAGINATION_DEFAULT_LIMIT;
mod chat_completion_analytics;
pub mod chat_completions;
mod config;
mod errors;

View File

@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
DEFAULT_CHAT_SYSTEM_PROMPT,
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
@ -84,6 +84,11 @@ async fn patch_settings(
Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_q_param,
},
search_filter_param: match new_prompts.search_filter_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_filter_param,
},
search_index_uid_param: match new_prompts.search_index_uid_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
@ -218,7 +223,6 @@ pub enum ChatCompletionSource {
#[default]
OpenAi,
Mistral,
Gemini,
AzureOpenAi,
VLlm,
}
@ -229,7 +233,6 @@ impl From<ChatCompletionSource> for DbChatCompletionSource {
match source {
OpenAi => DbChatCompletionSource::OpenAi,
Mistral => DbChatCompletionSource::Mistral,
Gemini => DbChatCompletionSource::Gemini,
AzureOpenAi => DbChatCompletionSource::AzureOpenAi,
VLlm => DbChatCompletionSource::VLlm,
}
@ -254,6 +257,10 @@ pub struct ChatPrompts {
#[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
pub search_q_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchFilterParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is filter parameter..."))]
pub search_filter_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
pub search_index_uid_param: Setting<String>,

View File

@ -0,0 +1,183 @@
use std::collections::BTreeMap;
use std::convert::Infallible;
use std::str::FromStr as _;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use byte_unit::Byte;
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::actions;
use meilisearch_types::tasks::{ExportIndexSettings as DbExportIndexSettings, KindWithContent};
use serde::Serialize;
use serde_json::Value;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::routes::export_analytics::ExportAnalytics;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
// OpenAPI descriptor for the export route: it registers the `export` handler as a
// documented path and declares the "Export" tag together with its description and
// the link to the external reference documentation.
#[derive(OpenApi)]
#[openapi(
paths(export),
tags((
name = "Export",
description = "The `/export` route allows you to trigger an export process to a remote Meilisearch instance.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/export"),
)),
)]
pub struct ExportApi;
/// Registers the export handler on the given service configuration.
///
/// A single resource is mounted at the empty path (the root of whatever scope this
/// configuration is attached to) and only `POST` requests are routed to `export`.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let post_export = web::post().to(export);
    let root_resource = web::resource("").route(post_export);
    cfg.service(root_resource);
}
// Handler of the export route.
//
// Steps, in order:
// 1. unwrap the JSON payload and build the analytics record from it;
// 2. translate the HTTP-level per-index settings into their task-level counterpart,
//    falling back to the `*` pattern with default settings when `indexes` is absent;
// 3. register a `KindWithContent::Export` task on a blocking thread;
// 4. publish the analytics and answer with the summarized task.
//
// Errors: returns a `ResponseError` when the task id / dry-run information cannot be
// read from the request, when the blocking task fails to join, or when the scheduler
// refuses the task.
#[utoipa::path(
    post,
    path = "",
    tag = "Export",
    security(("Bearer" = ["export", "*"])),
    responses(
        (status = 202, description = "Export successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
            {
                "taskUid": 1,
                "status": "enqueued",
                "type": "export",
                "enqueuedAt": "2021-08-11T09:25:53.000000Z"
            })),
        (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "The Authorization header is missing. It must use the bearer authorization method.",
                "code": "missing_authorization_header",
                "type": "auth",
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
    )
)]
async fn export(
    index_scheduler: GuardedData<ActionPolicy<{ actions::EXPORT }>, Data<IndexScheduler>>,
    export: AwebJson<Export, DeserrJsonError>,
    req: HttpRequest,
    opt: web::Data<Opt>,
    analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let export = export.into_inner();
    // The payload is the *input* of the route, so it is logged under `parameters`
    // (the `returns` field is reserved for values sent back to the client).
    // NOTE(review): `Export` derives `Debug` with `api_key` included, so the key is
    // part of this debug log — consider redacting it.
    debug!(parameters = ?export, "Trigger export");

    // Must be computed before the payload is destructured below.
    let analytics_aggregate = ExportAnalytics::from_export(&export);

    let Export { url, api_key, payload_size, indexes } = export;

    let indexes = match indexes {
        Some(indexes) => indexes
            .into_iter()
            .map(|(pattern, ExportIndexSettings { filter, override_settings })| {
                (pattern, DbExportIndexSettings { filter, override_settings })
            })
            .collect(),
        // No explicit selection: target every index with the default settings.
        None => BTreeMap::from([(
            IndexUidPattern::new_unchecked("*"),
            DbExportIndexSettings::default(),
        )]),
    };

    let task = KindWithContent::Export {
        url,
        api_key,
        payload_size: payload_size.map(|ByteWithDeserr(bytes)| bytes),
        indexes,
    };

    let uid = get_task_id(&req, &opt)?;
    let dry_run = is_dry_run(&req, &opt)?;

    // `register` is called from a blocking thread; the first `?` handles the join
    // error, the second one the error returned by the scheduler itself.
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
            .await??
            .into();

    analytics.publish(analytics_aggregate, &req);

    // NOTE(review): the OpenAPI annotation above documents a `202` response while
    // this returns `200 OK`; confirm which one is intended before aligning them.
    Ok(HttpResponse::Ok().json(task))
}
// JSON payload accepted by the `export` handler.
//
// Keys are camelCase on the wire and unknown keys are rejected by deserr
// (`deny_unknown_fields`). Plain `//` comments are used here on purpose so that the
// derived schema is left untouched.
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Export {
// URL forwarded into the export task; per the route description this is the remote
// Meilisearch instance. Marked `default`, so a missing key yields an empty string.
#[schema(value_type = Option<String>, example = json!("https://ms-1234.heaven.meilisearch.com"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportUrl>)]
pub url: String,
// Optional API key forwarded into the export task.
// NOTE(review): presumably used to authenticate against the remote — confirm.
#[schema(value_type = Option<String>, example = json!("1234abcd"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportApiKey>)]
pub api_key: Option<String>,
// Optional payload size; accepts either an integer or a string such as "24MiB"
// (see the `Deserr` implementation of `ByteWithDeserr`).
#[schema(value_type = Option<String>, example = json!("24MiB"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportPayloadSize>)]
pub payload_size: Option<ByteWithDeserr>,
// Optional map from index uid pattern to per-index export settings. When absent,
// the `export` handler substitutes the `*` pattern with default settings.
#[schema(value_type = Option<BTreeMap<String, ExportIndexSettings>>, example = json!({ "*": { "filter": null } }))]
#[deserr(default)]
#[serde(default)]
pub indexes: Option<BTreeMap<IndexUidPattern, ExportIndexSettings>>,
}
/// A wrapper around the `Byte` type that implements `Deserr`.
///
/// Serialization is transparent: the wrapper serializes exactly like the inner `Byte`.
#[derive(Debug, Serialize)]
#[serde(transparent)]
pub struct ByteWithDeserr(pub Byte);
impl<E> deserr::Deserr<E> for ByteWithDeserr
where
E: deserr::DeserializeError,
{
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: deserr::ValuePointerRef,
) -> Result<Self, E> {
use deserr::{ErrorKind, Value, ValueKind};
match value {
Value::Integer(integer) => Ok(ByteWithDeserr(Byte::from_u64(integer))),
Value::String(string) => Byte::from_str(&string).map(ByteWithDeserr).map_err(|e| {
deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: e.to_string() },
location,
))
}),
actual => Err(deserr::take_cf_content(E::error(
None,
ErrorKind::IncorrectValueKind {
actual,
accepted: &[ValueKind::Integer, ValueKind::String],
},
location,
))),
}
}
}
// Per-index-pattern settings of an export request, as received over HTTP.
//
// The `export` handler copies both fields verbatim into the task-level
// `DbExportIndexSettings`. Keys are camelCase and unknown keys are rejected.
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct ExportIndexSettings {
// Optional filter, kept as raw JSON.
// NOTE(review): presumably restricts which documents are exported — confirm.
#[schema(value_type = Option<String>, example = json!("genres = action"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportIndexFilter>)]
pub filter: Option<Value>,
// Defaults to `false` when omitted.
// NOTE(review): presumably controls whether settings on the remote are overridden — confirm.
#[schema(value_type = Option<bool>, example = json!(true))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportIndexOverrideSettings>)]
pub override_settings: bool,
}

View File

@ -0,0 +1,111 @@
use url::Url;
use crate::analytics::Aggregate;
use crate::routes::export::Export;
/// Analytics accumulated over one or more export requests.
///
/// A value built by `from_export` describes a single request; values are then merged
/// through `Aggregate::aggregate` and turned into averages by `into_event`.
#[derive(Default)]
pub struct ExportAnalytics {
/// Number of export requests merged into this value.
total_received: usize,
/// `true` as soon as one merged request carried an API key.
has_api_key: bool,
/// Number of requests whose URL host was recognized as a Meilisearch Cloud domain.
sum_exports_meilisearch_cloud: usize,
/// Total number of index patterns over all requests.
sum_index_patterns: usize,
/// Total number of index patterns that came with a filter.
sum_patterns_with_filter: usize,
/// Total number of index patterns that had `override_settings` enabled.
sum_patterns_with_override_settings: usize,
/// Payload sizes (as `u64`) of the requests that specified one.
payload_sizes: Vec<u64>,
}
impl ExportAnalytics {
    /// Builds the analytics record describing a single export request.
    ///
    /// The record counts one received request and captures: whether an API key was
    /// provided, whether the target host belongs to a Meilisearch Cloud domain, how
    /// many index patterns were given (and how many of them carry a filter or enable
    /// `override_settings`), and the payload size when one was specified.
    ///
    /// A URL that fails to parse, or that has no host, is simply not counted as cloud.
    pub fn from_export(export: &Export) -> Self {
        // Hosts equal to one of these domains, or any of their subdomains, are
        // counted as Meilisearch Cloud.
        const CLOUD_DOMAINS: [&str; 3] = ["meilisearch.dev", "meilisearch.com", "meilisearch.io"];

        let Export { url, api_key, payload_size, indexes } = export;

        let url = Url::parse(url).ok();
        let is_meilisearch_cloud = url.as_ref().and_then(Url::host_str).is_some_and(|host| {
            CLOUD_DOMAINS.iter().any(|&domain| {
                // Require a label boundary before the domain: a bare `ends_with`
                // would also accept unrelated hosts such as `notmeilisearch.com`.
                host.strip_suffix(domain)
                    .is_some_and(|subdomain| subdomain.is_empty() || subdomain.ends_with('.'))
            })
        });

        let has_api_key = api_key.is_some();

        let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
        let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
            indexes.values().filter(|settings| settings.filter.is_some()).count()
        });
        let patterns_with_override_settings_count = indexes.as_ref().map_or(0, |indexes| {
            indexes.values().filter(|settings| settings.override_settings).count()
        });

        // Zero or one element: only requests that set a payload size contribute.
        let payload_sizes =
            if let Some(crate::routes::export::ByteWithDeserr(byte_size)) = payload_size {
                vec![byte_size.as_u64()]
            } else {
                vec![]
            };

        Self {
            total_received: 1,
            has_api_key,
            sum_exports_meilisearch_cloud: is_meilisearch_cloud as usize,
            sum_index_patterns: index_patterns_count,
            sum_patterns_with_filter: patterns_with_filter_count,
            sum_patterns_with_override_settings: patterns_with_override_settings_count,
            payload_sizes,
        }
    }
}
impl Aggregate for ExportAnalytics {
    /// Name under which the aggregated event is reported.
    fn event_name(&self) -> &'static str {
        "Export Triggered"
    }

    /// Merges `other` into `self`: counters are added, `has_api_key` is OR-ed and
    /// the payload sizes are concatenated.
    fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
        self.total_received += other.total_received;
        self.has_api_key |= other.has_api_key;
        self.sum_exports_meilisearch_cloud += other.sum_exports_meilisearch_cloud;
        self.sum_index_patterns += other.sum_index_patterns;
        self.sum_patterns_with_filter += other.sum_patterns_with_filter;
        self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
        self.payload_sizes.extend(other.payload_sizes);
        self
    }

    /// Converts the accumulated counters into the JSON event.
    ///
    /// Every `avg_*` entry is `null` when there is nothing to average (no request
    /// received, or no request with a payload size for `avg_payload_size`).
    fn into_event(self: Box<Self>) -> serde_json::Value {
        let avg_payload_size = if self.payload_sizes.is_empty() {
            None
        } else {
            // Payload sizes come straight from user input and may be close to
            // `u64::MAX`; accumulate in `u128` so the sum cannot overflow.
            let total: u128 = self.payload_sizes.iter().map(|&size| u128::from(size)).sum();
            // `usize` always fits in `u128`, this widening cast is lossless.
            let count = self.payload_sizes.len() as u128;
            // An average of `u64` values always fits in a `u64`; the fallback is
            // only there to avoid a panic path.
            Some(u64::try_from(total / count).unwrap_or(u64::MAX))
        };

        let total_received = self.total_received;
        // Average of a summed counter per received request, `None` without requests.
        let per_request =
            |sum: usize| (total_received != 0).then(|| sum as f64 / total_received as f64);

        let avg_exports_meilisearch_cloud = per_request(self.sum_exports_meilisearch_cloud);
        let avg_index_patterns = per_request(self.sum_index_patterns);
        let avg_patterns_with_filter = per_request(self.sum_patterns_with_filter);
        let avg_patterns_with_override_settings =
            per_request(self.sum_patterns_with_override_settings);

        serde_json::json!({
            "total_received": self.total_received,
            "has_api_key": self.has_api_key,
            "avg_exports_meilisearch_cloud": avg_exports_meilisearch_cloud,
            "avg_index_patterns": avg_index_patterns,
            "avg_patterns_with_filter": avg_patterns_with_filter,
            "avg_patterns_with_override_settings": avg_patterns_with_override_settings,
            "avg_payload_size": avg_payload_size,
        })
    }
}

View File

@ -54,6 +54,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
get_task_documents_route: Some(false),
composite_embedders: Some(false),
chat_completions: Some(false),
multimodal: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@ -100,6 +101,8 @@ pub struct RuntimeTogglableFeatures {
pub composite_embedders: Option<bool>,
#[deserr(default)]
pub chat_completions: Option<bool>,
#[deserr(default)]
pub multimodal: Option<bool>,
}
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@ -113,6 +116,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
get_task_documents_route,
composite_embedders,
chat_completions,
multimodal,
} = value;
Self {
@ -124,6 +128,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
get_task_documents_route: Some(get_task_documents_route),
composite_embedders: Some(composite_embedders),
chat_completions: Some(chat_completions),
multimodal: Some(multimodal),
}
}
}
@ -138,6 +143,7 @@ pub struct PatchExperimentalFeatureAnalytics {
get_task_documents_route: bool,
composite_embedders: bool,
chat_completions: bool,
multimodal: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
@ -155,6 +161,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
get_task_documents_route: new.get_task_documents_route,
composite_embedders: new.composite_embedders,
chat_completions: new.chat_completions,
multimodal: new.multimodal,
})
}
@ -181,6 +188,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
get_task_documents_route: Some(false),
composite_embedders: Some(false),
chat_completions: Some(false),
multimodal: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@ -223,6 +231,7 @@ async fn patch_features(
.composite_embedders
.unwrap_or(old_features.composite_embedders),
chat_completions: new_features.0.chat_completions.unwrap_or(old_features.chat_completions),
multimodal: new_features.0.multimodal.unwrap_or(old_features.multimodal),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -237,6 +246,7 @@ async fn patch_features(
get_task_documents_route,
composite_embedders,
chat_completions,
multimodal,
} = new_features;
analytics.publish(
@ -249,6 +259,7 @@ async fn patch_features(
get_task_documents_route,
composite_embedders,
chat_completions,
multimodal,
},
&req,
);

View File

@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;
use std::str::FromStr;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@ -17,9 +18,10 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::documents::sort::recursive_sort;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::milli::{AscDesc, DocumentId};
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
@ -42,6 +44,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
@ -135,6 +138,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
per_document_id: bool,
// if a filter was used
per_filter: bool,
// if documents were sorted
sort: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
@ -151,39 +156,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
marker: std::marker::PhantomData<Method>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
}
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
};
let ids = match query {
DocumentFetchKind::Normal { ids, .. } => *ids,
DocumentFetchKind::PerDocumentId { .. } => 0,
};
Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
max_document_ids: ids,
marker: PhantomData,
}
}
}
impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
@ -193,6 +165,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
per_filter: self.per_filter | new.per_filter,
sort: self.sort | new.sort,
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset),
@ -276,6 +249,7 @@ pub async fn get_document(
retrieve_vectors: param_retrieve_vectors.0,
per_document_id: true,
per_filter: false,
sort: false,
max_limit: 0,
max_offset: 0,
max_document_ids: 0,
@ -406,6 +380,8 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
sort: Option<String>,
}
#[derive(Debug, Deserr, ToSchema)]
@ -430,6 +406,9 @@ pub struct BrowseQuery {
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
#[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
sort: Option<Vec<String>>,
}
/// Get documents with POST
@ -495,6 +474,7 @@ pub async fn documents_by_query_post(
analytics.publish(
DocumentsFetchAggregator::<DocumentsPOST> {
per_filter: body.filter.is_some(),
sort: body.sort.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
@ -571,7 +551,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
params.into_inner();
let filter = match filter {
@ -582,20 +562,20 @@ pub async fn get_documents(
None => None,
};
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter,
ids,
ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
};
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
per_filter: query.filter.is_some(),
sort: query.sort.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
@ -615,7 +595,7 @@ fn documents_by_query(
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
@ -633,6 +613,18 @@ fn documents_by_query(
None
};
let sort_criteria = if let Some(sort) = &sort {
let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
}
};
Some(sorts)
} else {
None
};
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(
&index,
@ -643,6 +635,7 @@ fn documents_by_query(
fields,
retrieve_vectors,
index_scheduler.features(),
sort_criteria,
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -1452,7 +1445,6 @@ fn some_documents<'a, 't: 'a>(
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
@ -1468,15 +1460,9 @@ fn some_documents<'a, 't: 'a>(
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
let embeddings =
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
@ -1501,6 +1487,7 @@ fn retrieve_documents<S: AsRef<str>>(
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
sort_criteria: Option<Vec<AscDesc>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
@ -1533,15 +1520,32 @@ fn retrieve_documents<S: AsRef<str>>(
})?
}
let (it, number_of_documents) = {
let (it, number_of_documents) = if let Some(sort) = sort_criteria {
let number_of_documents = candidates.len();
let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
let iter = facet_sort.iter()?;
let mut documents = Vec::with_capacity(limit);
for result in iter.skip(offset).take(limit) {
documents.push(result?);
}
(
itertools::Either::Left(some_documents(
index,
&rtxn,
documents.into_iter(),
retrieve_vectors,
)?),
number_of_documents,
)
} else {
let number_of_documents = candidates.len();
(
some_documents(
itertools::Either::Right(some_documents(
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
)?),
number_of_documents,
)
};

View File

@ -56,6 +56,8 @@ pub struct FacetSearchQuery {
pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
pub media: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
@ -94,6 +96,7 @@ impl FacetSearchAggregator {
facet_name,
vector,
q,
media,
filter,
matching_strategy,
attributes_to_search_on,
@ -108,6 +111,7 @@ impl FacetSearchAggregator {
facet_names: Some(facet_name.clone()).into_iter().collect(),
additional_search_parameters_provided: q.is_some()
|| vector.is_some()
|| media.is_some()
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
@ -291,6 +295,7 @@ impl From<FacetSearchQuery> for SearchQuery {
facet_name: _,
q,
vector,
media,
filter,
matching_strategy,
attributes_to_search_on,
@ -312,6 +317,7 @@ impl From<FacetSearchQuery> for SearchQuery {
SearchQuery {
q,
media,
offset: DEFAULT_SEARCH_OFFSET(),
limit: DEFAULT_SEARCH_LIMIT(),
page,

View File

@ -205,6 +205,8 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
Ok(Self {
q: other.q,
// `media` not supported for `GET`
media: None,
vector: other.vector.map(CS::into_inner),
offset: other.offset.0,
limit: other.limit.0,
@ -481,28 +483,30 @@ pub fn search_kind(
index_uid: String,
index: &milli::Index,
) -> Result<SearchKind, ResponseError> {
let is_placeholder_query =
if let Some(q) = query.q.as_deref() { q.trim().is_empty() } else { true };
let non_placeholder_query = !is_placeholder_query;
let is_media = query.media.is_some();
// handle with care, the order of cases matters, the semantics is subtle
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
// empty query, no vector => placeholder search
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
// no query, no vector => placeholder search
(None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
}
// hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
match (is_media, non_placeholder_query, &query.hybrid, query.vector.as_deref()) {
// media + vector => error
(true, _, _, Some(_)) => Err(MeilisearchHttpError::MediaAndVector.into()),
// media + !hybrid => error
(true, _, None, _) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
// vector + !hybrid => error
(_, _, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
// hybrid S0 => keyword
(_, _, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
Ok(SearchKind::KeywordOnly)
}
// no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
// !q + !vector => placeholder search
(false, false, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid S100 => semantic
(_, _, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
// q + hybrid => hybrid
(_, true, Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler,
index_uid,
index,
@ -510,7 +514,11 @@ pub fn search_kind(
**semantic_ratio,
v.map(|v| v.len()),
),
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
// !q + hybrid => semantic
(_, false, Some(HybridQuery { semantic_ratio: _, embedder }), v) => {
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
}
// q => keyword
(false, true, None, None) => Ok(SearchKind::KeywordOnly),
}
}

View File

@ -61,6 +61,8 @@ pub struct SearchAggregator<Method: AggregateMethod> {
semantic_ratio: bool,
hybrid: bool,
retrieve_vectors: bool,
// Number of requests containing `media`
total_media: usize,
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
@ -101,6 +103,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
let SearchQuery {
q,
vector,
media,
offset,
limit,
page,
@ -175,6 +178,11 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
if let Some(ref vector) = vector {
ret.max_vector_size = vector.len();
}
if media.is_some() {
ret.total_media = 1;
}
ret.retrieve_vectors |= retrieve_vectors;
if query.is_finite_pagination() {
@ -277,6 +285,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
show_ranking_score_details,
semantic_ratio,
hybrid,
total_media,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
@ -327,6 +336,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.total_media += total_media;
// pagination
self.max_limit = self.max_limit.max(max_limit);
@ -403,6 +413,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
show_ranking_score_details,
semantic_ratio,
hybrid,
total_media,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
@ -450,6 +461,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
"total_media": total_media,
},
"pagination": {
"max_limit": max_limit,

Some files were not shown because too many files have changed in this diff Show More