Compare commits


300 Commits

Author SHA1 Message Date
Tamo
b7f32c5acd Merge pull request #5817 from meilisearch/fix-dumpless-upgrade
fix the dumpless upgrade again
2025-08-11 15:29:54 +00:00
Tamo
8f04529ba2 Merge pull request #5816 from meilisearch/webhook-telemetry
Update webhook telemetry events
2025-08-11 15:13:41 +00:00
Tamo
54b85b8644 fix the dumpless upgrade again 2025-08-11 16:37:09 +02:00
Mubelotix
562c620fec Update webhook telemetry events 2025-08-11 16:21:14 +02:00
Clément Renault
33bc86d71a Merge pull request #5810 from meilisearch/curquiza-patch-1
Add category to release draft
2025-08-11 11:57:51 +00:00
curquiza
b265c92852 Thank contributors better 2025-08-11 12:17:10 +02:00
Clémentine
759beed560 Add category in release draft 2025-08-07 18:15:29 +02:00
Tamo
2035f342f0 Merge pull request #5807 from meilisearch/patch-chat-settings
Turn chat settings to `PATCH`
2025-08-06 14:17:30 +00:00
Tamo
27fed758c2 Merge pull request #5806 from meilisearch/update-to-v1-17-0
Update version to v1.17.0
2025-08-06 13:33:51 +00:00
Mubelotix
3ead985caf Fix issue #5772 2025-08-06 15:02:25 +02:00
Mubelotix
e302e9edd3 Add test for task 2025-08-06 15:02:15 +02:00
Tamo
1fdf820931 Update version to v1.17.0 2025-08-06 12:12:52 +02:00
Tamo
b4f2eeac0a Merge pull request #5803 from meilisearch/curquiza-patch-1
Minor docs update about release.md
2025-08-06 09:14:57 +00:00
Tamo
7e3f2ab0c6 Merge pull request #5785 from meilisearch/webhook-api
Webhook api
2025-08-05 18:10:00 +00:00
Tamo
899be9c3ff make sure we NEVER ever write the cli defined webhook to the database or dumps 2025-08-05 18:55:32 +02:00
Clément Renault
444231e812 Merge pull request #5804 from meilisearch/curquiza-patch-2
Minor fix in PR template
2025-08-05 15:11:23 +00:00
Mubelotix
1ff6da63e8 Make errors singular 2025-08-05 16:58:25 +02:00
Mubelotix
b5158e1e83 Fix cli webhook getting stored in dumps 2025-08-05 16:58:25 +02:00
Tamo
3f1e172c6f fix race condition: take the rtxn before entering the thread so we're sure we won't try to retrieve deleted tasks 2025-08-05 16:47:35 +02:00
Tamo
2b5b41790e update the dump so it doesn't contains the null-uuid webhook 2025-08-05 16:21:14 +02:00
Mubelotix
55cd3203fe Merge pull request #5783 from meilisearch/starts-with-optim
Optimize the starts_with filter
2025-08-05 14:10:10 +00:00
Clémentine
45bb13bf43 Minor fix in PR template 2025-08-05 15:42:56 +02:00
Clémentine
095cba8fba Minor docs update about release.md 2025-08-05 15:29:42 +02:00
Mubelotix
3a9b08960a Add test 2025-08-05 13:49:28 +02:00
Mubelotix
c4e7bf2e60 Stabilize STARTS WITH filter 2025-08-05 12:14:25 +02:00
Mubelotix
4f6a48c327 Stop storing the cli webhook in the db 2025-08-05 11:44:53 +02:00
Tamo
4c61a227ca fmt after my suggestion 2025-08-05 11:29:54 +02:00
Tamo
3d2c204f2d Update crates/milli/src/search/facet/filter.rs 2025-08-05 11:26:10 +02:00
Mubelotix
8b27dec25c Test that the cli webhook receives data 2025-08-05 11:19:21 +02:00
Mubelotix
a9c924b433 Turn url back into a setting 2025-08-05 11:16:34 +02:00
Mubelotix
6cb2296644 Update tests 2025-08-05 11:10:48 +02:00
Mubelotix
b2d157a74a Remove dbg
Co-Authored-By: Thomas Campistron <irevoire@hotmail.fr>
2025-08-05 10:49:21 +02:00
Mubelotix
386cf83285 Improve webhook settings 2025-08-05 10:48:39 +02:00
Mubelotix
8ef1a50086 Add hint
Co-Authored-By: Thomas Campistron <irevoire@hotmail.fr>
2025-08-05 10:42:39 +02:00
Mubelotix
84651ffd7d Remove hardcoded buffer size
Co-Authored-By: Thomas Campistron <irevoire@hotmail.fr>
2025-08-05 10:41:28 +02:00
Mubelotix
43c20bb3ed Add missing actions in from_repr
Co-Authored-By: Thomas Campistron <irevoire@hotmail.fr>
2025-08-05 10:39:52 +02:00
Mubelotix
d340013d8b Change error name 2025-08-05 10:35:12 +02:00
Mubelotix
8a44d9faef Merge branch 'main' into webhook-api 2025-08-05 10:32:36 +02:00
Mubelotix
afb367c7f4 Update old comment 2025-08-05 10:29:39 +02:00
Mubelotix
84bcf9785f Merge branch 'main' into starts-with-optim 2025-08-05 10:27:45 +02:00
Mubelotix
fc814b7537 Apply review suggestion 2025-08-05 10:25:14 +02:00
Clémentine
0865d8af6c Merge pull request #5766 from meilisearch/release-process-change
Release process change
2025-08-05 07:07:46 +00:00
Clément Renault
cac884401f Merge pull request #5800 from meilisearch/tmp-release-v1.16.0
Bring back changes to main
2025-08-04 16:34:24 +00:00
Mubelotix
7251cccd03 Make notify_webhooks execute in its own thread 2025-08-04 17:13:05 +02:00
Mubelotix
ddfcacbb62 Add nice error message for users trying to set uuid or isEditable 2025-08-04 16:53:41 +02:00
Mubelotix
3b26d64a5d Edit reserved webhook message 2025-08-04 16:39:34 +02:00
Mubelotix
3b0f576d56 Improve invalid uuid error message 2025-08-04 16:38:00 +02:00
Clément Renault
454f8b36f4 Make clippy happy 2025-08-04 16:36:46 +02:00
Mubelotix
1754745c42 Add URL and header validity checks 2025-08-04 16:26:20 +02:00
Clément Renault
6f30dfa41c Merge remote-tracking branch 'origin/main' into tmp-release-v1.16.0 2025-08-04 16:06:51 +02:00
Tamo
33350248c8 Merge pull request #5773 from meilisearch/snapshotception
Fix snapshotCreation task being included in snapshot
2025-08-04 13:53:43 +00:00
Mubelotix
69c59d3de3 Update security in utoipa 2025-08-04 15:43:37 +02:00
Mubelotix
8dfebbb3e7 Fix tests 2025-08-04 15:37:12 +02:00
Mubelotix
737ad3ec19 Add new api key actions 2025-08-04 15:00:45 +02:00
Mubelotix
4ec4710811 Improve logs 2025-08-04 15:00:26 +02:00
Mubelotix
c5caac95dd Format 2025-08-04 14:51:23 +02:00
Mubelotix
7acbb1e140 Remove PATCH /webhooks 2025-08-04 14:49:27 +02:00
Clémentine
a5e5afd123 Merge pull request #5794 from meilisearch/dependabot/github_actions/sigstore/cosign-installer-3.9.2
Bump sigstore/cosign-installer from 3.8.2 to 3.9.2
2025-08-04 12:39:40 +00:00
Clémentine
c70e9abf70 Merge pull request #5795 from meilisearch/dependabot/github_actions/svenstaro/upload-release-action-2.11.2
Bump svenstaro/upload-release-action from 2.11.1 to 2.11.2
2025-08-04 12:10:03 +00:00
curquiza
f8d70249a7 Update process with Ruleset branch addition 2025-08-04 13:59:11 +02:00
Clémentine
a2c96d40d3 Merge pull request #5798 from meilisearch/update-minidashboard-v0.2.22
Update mini-dashboard v0.2.22
2025-08-04 10:40:12 +00:00
Clément Renault
05dd8e0d62 update mini-dashboard to v0.2.22 2025-08-04 11:14:10 +02:00
Clémentine
4182e631d6 Potential fix for code scanning alert no. 63: Workflow does not contain permissions
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-08-04 09:59:54 +02:00
dependabot[bot]
ddea0b1570 Bump svenstaro/upload-release-action from 2.11.1 to 2.11.2
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.11.1 to 2.11.2.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.11.1...2.11.2)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-version: 2.11.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-01 17:05:02 +00:00
dependabot[bot]
beb532e2a7 Bump sigstore/cosign-installer from 3.8.2 to 3.9.2
Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 3.8.2 to 3.9.2.
- [Release notes](https://github.com/sigstore/cosign-installer/releases)
- [Commits](3454372f43...d58896d6a1)

---
updated-dependencies:
- dependency-name: sigstore/cosign-installer
  dependency-version: 3.9.2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-01 17:04:58 +00:00
Mubelotix
e3a6d63b52 Add utoipa types 2025-08-01 08:42:27 +02:00
Clément Renault
6f8c414a75 Merge pull request #5791 from meilisearch/update-minidashboard-v0.2.21
update mini-dashboard to v0.2.21
2025-07-31 16:13:35 +00:00
ManyTheFish
2ec80a1ae2 update mini-dashboard to v0.2.21 2025-07-31 17:14:38 +02:00
Mubelotix
ed147f80ac Add test and fix bug 2025-07-31 16:45:30 +02:00
Clément Renault
c37ed05f49 Merge pull request #5790 from meilisearch/adapt-go-ci
Adapt Go CI to recent change in the Go repo
2025-07-31 14:20:33 +00:00
curquiza
c1a5a545b6 Adapt Go CI to recent change in the Go repo 2025-07-31 15:23:45 +02:00
Mubelotix
35537e0b0b Add single_receives_data test 2025-07-31 14:12:09 +02:00
Mubelotix
ee80fc87c9 Add test for patch endpoint 2025-07-31 13:00:43 +02:00
Clémentine
bb43bf122e Update .github/pull_request_template.md
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-31 12:55:19 +02:00
Mubelotix
34590297c1 Add patch webhook endpoint 2025-07-31 12:53:57 +02:00
Mubelotix
9e43f7b419 Update tests 2025-07-31 12:44:35 +02:00
Mubelotix
94733a4a18 Add delete endpoint 2025-07-31 12:38:14 +02:00
Mubelotix
ad68245186 Update tests 2025-07-31 12:33:34 +02:00
Mubelotix
29fb4d5e2a Add post webhook route 2025-07-31 12:27:12 +02:00
Mubelotix
ca27bcaac7 Update tests 2025-07-31 11:34:47 +02:00
Mubelotix
53397e28fc Replace name by uuid 2025-07-31 11:19:46 +02:00
Mubelotix
7c2c17129f Add get webhook route 2025-07-31 10:59:06 +02:00
Mubelotix
446fce6c16 Extract logic from route 2025-07-31 10:01:25 +02:00
Clément Renault
a99538cd5f Merge pull request #5787 from meilisearch/chat-update-metrics-name
feat(chat): update prometheus metrics name
2025-07-31 07:58:07 +00:00
Mubelotix
f67043801b Add a test for concurrent cli and dump 2025-07-31 09:35:16 +02:00
nicolasvienot
941da56ee3 fix linter 2025-07-31 06:49:53 +02:00
nicolasvienot
41262b008b feat(chat): update metrics name 2025-07-30 17:55:02 +02:00
Mubelotix
fc4c5d2718 Add dump test 2025-07-30 16:16:12 +02:00
Mubelotix
a75b327b37 Add test for webhooks over limits 2025-07-30 15:59:19 +02:00
Mubelotix
c70ae91d34 Add test for reserved webhooks 2025-07-30 15:52:24 +02:00
Mubelotix
e88480c7c4 Fix reserved name check 2025-07-30 15:44:51 +02:00
Mubelotix
b565ec1497 Test cli behavior 2025-07-30 15:44:42 +02:00
Mubelotix
3e77c1d8c8 Add reserved webhook 2025-07-30 15:23:06 +02:00
Mubelotix
dc7af47371 Add new errors 2025-07-30 15:18:43 +02:00
Mubelotix
064d9d5ff8 Add dump support 2025-07-30 15:06:37 +02:00
Mubelotix
93f8b31eec Fix tests 2025-07-30 12:52:01 +02:00
Mubelotix
466e1a7aac Support legacy cli arguments 2025-07-30 12:25:59 +02:00
Mubelotix
cc37eb870f Initial implementation 2025-07-30 12:01:40 +02:00
Mubelotix
5567653c96 Fix network documentation 2025-07-29 16:47:28 +02:00
Mubelotix
5e867f7ce0 Add webhooks api key action 2025-07-29 16:47:20 +02:00
Mubelotix
48a5f4db2d Improve comment 2025-07-28 16:42:33 +02:00
Mubelotix
224892e692 Enable new algorithm every time 2025-07-28 16:28:06 +02:00
Mubelotix
691a9ae4b1 Format 2025-07-28 16:24:11 +02:00
Mubelotix
e8a818f53d Optimize the filter 2025-07-28 16:24:04 +02:00
Mubelotix
478f374b9d Add benchmark 2025-07-28 16:23:26 +02:00
Louis Dureuil
d4c88f28f3 Merge pull request #5780 from meilisearch/fix-api-keys
Fix api key action inconsistencies
2025-07-28 10:33:48 +00:00
Mubelotix
d90c76d3cc Update tests 2025-07-28 11:35:15 +02:00
Mubelotix
f6bc6854f8 Fix key action inconsistencies 2025-07-28 11:10:55 +02:00
Mubelotix
1f18f0ba77 Update little tiny comments 2025-07-23 14:33:58 +02:00
Mubelotix
44b24652d2 Change strategy to remove task instead of marking it succeeded 2025-07-23 14:30:25 +02:00
Mubelotix
5dcf79233e Remove useless parameter
Co-Authored-By: Tamo <tamo@meilisearch.com>
2025-07-23 11:30:39 +02:00
Clément Renault
42001a25ff Merge pull request #5770 from meilisearch/fix/update-index-chat-settings
fix: index chat settings `searchParameters` incorrectly set with `limit: 20` when sending empty object
2025-07-22 13:26:01 +00:00
Mubelotix
846d27354b Format 2025-07-22 15:18:21 +02:00
Mubelotix
c1aa4120ac Update test 2025-07-22 15:18:13 +02:00
Mubelotix
6394efc4c2 Turn dirty fix into beautiful fix 2025-07-22 15:17:26 +02:00
Mubelotix
9716834380 Initial fix 2025-07-22 14:31:42 +02:00
Mubelotix
2f2e42e72d Add test for issue #4653 2025-07-22 12:33:18 +02:00
Louis Dureuil
080d5f94dd Merge pull request #5763 from meilisearch/embedding-fixes
Regenerate all fragments when coming from a user provided vector
2025-07-22 08:35:07 +00:00
nicolasvienot
ba0f50e5ef fix: update default deserialization for ChatSearchParams limit field 2025-07-22 10:24:18 +02:00
Louis Dureuil
ce6230aa85 Merge pull request #5762 from meilisearch/new-document-indexer-for-dumps
Use the edition 2024 documents indexer in the dumps
2025-07-21 14:53:43 +00:00
Louis Dureuil
6dc241f9de Fix tests 2025-07-21 15:11:24 +02:00
Louis Dureuil
01d1ef65c4 Update search and docs usages 2025-07-21 15:11:24 +02:00
Louis Dureuil
3246667590 when exporting vectors, for regenerate to false when the embedder has fragments 2025-07-21 15:11:24 +02:00
Louis Dureuil
109395c199 Index::embeddings specifies if the embedder has fragments 2025-07-21 15:11:24 +02:00
Louis Dureuil
a0b71a8785 EmbedderOptions::has_fragments() 2025-07-21 15:11:24 +02:00
Louis Dureuil
00a5c86f13 Remove accidentally added db snap 2025-07-21 15:11:24 +02:00
Louis Dureuil
366c37a686 Fix new indexer 2025-07-21 15:11:23 +02:00
Louis Dureuil
afc164a271 Fix in old indexer 2025-07-21 15:11:23 +02:00
Kerollmops
bdc2d1e64d Move the edition 2024 dump parameter to the right place 2025-07-21 14:50:05 +02:00
curquiza
f3b60a1dab Minor update on doc 2025-07-20 22:20:08 +02:00
curquiza
cd0523c3f1 Remove run of SDK test on PR because cannot work 2025-07-20 22:13:07 +02:00
curquiza
7f318ee964 Adapt issue template 2025-07-20 22:11:30 +02:00
curquiza
dc1656da8e Adapt automation 2025-07-20 22:11:14 +02:00
curquiza
dc0bd9f25d Add release drafter 2025-07-20 22:10:35 +02:00
curquiza
52d8007b12 Add pull request template 2025-07-20 22:10:17 +02:00
curquiza
4f8382b159 Remove useless automation 2025-07-20 22:07:59 +02:00
curquiza
c2c82be556 Update documentation 2025-07-20 22:07:23 +02:00
Tamo
0312fb22b8 Merge pull request #5761 from meilisearch/fix-chat-settings-dumpless-upgrade
Fix chat settings dumpless upgrade
2025-07-17 15:57:39 +00:00
Clément Renault
b85657de1e Update memmap2 version everywhere 2025-07-17 17:30:44 +02:00
Clément Renault
626be0ef28 Small typo fix
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-17 17:27:00 +02:00
Clément Renault
1b476b8a35 Add documentation to the new documents_file dump reader method
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-17 17:26:41 +02:00
Clément Renault
a1b42c10e2 Make clippy happy 2025-07-17 17:21:03 +02:00
Clément Renault
d67db6e3c2 Use the edition 2024 documents indexer in the dumps 2025-07-17 17:12:51 +02:00
Clément Renault
760ccffdbd Expose the documents files from the dumps 2025-07-17 17:12:51 +02:00
Clément Renault
338806283b Do not track meilisearch databases 2025-07-17 17:12:51 +02:00
Clément Renault
fe15e11c9d Introduce a new CLI and env var to use the old document indexer when
importing dumps
2025-07-17 17:12:51 +02:00
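
The pattern this commit describes, a CLI option that can also be driven by an environment variable, is straightforward with clap's derive API. A minimal sketch follows; the option and variable names are illustrative, not necessarily the exact ones Meilisearch ships:

```rust
// Requires clap with the "derive" and "env" features enabled.
use clap::Parser;

/// Sketch only: hypothetical option/env-var names.
#[derive(Debug, Parser)]
struct Opt {
    /// Fall back to the old document indexer when importing dumps.
    #[arg(long, env = "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS")]
    experimental_no_edition_2024_for_dumps: bool,
}

fn main() {
    let opt = Opt::parse();
    println!("use old dump indexer: {}", opt.experimental_no_edition_2024_for_dumps);
}
```

Pairing `long` with `env` keeps both entry points in a single declaration, which matches how Meilisearch generally exposes its experimental switches.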
Clément Renault
f1d92bfead Make sure the new filter chat setting is set to it's default value if
missing
2025-07-17 15:36:21 +02:00
Clément Renault
a005a062da Add security if chat settings parameters are missing 2025-07-17 15:27:53 +02:00
Clément Renault
fd8b2451d7 Merge pull request #5754 from kametsun/fix/incorrect-stats-doc-count
Fix incorrect document count in stats after clearing all documents
2025-07-17 06:48:51 +00:00
Louis Dureuil
058f9ffda5 Merge pull request #5734 from meilisearch/request-fragments-test
Tests for multimodal
2025-07-16 11:04:00 +00:00
Louis Dureuil
5d363205a5 Merge pull request #5716 from meilisearch/document-sorting
Allow sorting on the /documents route
2025-07-16 10:26:50 +00:00
Mubelotix
a683faa882 Apply review suggestions 2025-07-16 11:03:24 +02:00
Tamo
421a23ee3d Merge pull request #3265 from LeSuisse/sign-container-image-cosign
Sign container image using Cosign in keyless mode
2025-07-16 08:54:57 +00:00
Thomas Gerbet
191ea340ed Sign container image using Cosign in keyless mode
Cosign keyless mode makes possible to sign the container image using the
OIDC Identity Tokens provided by GitHub Actions [0][1].
The signature is published to the registry storing the image and to the
public Rekor transparency log instance [2].

Cosign keyless mode has already been adopted by some major projects like
Kubernetes [3].

The image signature can be manually verified using:
```
$ cosign verify \
	--certificate-oidc-issuer='https://token.actions.githubusercontent.com' \
	--certificate-identity-regexp='^https://github.com/meilisearch/meilisearch/.github/workflows/publish-docker-images.yaml' \
	<image_name>
```

See #2179.
Note that a similar approach can be used to sign the release binaries.

[0] https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/about-security-hardening-with-openid-connect
[1] https://docs.sigstore.dev/cosign/signing/signing_with_containers/
[2] https://docs.sigstore.dev/rekor/overview
[3] https://kubernetes.io/docs/tasks/administer-cluster/verify-signed-artifacts/#verifying-image-signatures
2025-07-16 10:04:18 +02:00
Louis Dureuil
8887cbdcd5 Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
2025-07-16 07:15:24 +00:00
Tamo
8d22972d84 Merge pull request #5626 from martin-g/faster-batches-it-tests
tests: Faster batches:: IT tests
2025-07-16 07:01:16 +00:00
Many the fish
634865ff53 Merge pull request #5710 from meilisearch/chat-route-support-filters
Introduce filters in the chat completions
2025-07-15 16:10:49 +00:00
Mubelotix
36fccf8525 Merge remote-tracking branch 'origin/release-v1.16.0' into fix-threshold-overcounting-bug 2025-07-15 18:01:29 +02:00
Mubelotix
d6bd60d569 Apply review suggestions
Co-Authored-By: Louis Dureuil <louis.dureuil@xinra.net>
2025-07-15 18:00:37 +02:00
Mubelotix
48ad959fc1 Merge remote-tracking branch 'origin/release-v1.16.0' into document-sorting 2025-07-15 17:41:46 +02:00
Mubelotix
1bc30cb4c8 Restore old benchmark names 2025-07-15 17:34:04 +02:00
Mubelotix
77138a42d6 Apply review suggestions
Add preconditions

Fix underflow

Remove unwrap

Turn methods to associated functions

Apply review suggestions
2025-07-15 17:31:11 +02:00
Kerollmops
0791506124 Fix some proposals 2025-07-15 17:10:45 +02:00
Kerollmops
2a015ac3b8 Implement basic few shot prompting to improve the query capabilities 2025-07-15 14:50:10 +02:00
Martin Grigorov
8772b5af87 Merge branch 'main' into faster-batches-it-tests 2025-07-15 15:21:32 +03:00
Clément Renault
6f248b78a9 Merge pull request #5751 from meilisearch/fix-searchable-attributes-order
Fix: Preserve order of searchable attributes when modified
2025-07-15 10:38:11 +00:00
Many the fish
d694e312ff Update crates/milli/src/update/settings.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-07-15 11:54:59 +02:00
Clément Renault
d76dcc8998 Make clippy happy 2025-07-15 11:49:48 +02:00
Clément Renault
e654f66223 Support filtering 2025-07-15 11:49:47 +02:00
Clément Renault
34f2ab7093 WIP report search errors to the LLM 2025-07-15 11:49:46 +02:00
Clément Renault
1a9dbd364e Fix some issues 2025-07-15 11:49:46 +02:00
Clément Renault
662c5d9871 Introduce filters in the chat completions 2025-07-15 11:49:45 +02:00
Tamo
df2e7cde53 Merge pull request #5703 from martin-g/all-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task()
2025-07-15 09:18:12 +00:00
Clément Renault
02b2ae6142 Merge pull request #5756 from meilisearch/fix-integration-test
Fix Rails CI
2025-07-15 07:38:06 +00:00
curquiza
f813eb7ca4 Fix 2025-07-13 12:35:54 +02:00
curquiza
d072edaa49 Fix Rails CI 2025-07-13 12:26:56 +02:00
kametsun
5cd61b50f9 Fix formatting 2025-07-12 18:19:26 +09:00
kametsun
9a9be76757 add: verify that the statistics are correctly update assert 2025-07-12 11:15:44 +09:00
kametsun
cfa6ba6c3b Fix stats showing wrong document count after clear all
Update database stats after clearing documents to ensure
/stats endpoint returns correct numberOfDocuments: 0 instead
of stale count.
2025-07-12 11:15:44 +09:00
Clément Renault
f4f333dbf6 Merge pull request #5753 from meilisearch/export-fixes
Various fixes on the export route
2025-07-11 19:15:42 +00:00
Mubelotix
1ade76ba10 Remove sneaky debug 2025-07-11 12:27:04 +02:00
Mubelotix
ae26658913 Use the most appropriate unit in payload_too_large error 2025-07-11 12:27:03 +02:00
Mubelotix
aa09edb3fb Fix errors being silently dropped 2025-07-11 12:27:03 +02:00
Mubelotix
3f42f1a036 Get rid of bearer 2025-07-11 12:27:03 +02:00
Mubelotix
9bdfdd395b Fix document step overflowing 2025-07-11 12:27:03 +02:00
Mubelotix
78d0625a91 Decrease default payload size for exports 2025-07-11 12:27:03 +02:00
Martin Tzvetanov Grigorov
e3daa907c5 Update redactions
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 11:14:39 +03:00
Martin Tzvetanov Grigorov
a39223822a More tests fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 11:11:46 +03:00
Martin Grigorov
1eb6cd38ce Merge branch 'main' into faster-batches-it-tests 2025-07-11 10:49:22 +03:00
Martin Tzvetanov Grigorov
eb6ad3ef9c Fix batch id detection
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 10:24:25 +03:00
Martin Tzvetanov Grigorov
3bef4f4413 Use Server::wait_task() instead of Index::wait_task()
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 10:16:25 +03:00
Martin Tzvetanov Grigorov
9f89881b0d More tests fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 10:11:58 +03:00
ManyTheFish
3f655ea20e compare user defined searchable fields instead of internal searchable fields 2025-07-10 18:24:23 +02:00
ManyTheFish
50bc1d55f3 Add test reproducing the bug 2025-07-10 18:23:46 +02:00
Martin Tzvetanov Grigorov
126aefc207 Fix more tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 16:47:04 +03:00
Martin Tzvetanov Grigorov
e7a60555d6 Formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:35:40 +03:00
Martin Tzvetanov Grigorov
ae912c4c3f Pass the Server as an extra parameter when the Index needs to wait for a task
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:28:57 +03:00
Martin Tzvetanov Grigorov
13ea29e511 Fix some search+replace issues. Make Server::wait_task() available for Index:: methods
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:03:16 +03:00
Martin Tzvetanov Grigorov
5342df26fe tests: Use Server::wait_task() instead of Index::wait_task()
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:03:15 +03:00
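
The consolidation described above boils down to one polling loop with bounded retries instead of per-index copies. A minimal sketch of the idea, with stand-in names rather than the real test-harness types (`fetch_status` stands in for the HTTP call):

```rust
use std::time::Duration;

/// Sketch of a shared polling helper: probe a task's status until it reaches
/// a terminal state, with bounded retries. All names are illustrative.
fn wait_task(
    mut fetch_status: impl FnMut() -> String,
    max_retries: usize,
) -> Result<String, String> {
    for _ in 0..max_retries {
        let status = fetch_status();
        // Terminal states: stop polling and hand the result to the test.
        if matches!(status.as_str(), "succeeded" | "failed" | "canceled") {
            return Ok(status);
        }
        // Anything else (enqueued, processing): back off and retry.
        std::thread::sleep(Duration::from_millis(50));
    }
    Err("task did not reach a terminal state in time".into())
}

fn main() {
    let mut polls = 0;
    let result = wait_task(
        || {
            polls += 1;
            if polls < 3 { "processing".to_string() } else { "succeeded".to_string() }
        },
        100,
    );
    assert_eq!(result.unwrap(), "succeeded");
}
```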
Tamo
61bc95e8d6 Merge pull request #5740 from meilisearch/ignore-flaky-test-2
Ignore yet another flaky test
2025-07-09 13:25:45 +00:00
Mubelotix
0a4f2ef891 Leak mock servers 2025-07-08 15:27:35 +02:00
Tamo
faa1f7c5b7 Merge pull request #5693 from Mubelotix/default-key
Add a Read-Only Admin API Key by default
2025-07-08 12:38:29 +00:00
Mubelotix
3cc5d86598 Format 2025-07-08 13:57:17 +02:00
Mubelotix
1ae47bec77 Improve composite test 2025-07-08 13:57:07 +02:00
Mubelotix
2f1be0ff86 Ignore faulty test (see #5746) 2025-07-08 13:55:07 +02:00
Mubelotix
9cee432255 Fix broken tests 2025-07-08 13:36:26 +02:00
Mubelotix
ff8d48d2f1 Merge branch 'main' into default-key 2025-07-08 12:21:46 +02:00
Mubelotix
a56c036994 Update crates/meilisearch-types/src/keys.rs
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2025-07-08 12:18:52 +02:00
Louis Dureuil
074744b8a6 Ignore yet-another flaky test 2025-07-08 10:54:39 +02:00
Tamo
511c48f520 Merge pull request #5737 from meilisearch/request-fragments-dumpless-upgrade
Fix the dumpless upgrade from v1.15 to v1.16 for request fragments
2025-07-08 08:49:38 +00:00
Louis Dureuil
4623691d1f Don't make the type-that-shall-not-be-written serializable
Following tamo's advice

Co-Authored-By: Tamo <tamo@meilisearch.com>
2025-07-08 10:04:33 +02:00
Mubelotix
3261aadcf2 Add composite test 2025-07-07 16:50:39 +02:00
Mubelotix
073e9f2967 Disable similarity check on composite embedders using fragments 2025-07-07 16:46:16 +02:00
Louis Dureuil
5f8f48ec95 Add new snapshot checking for regenerativeness 2025-07-07 16:43:05 +02:00
Louis Dureuil
ed2fe365a0 Fix existing snaps 2025-07-07 16:42:50 +02:00
Louis Dureuil
f7c8a77f89 Update v1.12.0 DB to contain vectors 2025-07-07 16:01:50 +02:00
Clément Renault
a8030850ee Merge pull request #5733 from meilisearch/improve-export-analytics
Improve the analytics of the `/export` route
2025-07-07 12:26:11 +00:00
Mubelotix
132065afda Minor improvements 2025-07-07 13:10:16 +02:00
Mubelotix
51c298662b Merge branch 'main' into request-fragments-test 2025-07-07 13:00:21 +02:00
Mubelotix
70a860a0f0 Merge branch 'main' into fix-threshold-overcounting-bug 2025-07-07 12:26:37 +02:00
Louis Dureuil
a3254d7d7d Implement dumpless upgrade from v1.15 to v1.16 2025-07-07 11:57:08 +02:00
Louis Dureuil
73c9c1ebdc Add compile-time checks for dumpless upgrade 2025-07-07 11:34:18 +02:00
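
One common way to get such compile-time checks in Rust is to enumerate the known versions and let `match` exhaustiveness do the work. A sketch of the idea with hypothetical types, not the actual Meilisearch code:

```rust
/// Encoding the known DB versions as an enum makes the `match` below
/// exhaustive, so adding a new version without writing its upgrade arm is a
/// compile error rather than a runtime surprise.
#[derive(Clone, Copy, Debug, PartialEq)]
enum DbVersion {
    V1_15,
    V1_16,
}

fn upgrade_once(from: DbVersion) -> DbVersion {
    match from {
        DbVersion::V1_15 => {
            // Run the v1.15 -> v1.16 migration here.
            DbVersion::V1_16
        }
        // Latest version: nothing left to do. Adding `V1_17` to the enum
        // without an arm here fails to compile, which is the point.
        DbVersion::V1_16 => DbVersion::V1_16,
    }
}

fn main() {
    assert_eq!(upgrade_once(DbVersion::V1_15), DbVersion::V1_16);
}
```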
Clément Renault
4c7a6e5c1b Do not leak private URLs 2025-07-07 11:07:58 +02:00
Mubelotix
fa3990daf9 Format 2025-07-04 13:33:49 +02:00
Mubelotix
c5993196b3 Add test 2025-07-04 13:32:55 +02:00
Mubelotix
16234e1313 Add fragment swapping test 2025-07-04 13:25:42 +02:00
Mubelotix
be9f4f96df Add experimental feature test 2025-07-04 13:15:15 +02:00
Mubelotix
b274106ad3 Add test 2025-07-04 13:05:52 +02:00
Mubelotix
48527761e7 Add test 2025-07-04 12:01:15 +02:00
Mubelotix
6792d048b8 Test both fragments and document template 2025-07-04 11:47:38 +02:00
Kerollmops
07bfed99e6 Expose the host in the analytics 2025-07-04 11:08:02 +02:00
Mubelotix
8dfded2993 Update tests 2025-07-04 10:49:03 +02:00
Mubelotix
3714f16696 Fix bug 2025-07-04 10:40:50 +02:00
Mubelotix
d0cd3cacec Add a way to reproduce the bug 2025-07-03 18:18:04 +02:00
Mubelotix
caccb51814 Add a complex value test 2025-07-03 16:10:23 +02:00
Mubelotix
cf9b311f71 Format 2025-07-03 15:53:09 +02:00
Mubelotix
7423243be0 Add test with multiple embedders 2025-07-03 15:52:18 +02:00
Mubelotix
5690700601 Add fragment addition test 2025-07-03 15:19:31 +02:00
Mubelotix
2faad504c6 Add test 2025-07-03 15:12:47 +02:00
Mubelotix
2bcd69750f Add fragment modification test 2025-07-03 15:08:27 +02:00
Mubelotix
de24e75be8 Update test 2025-07-03 15:00:11 +02:00
Louis Dureuil
a3af9fe057 new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:44:34 +02:00
Louis Dureuil
90683d0e4e add snapshot of get settings 2025-07-03 14:43:06 +02:00
Mubelotix
5c79273748 Add TODOs 2025-07-03 14:42:49 +02:00
Mubelotix
b45eea0d3e Add test for fragment deletion 2025-07-03 13:26:44 +02:00
Mubelotix
0b89ef1fd7 Make tests use a shared index 2025-07-03 11:32:49 +02:00
Mubelotix
65ba7b47af Test search fragments 2025-07-03 11:32:49 +02:00
Mubelotix
8af76a65bf Add test_fragment_indexing 2025-07-03 11:32:49 +02:00
Mubelotix
f60814b319 Add benchmark 2025-07-02 12:06:00 +02:00
Mubelotix
5a675bcb82 Add benchmarks 2025-07-02 11:50:32 +02:00
Mubelotix
600178c5ab Still limit to max hits 2025-07-01 18:33:09 +02:00
Mubelotix
dedae94102 Fix #5274 2025-07-01 16:22:25 +02:00
Mubelotix
7ae9a4afee Add a test for issue #5274 2025-07-01 15:42:43 +02:00
Mubelotix
e92b6beb20 Revert making check_sort_criteria usable without a search context 2025-07-01 14:26:55 +02:00
Mubelotix
27cc357362 Document code 2025-07-01 14:21:55 +02:00
Mubelotix
73dfeefc7c Remove plural form 2025-07-01 14:08:46 +02:00
Mubelotix
d85480de89 Move sort code out of facet 2025-07-01 14:05:47 +02:00
Mubelotix
9f55708d84 Format 2025-07-01 13:58:56 +02:00
Mubelotix
280c3907be Add test to sort the unsortable 2025-07-01 13:58:37 +02:00
Mubelotix
8419fd9b3b Ditch usage of check_sort_criteria 2025-07-01 13:42:38 +02:00
Mubelotix
283944ea89 Differentiate between document sort error and search sort error 2025-07-01 12:03:50 +02:00
Mubelotix
8aacd6374a Optimize geo sort 2025-07-01 11:50:01 +02:00
Mubelotix
8326f34ad1 Add analytics 2025-07-01 11:35:28 +02:00
Mubelotix
f4a908669c Add tests 2025-07-01 10:02:15 +02:00
Mubelotix
eb2c2815b6 Fix panic 2025-07-01 10:00:10 +02:00
Mubelotix
29e9c74a49 Merge two ifs 2025-06-30 16:17:04 +02:00
Mubelotix
f6803dd7d1 Simplify iterator chaining in facet sort 2025-06-30 14:05:23 +02:00
Mubelotix
f86f4f619f Implement geo sort on documents 2025-06-30 13:57:30 +02:00
Mubelotix
e35d58b531 Move geosort code out of search 2025-06-30 13:12:00 +02:00
Mubelotix
63827bbee0 Move sorting code out of search 2025-06-30 11:59:59 +02:00
Mubelotix
340d9e6edc Optimize facet sort
5 to 10x speedup
2025-06-27 14:40:55 +02:00
Mubelotix
28adbc0d18 Update tests 2025-06-27 09:47:46 +02:00
Mubelotix
e3fba62e13 Fix typo 2025-06-27 09:40:59 +02:00
Mubelotix
fb9170b8e3 Keep name consistent with others 2025-06-27 09:40:30 +02:00
Mubelotix
c15763f910 Improve key description
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-06-27 09:39:24 +02:00
Mubelotix
4534dc2cab Create another deserr error 2025-06-25 16:45:32 +02:00
Mubelotix
b05cb80803 Take sort criteria from the request 2025-06-25 16:41:08 +02:00
Mubelotix
6e0526090a Implement sorting documents 2025-06-25 15:36:12 +02:00
Mubelotix
2090e9ea31 Update test 2025-06-25 10:08:25 +02:00
Mubelotix
1c8f1c18f4 Fix constant name and key description 2025-06-25 09:59:34 +02:00
Mubelotix
c4a96b40eb Remove KeysGet from AllGet 2025-06-24 17:40:06 +02:00
Mubelotix
2d6dc83940 Format the code 2025-06-19 15:55:12 +02:00
Mubelotix
ab768f379f Fix comment 2025-06-19 15:49:34 +02:00
Mubelotix
705e9a9e5e Make the uuids random again to prevent abuse using rainbow tables 2025-06-19 15:45:09 +02:00
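
For context on the rainbow-table concern: a name-derived (v5) UUID is a hash of a namespace and a name, so anyone who can guess the name can precompute the identifier, while a v4 UUID is random. A small illustration with the `uuid` crate, using an invented key name:

```rust
// Requires the `uuid` crate with the "v4" and "v5" features.
use uuid::Uuid;

fn main() {
    // Deterministic (v5): the same name always hashes to the same UUID, so a
    // known or guessable name lets anyone precompute the identifier.
    let derived = Uuid::new_v5(&Uuid::NAMESPACE_OID, b"default-admin-key");
    let derived_again = Uuid::new_v5(&Uuid::NAMESPACE_OID, b"default-admin-key");
    assert_eq!(derived, derived_again);

    // Random (v4): unpredictable, so it cannot be precomputed from a name.
    let random = Uuid::new_v4();
    assert_ne!(random, derived);
}
```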
Mubelotix
67f2a30d7c Fix test 2025-06-19 13:10:08 +02:00
Mubelotix
99732f4084 Fix some tests 2025-06-19 13:04:55 +02:00
Mubelotix
5081d837ea Fix AllGet action being included in All 2025-06-19 12:12:30 +02:00
Mubelotix
9e1cb792f4 Rename Action::AllRead to AllGet 2025-06-19 11:55:25 +02:00
Mubelotix
b6b7ede266 Rename Action *.read to *.get 2025-06-19 11:53:42 +02:00
Mubelotix
f50e586a4f Allow management key to read other keys 2025-06-19 11:52:58 +02:00
Mubelotix
11fedea788 Set static uuids to keys 2025-06-19 11:42:45 +02:00
Mubelotix
032b34c377 Add a default management key 2025-06-19 11:29:32 +02:00
Mubelotix
b421c8e7de Add an AllRead key 2025-06-19 11:29:16 +02:00
Mubelotix
00eb258a53 Fix comment 2025-06-19 11:16:07 +02:00
Martin Tzvetanov Grigorov
9e31d6ceff Add batch_uid to all successful and failed tasks too
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:48 +03:00
Martin Tzvetanov Grigorov
139ec8c782 Add task.batch_uid() helper method
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:48 +03:00
Martin Tzvetanov Grigorov
2691999bd3 Add a helper method for getting the latest batch
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:47 +03:00
Martin Tzvetanov Grigorov
48460678df More assertion fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:47 +03:00
Martin Tzvetanov Grigorov
cb15e5c67e WIP: More snapshot updates
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:46 +03:00
Martin Tzvetanov Grigorov
7380808b26 tests: Faster batches:: IT tests
Use shared server + unique indices where possible

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:46 +03:00
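
The speedup pattern referenced here is one shared server process plus per-test unique index names. A minimal sketch with stand-in types (the real helpers live in the test support crate):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

/// Stand-in for the real test helper: one server instance is shared by the
/// whole test binary, and isolation comes from per-test unique index names.
struct Server;

static INDEX_COUNTER: AtomicUsize = AtomicUsize::new(0);

impl Server {
    /// Return a process-wide shared instance instead of booting one per test.
    fn new_shared() -> &'static Server {
        static SHARED: Server = Server;
        &SHARED
    }

    /// Each test works on its own index, so tests can run concurrently
    /// against the same server without clobbering each other's data.
    fn unique_index_name(&self) -> String {
        format!("test-index-{}", INDEX_COUNTER.fetch_add(1, Ordering::Relaxed))
    }
}

fn main() {
    let server = Server::new_shared();
    let a = server.unique_index_name();
    let b = server.unique_index_name();
    assert_ne!(a, b); // two tests never share an index
}
```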
181 changed files with 8910 additions and 2700 deletions


@@ -1,28 +1,26 @@
---
name: New sprint issue
about: ⚠️ Should only be used by the engine team ⚠️
name: New feature issue
about: ⚠️ Should only be used by the internal Meili team ⚠️
title: ''
labels: 'missing usage in PRD, impacts docs'
labels: 'impacts docs, impacts integrations'
assignees: ''
---
Related product team resources: [PRD]() (_internal only_)
Related product discussion:
## Motivation
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
TBD
## TODO
<!---If necessary, create a list with technical/product steps-->
### Are you modifying a database?
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.
@@ -54,5 +52,5 @@ Related product discussion:
## Impacted teams
<!---Ping the related teams. Ask for the engine manager if any hesitation-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->
<!---Ping the related teams. Ask on Slack if any hesitation-->
<!---@meilisearch/docs-team and @meilisearch/integration-team when there is any API change, e.g. settings addition-->

.github/pull_request_template.md (new file, 16 lines)

@@ -0,0 +1,16 @@
## Related issue
Fixes #...

## Requirements

⚠️ Ensure the following requirements before merging ⚠️
- [ ] Automated tests have been added.
- [ ] If some tests cannot be automated, manual rigorous tests should be applied.
- [ ] ⚠️ If there is any change in the DB:
  - [ ] Test that any impacted DB still works as expected after using `--experimental-dumpless-upgrade` on a DB created with the last released Meilisearch
  - [ ] Test that during the upgrade, **search is still available** (artificially make the upgrade longer if needed)
  - [ ] Set the `db change` label.
- [ ] If necessary, the feature have been tested in the Cloud production environment (with [prototypes](./documentation/prototypes.md)) and the Cloud UI is ready.
- [ ] If necessary, the [documentation](https://github.com/meilisearch/documentation) related to the implemented feature in the PR is ready.
- [ ] If necessary, the [integrations](https://github.com/meilisearch/integration-guides) related to the implemented feature in the PR are ready.

.github/release-draft-template.yml (new file, 33 lines)

@@ -0,0 +1,33 @@
name-template: 'v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'
exclude-labels:
  - 'skip changelog'
version-resolver:
  minor:
    labels:
      - 'enhancement'
  default: patch
categories:
  - title: '⚠️ Breaking changes'
    label: 'breaking-change'
  - title: '🚀 Enhancements'
    label: 'enhancement'
  - title: '🐛 Bug Fixes'
    label: 'bug'
  - title: '🔒 Security'
    label: 'security'
  - title: '⚙️ Maintenance/misc'
    label:
      - 'maintenance'
      - 'documentation'
template: |
  $CHANGES

  ❤️ Huge thanks to our contributors: $CONTRIBUTORS.
no-changes-template: 'Changes are coming soon 😎'
sort-direction: 'ascending'
replacers:
  - search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
    replace: ''
  - search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
    replace: ''

.github/templates/dependency-issue.md (new file, 22 lines)

@@ -0,0 +1,22 @@
This issue is about updating Meilisearch dependencies:
- [ ] Update Meilisearch dependencies with the help of `cargo +nightly udeps --all-targets` (remove unused dependencies) and `cargo upgrade` (upgrade dependencies versions) - ⚠️ Some repositories may contain subdirectories (like heed, charabia, or deserr). Take care of updating these in the main crate as well. This won't be done automatically by `cargo upgrade`.
  - [ ] [deserr](https://github.com/meilisearch/deserr)
  - [ ] [charabia](https://github.com/meilisearch/charabia/)
  - [ ] [heed](https://github.com/meilisearch/heed/)
  - [ ] [roaring-rs](https://github.com/RoaringBitmap/roaring-rs/)
  - [ ] [obkv](https://github.com/meilisearch/obkv)
  - [ ] [grenad](https://github.com/meilisearch/grenad/)
  - [ ] [arroy](https://github.com/meilisearch/arroy/)
  - [ ] [segment](https://github.com/meilisearch/segment)
  - [ ] [bumparaw-collections](https://github.com/meilisearch/bumparaw-collections)
  - [ ] [bbqueue](https://github.com/meilisearch/bbqueue)
  - [ ] Finally, [Meilisearch](https://github.com/meilisearch/MeiliSearch)
- [ ] If new Rust versions have been released, update the minimal Rust version in use at Meilisearch:
  - [ ] in this [GitHub Action file](https://github.com/meilisearch/meilisearch/blob/main/.github/workflows/test-suite.yml), by changing the `toolchain` field of the `rustfmt` job to the latest available nightly (of the day before or the current day).
  - [ ] in every [GitHub Action files](https://github.com/meilisearch/meilisearch/blob/main/.github/workflows), by changing all the `dtolnay/rust-toolchain@` references to use the latest stable version.
  - [ ] in this [`rust-toolchain.toml`](https://github.com/meilisearch/meilisearch/blob/main/rust-toolchain.toml), by changing the `channel` field to the latest stable version.
  - [ ] in the [Dockerfile](https://github.com/meilisearch/meilisearch/blob/main/Dockerfile), by changing the base image to `rust:<target_rust_version>-alpine<alpine_version>`. Check that the image exists on [Dockerhub](https://hub.docker.com/_/rust/tags?page=1&name=alpine). Also, build and run the image to check everything still works!

⚠️ This issue should be prioritized to avoid any deprecation and vulnerability issues.

The GitHub action dependencies are managed by [Dependabot](https://github.com/meilisearch/meilisearch/blob/main/.github/dependabot.yml), so no need to update them when solving this issue.


@@ -1,100 +0,0 @@
name: PR Milestone Check

on:
  pull_request:
    types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
    branches:
      - "main"
      - "release-v*.*.*"

jobs:
  check-milestone:
    name: Check PR Milestone
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Validate PR milestone
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Get PR number directly from the event payload
            const prNumber = context.payload.pull_request.number;
            // Get PR details
            const { data: prData } = await github.rest.pulls.get({
              owner: 'meilisearch',
              repo: 'meilisearch',
              pull_number: prNumber
            });
            // Get base branch name
            const baseBranch = prData.base.ref;
            console.log(`Base branch: ${baseBranch}`);
            // Get PR milestone
            const prMilestone = prData.milestone;
            if (!prMilestone) {
              core.setFailed('PR must have a milestone assigned');
              return;
            }
            console.log(`PR milestone: ${prMilestone.title}`);
            // Validate milestone format: vx.y.z
            const milestoneRegex = /^v\d+\.\d+\.\d+$/;
            if (!milestoneRegex.test(prMilestone.title)) {
              core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
              return;
            }
            // For main branch PRs, check if the milestone is the highest one
            if (baseBranch === 'main') {
              // Get all milestones
              const { data: milestones } = await github.rest.issues.listMilestones({
                owner: 'meilisearch',
                repo: 'meilisearch',
                state: 'open',
                sort: 'due_on',
                direction: 'desc'
              });
              // Sort milestones by version number (vx.y.z)
              const sortedMilestones = milestones
                .filter(m => milestoneRegex.test(m.title))
                .sort((a, b) => {
                  const versionA = a.title.substring(1).split('.').map(Number);
                  const versionB = b.title.substring(1).split('.').map(Number);
                  // Compare major version
                  if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
                  // Compare minor version
                  if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
                  // Compare patch version
                  return versionB[2] - versionA[2];
                });
              if (sortedMilestones.length === 0) {
                core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
                return;
              }
              const highestMilestone = sortedMilestones[0];
              console.log(`Highest milestone: ${highestMilestone.title}`);
              if (prMilestone.title !== highestMilestone.title) {
                core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
                return;
              }
            } else {
              // For release branches, the milestone should match the branch version
              const branchVersion = baseBranch.substring(8); // remove 'release-'
              if (prMilestone.title !== branchVersion) {
                core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
                return;
              }
            }
            console.log('PR milestone validation passed!');


@@ -15,7 +15,7 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
        run: curl -s https://raw.githubusercontent.com/meilisearch/meilisearch/main/.github/templates/dependency-issue.md > $ISSUE_TEMPLATE
      - name: Create issue
        run: |
          gh issue create \


@@ -3,7 +3,7 @@ name: Look for flaky tests
on:
  workflow_dispatch:
  schedule:
    - cron: "0 12 * * FRI" # Every Friday at 12:00PM
    - cron: '0 4 * * *' # Every day at 4:00AM

jobs:
  flaky:


@@ -1,224 +0,0 @@
name: Milestone's workflow

# /!\ No git flow are handled here

# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
# - the roadmap issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/changelog-issue.md
# - update the ruleset to add the current release version to the list of allowed versions and be able to use the merge queue.

# For each Milestone closed
# - the `release_version` label is created
# - this label is applied to all issues/PRs in the Milestone

on:
  milestone:
    types: [created, closed]

env:
  MILESTONE_VERSION: ${{ github.event.milestone.title }}
  MILESTONE_URL: ${{ github.event.milestone.html_url }}
  MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }}
  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}

jobs:
  # -----------------
  # MILESTONE CREATED
  # -----------------

  get-release-version:
    if: github.event.action == 'created'
    runs-on: ubuntu-latest
    outputs:
      is-patch: ${{ steps.check-patch.outputs.is-patch }}
    steps:
      - uses: actions/checkout@v3
      - name: Check if this release is a patch release only
        id: check-patch
        run: |
          echo version: $MILESTONE_VERSION
          if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then
            echo 'This is NOT a patch release'
            echo "is-patch=false" >> $GITHUB_OUTPUT
          elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo 'This is a patch release'
            echo "is-patch=true" >> $GITHUB_OUTPUT
          else
            echo "Not a valid format of release, check the Milestone's title."
            echo 'Should be vX.Y.Z'
            exit 1
          fi

  create-roadmap-issue:
    needs: get-release-version
    # Create the roadmap issue if the release is not only a patch release
    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
      - name: Replace all empty occurrences in the templates
        run: |
          # Replace all <<version>> occurrences
          sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
          # Replace all <<milestone_id>> occurrences
          milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
          sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
          # Replace release date if exists
          if [[ ! -z $MILESTONE_DUE_ON ]]; then
            date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
            sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE
          fi
      - name: Create the issue
        run: |
          gh issue create \
            --title "$MILESTONE_VERSION ROADMAP" \
            --label 'epic,impacts docs,impacts integrations,impacts cloud' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION

  create-changelog-issue:
    needs: get-release-version
    # Create the changelog issue if the release is not only a patch release
    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
      - name: Replace all empty occurrences in the templates
        run: |
          # Replace all <<version>> occurrences
          sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
          # Replace all <<milestone_id>> occurrences
          milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
          sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
      - name: Create the issue
        run: |
          gh issue create \
            --title "Create release changelogs for $MILESTONE_VERSION" \
            --label 'impacts docs,documentation' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION \
            --assignee curquiza

  create-update-version-issue:
    needs: get-release-version
    # Create the update-version issue even if the release is a patch release
    if: github.event.action == 'created'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
      - name: Create the issue
        run: |
          gh issue create \
            --title "Update version in Cargo.toml for $MILESTONE_VERSION" \
            --label 'maintenance' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION

  create-update-openapi-issue:
    needs: get-release-version
    # Create the openAPI issue if the release is not only a patch release
    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-openapi-issue.md > $ISSUE_TEMPLATE
      - name: Create the issue
        run: |
          gh issue create \
            --title "Update Open API file for $MILESTONE_VERSION" \
            --label 'maintenance' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION

  update-ruleset:
    runs-on: ubuntu-latest
    if: github.event.action == 'created'
    steps:
      - uses: actions/checkout@v3
      - name: Install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq
      - name: Update ruleset
        env:
          # gh api repos/meilisearch/meilisearch/rulesets --jq '.[] | {name: .name, id: .id}'
          RULESET_ID: 4253297
          BRANCH_NAME: ${{ github.event.inputs.branch_name }}
        run: |
          echo "RULESET_ID: ${{ env.RULESET_ID }}"
          echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
          # Get current ruleset conditions
          CONDITIONS=$(gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} --jq '{ conditions: .conditions }')
          # Update the conditions by appending the milestone version
          UPDATED_CONDITIONS=$(echo $CONDITIONS | jq '.conditions.ref_name.include += ["refs/heads/release-'${{ env.MILESTONE_VERSION }}'"]')
          # Update the ruleset from stdin (-)
          echo $UPDATED_CONDITIONS |
            gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} \
              --method PUT \
              -H "Accept: application/vnd.github+json" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              --input -

  # ----------------
  # MILESTONE CLOSED
  # ----------------

  create-release-label:
    if: github.event.action == 'closed'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Create the ${{ env.MILESTONE_VERSION }} label
        run: |
          label_description="PRs/issues solved in $MILESTONE_VERSION"
          if [[ ! -z $MILESTONE_DUE_ON ]]; then
            date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
            label_description="$label_description released on $date"
          fi
          gh api repos/meilisearch/meilisearch/labels \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -f name="$MILESTONE_VERSION" \
            -f description="$label_description" \
            -f color='ff5ba3'

  labelize-all-milestone-content:
    if: github.event.action == 'closed'
    needs: create-release-label
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone
        run: |
          prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
          for pr in $prs; do
            gh pr edit $pr --add-label $MILESTONE_VERSION
          done
      - name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone
        run: |
          issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
          for issue in $issues; do
            gh issue edit $issue --add-label $MILESTONE_VERSION
          done


@@ -32,7 +32,7 @@ jobs:
      - name: Build deb package
        run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
      - name: Upload debian pkg to release
        uses: svenstaro/upload-release-action@2.11.1
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/debian/meilisearch.deb


@@ -51,7 +51,7 @@ jobs:
      # No need to upload binaries for dry run (cron)
      - name: Upload binaries to release
        if: github.event_name == 'release'
        uses: svenstaro/upload-release-action@2.11.1
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/release/meilisearch
@@ -81,7 +81,7 @@ jobs:
      # No need to upload binaries for dry run (cron)
      - name: Upload binaries to release
        if: github.event_name == 'release'
        uses: svenstaro/upload-release-action@2.11.1
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/release/${{ matrix.artifact_name }}
@@ -113,7 +113,7 @@
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
        uses: svenstaro/upload-release-action@2.11.1
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
@@ -178,7 +178,7 @@ jobs:
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
        uses: svenstaro/upload-release-action@2.11.1
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch


@@ -16,6 +16,8 @@ on:
jobs:
  docker:
    runs-on: docker
    permissions:
      id-token: write # This is needed to use Cosign in keyless mode
    steps:
      - uses: actions/checkout@v3
@@ -62,6 +64,9 @@ jobs:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Install cosign
        uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
@@ -85,6 +90,7 @@
      - name: Build and push
        uses: docker/build-push-action@v6
        id: build-and-push
        with:
          push: true
          platforms: linux/amd64,linux/arm64
@@ -94,6 +100,17 @@
            COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
            GIT_TAG=${{ github.ref_name }}
      - name: Sign the images with GitHub OIDC Token
        env:
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
          TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          images=""
          for tag in ${TAGS}; do
            images+="${tag}@${DIGEST} "
          done
          cosign sign --yes ${images}
      # /!\ Don't touch this without checking with Cloud team
      - name: Send CI information to Cloud team
        # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)

.github/workflows/release-drafter.yml (new file, 20 lines)

@@ -0,0 +1,20 @@
name: Release Drafter

permissions:
  contents: read
  pull-requests: write

on:
  push:
    branches:
      - main

jobs:
  update_release_draft:
    runs-on: ubuntu-latest
    steps:
      - uses: release-drafter/release-drafter@v6
        with:
          config-name: release-draft-template.yml
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFTER_TOKEN }}


@@ -9,7 +9,7 @@ on:
        required: false
        default: nightly
  schedule:
    - cron: "0 6 * * MON" # Every Monday at 6:00AM
    - cron: '0 6 * * *' # Every day at 6:00am

env:
  MEILI_MASTER_KEY: 'masterKey'
@@ -114,7 +114,7 @@ jobs:
          dep ensure
        fi
      - name: Run integration tests
        run: go test -v ./...
        run: go test --race -v ./integration

  meilisearch-java-tests:
    needs: define-docker-image
@@ -344,15 +344,23 @@ jobs:
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    env:
      RAILS_VERSION: '7.0'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-rails
      - name: Set up Ruby 3
      - name: Install SQLite dependencies
        run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3
          bundler-cache: true
      - name: Start MongoDB
        uses: supercharge/mongodb-github-action@1.12.0
        with:
          mongodb-version: 8.0
      - name: Run tests
        run: bundle exec rspec


@@ -3,7 +3,7 @@ name: Test suite
on:
  workflow_dispatch:
  schedule:
    # Everyday at 5:00am
    # Every day at 5:00am
    - cron: "0 5 * * *"
  pull_request:
  merge_group:

.gitignore (vendored, 10 changed lines)

@@ -5,18 +5,24 @@
**/*.json_lines
**/*.rs.bk
/*.mdb
/data.ms
/*.ms
/snapshots
/dumps
/bench
/_xtask_benchmark.ms
/benchmarks
.DS_Store
# Snapshots
## ... large
*.full.snap
## ... unreviewed
## ... unreviewed
*.snap.new
## ... pending
*.pending-snap
# Tmp files
.tmp*
# Database snapshot
crates/meilisearch/db.snapshot


@@ -106,7 +106,13 @@ Run `cargo xtask --help` from the root of the repository to find out what is ava
#### Update the openAPI file if the API changed
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api):
- Pull the latest version of the latest rc of Meilisearch `git checkout release-vX.Y.Z; git pull`
- Starts Meilisearch with the `swagger` feature flag: `cargo run --features swagger`
- On a browser, open the following URL: http://localhost:7700/scalar
- Click the « Download openAPI file »
- Open a PR replacing [this file](https://github.com/meilisearch/open-api/blob/main/open-api.json) with the one downloaded
### Logging
@@ -160,25 +166,37 @@ Some notes on GitHub PRs:
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
## Release Process (for internal team only)
Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org/).
### Automation to rebase and Merge the PRs
## Merging PRs
This project uses GitHub Merge Queues that helps us manage pull requests merging.
### How to Publish a new Release
Before merging a PR, the maintainer should ensure the following requirements are met
- Automated tests have been added.
- If some tests cannot be automated, manual rigorous tests should be applied.
- ⚠️ If there is an change in the DB: it's mandatory to manually test the `--experimental-dumpless-upgrade` on a DB of the previous Meilisearch minor version (e.g. v1.13 for the v1.14 release).
- If necessary, the feature have been tested in the Cloud production environment (with [prototypes](./documentation/prototypes.md)) and the Cloud UI is ready.
- If necessary, the [documentation](https://github.com/meilisearch/documentation) related to the implemented feature in the PR is ready.
- If necessary, the [integrations](https://github.com/meilisearch/integration-guides) related to the implemented feature in the PR are ready.
The full Meilisearch release process is described in [this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md). Please follow it carefully before doing any release.
## Publish Process (for internal team only)
Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org/).
### How to publish a new release
The full Meilisearch release process is described in [this guide](./documentation/release.md).
### How to publish a prototype
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
This happens in two steps:
- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
- [Release the prototype](./documentation/prototypes.md#how-to-publish-a-prototype)
- [Communicate about it](./documentation/prototypes.md#communication)
### How to implement and publish an experimental feature
Here are our [guidelines and process](./documentation/experimental-features.md) for implementing and publishing an experimental feature.
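As a hedged illustration of the runtime side, togglable experimental features (such as the `containsFilter` toggle touched by this changeset) can be flipped on a running instance through the `/experimental-features` route:

```sh
# Enable the experimental contains filter on a local instance
# (assuming the containsFilter toggle name).
curl -s -X PATCH 'http://localhost:7700/experimental-features' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "containsFilter": true }'
```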
### Release assets

Cargo.lock generated

@@ -444,12 +444,12 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arroy"
version = "0.7.0"
source = "git+https://github.com/meilisearch/arroy.git?rev=a63f0979b216dde10d50fdfa4fadcb2b1dea73c7#a63f0979b216dde10d50fdfa4fadcb2b1dea73c7"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6"
dependencies = [
"bytemuck",
"byteorder",
"crossbeam",
"enum-iterator",
"heed",
"memmap2",
@@ -461,7 +461,6 @@ dependencies = [
"roaring",
"tempfile",
"thiserror 2.0.12",
"thread_local",
"tracing",
]
@@ -581,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
[[package]]
name = "benchmarks"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"anyhow",
"bumpalo",
@@ -771,7 +770,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"anyhow",
"time",
@@ -1368,19 +1367,6 @@ dependencies = [
"itertools 0.10.5",
]
[[package]]
name = "crossbeam"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
@@ -1788,7 +1774,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"anyhow",
"big_s",
@@ -2020,7 +2006,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"tempfile",
"thiserror 2.0.12",
@@ -2042,7 +2028,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"insta",
"nom",
@@ -2063,7 +2049,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"criterion",
"serde_json",
@@ -2208,7 +2194,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"arbitrary",
"bumpalo",
@@ -3008,7 +2994,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"anyhow",
"backoff",
@@ -3244,7 +3230,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"criterion",
"serde_json",
@@ -3738,7 +3724,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"insta",
"md5",
@@ -3749,7 +3735,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -3789,6 +3775,7 @@ dependencies = [
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"memmap2",
"mimalloc",
"mime",
"mopa-maintained",
@@ -3844,7 +3831,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -3863,7 +3850,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"actix-web",
"anyhow",
@@ -3898,7 +3885,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"anyhow",
"clap",
@@ -3922,9 +3909,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memmap2"
version = "0.9.5"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28"
dependencies = [
"libc",
"stable_deref_trait",
@@ -3932,7 +3919,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"allocator-api2 0.3.0",
"arroy",
@@ -4484,7 +4471,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"big_s",
"serde_json",
@@ -7272,7 +7259,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.16.0"
version = "1.17.0"
dependencies = [
"anyhow",
"build-info",


@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.16.0"
version = "1.17.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",


@@ -119,6 +119,6 @@ Meilisearch is, and will always be, open-source! If you want to contribute to th
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](./documentation/versioning-policy.md).
Unlike the binaries, crates in this repository are not currently available on [crates.io](https://crates.io/) and do not follow [SemVer conventions](https://semver.org).


@@ -14,7 +14,7 @@ license.workspace = true
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
milli = { path = "../milli" }
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
@@ -51,3 +51,11 @@ harness = false
[[bench]]
name = "indexing"
harness = false
[[bench]]
name = "sort"
harness = false
[[bench]]
name = "filter_starts_with"
harness = false
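The two new benches can be run individually; a sketch, assuming the `benchmarks` crate's datasets have already been fetched as its build script expects:

```sh
cargo bench -p benchmarks --bench filter_starts_with
cargo bench -p benchmarks --bench sort
```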


@@ -0,0 +1,66 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::FilterableAttributesRule;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields = ["geonameid", "name"].iter().map(|s| s.to_string()).collect();
builder.set_displayed_fields(displayed_fields);
let filterable_fields =
["name"].iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
builder.set_filterable_fields(filterable_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
queries: &[
"",
],
configure: base_conf,
primary_key: Some("geonameid"),
..Conf::BASE
};
fn filter_starts_with(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
utils::Conf {
group_name: "1 letter",
filter: Some("name STARTS WITH e"),
..BASE_CONF
},
utils::Conf {
group_name: "2 letters",
filter: Some("name STARTS WITH es"),
..BASE_CONF
},
utils::Conf {
group_name: "3 letters",
filter: Some("name STARTS WITH est"),
..BASE_CONF
},
utils::Conf {
group_name: "6 letters",
filter: Some("name STARTS WITH estoni"),
..BASE_CONF
}
];
utils::run_benches(c, confs);
}
criterion_group!(benches, filter_starts_with);
criterion_main!(benches);


@@ -0,0 +1,114 @@
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
//!
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields =
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_displayed_fields(displayed_fields);
let sortable_fields =
["_geo", "name", "population", "elevation", "timezone", "modification-date"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_sortable_fields(sortable_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
configure: base_conf,
primary_key: Some("geonameid"),
queries: &[""],
offsets: &[
Some((0, 20)), // The most common query in the real world
Some((0, 500)), // A query that ranges over many documents
Some((980, 20)), // The worst query that could happen in the real world
Some((800_000, 20)) // The worst query
],
get_documents: true,
..Conf::BASE
};
fn bench_sort(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
utils::Conf {
group_name: "without sort",
sort: None,
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different values",
sort: Some(vec!["name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar values",
sort: Some(vec!["timezone:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar then different values",
sort: Some(vec!["timezone:desc", "name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different then similar values",
sort: Some(vec!["name:asc", "timezone:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "geo sort",
sample_size: Some(10),
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar values then geo sort",
sample_size: Some(50),
sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different values then geo sort",
sample_size: Some(50),
sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many fields",
sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_sort);
criterion_main!(benches);


@@ -9,6 +9,7 @@ use anyhow::Context;
use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::documents::sort::recursive_sort;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
@@ -35,6 +36,12 @@ pub struct Conf<'a> {
pub configure: fn(&mut Settings),
pub filter: Option<&'a str>,
pub sort: Option<Vec<&'a str>>,
/// set to skip documents (offset, limit)
pub offsets: &'a [Option<(usize, usize)>],
/// enable if you want to bench getting documents without querying
pub get_documents: bool,
/// configure the benchmark sample size
pub sample_size: Option<usize>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key; if it is None we'll auto-generate docids for every document
@@ -52,6 +59,9 @@ impl Conf<'_> {
configure: |_| (),
filter: None,
sort: None,
offsets: &[None],
get_documents: false,
sample_size: None,
optional_words: true,
primary_key: None,
};
@@ -145,25 +155,79 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
let name = format!("{}: {}", file_name, conf.group_name);
let mut group = c.benchmark_group(&name);
if let Some(sample_size) = conf.sample_size {
group.sample_size(sample_size);
}
for &query in conf.queries {
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
let _ids = search.execute().unwrap();
});
});
for offset in conf.offsets {
let parameter = match offset {
None => query.to_string(),
Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
};
group.bench_with_input(
BenchmarkId::from_parameter(parameter),
&query,
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
if let Some((offset, limit)) = offset {
search.offset(*offset).limit(*limit);
}
let _ids = search.execute().unwrap();
});
},
);
}
}
if conf.get_documents {
for offset in conf.offsets {
let parameter = match offset {
None => String::from("get_documents"),
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
};
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
let all_docs = index.documents_ids(&rtxn).unwrap();
let facet_sort =
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
let iter = facet_sort.iter().unwrap();
if let Some((offset, limit)) = offset {
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = iter.collect::<Vec<_>>();
}
} else {
let all_docs = index.documents_ids(&rtxn).unwrap();
if let Some((offset, limit)) = offset {
let _results =
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = all_docs.iter().collect::<Vec<_>>();
}
}
});
});
}
}
group.finish();
index.prepare_for_closing().wait();


@@ -1,3 +1,4 @@
use std::fs::File;
use std::str::FromStr;
use super::v2_to_v3::CompatV2ToV3;
@@ -94,6 +95,10 @@ impl CompatIndexV1ToV2 {
self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
}
pub fn documents_file(&self) -> &File {
self.from.documents_file()
}
pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
}


@@ -1,3 +1,4 @@
use std::fs::File;
use std::str::FromStr;
use time::OffsetDateTime;
@@ -122,6 +123,13 @@ impl CompatIndexV2ToV3 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV2ToV3::V2(v2) => v2.documents_file(),
CompatIndexV2ToV3::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
let settings = match self {
CompatIndexV2ToV3::V2(from) => from.settings()?,


@@ -1,3 +1,5 @@
use std::fs::File;
use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
use super::v4_to_v5::CompatV4ToV5;
use crate::reader::{v3, v4, UpdateFile};
@@ -252,6 +254,13 @@ impl CompatIndexV3ToV4 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV3ToV4::V3(v3) => v3.documents_file(),
CompatIndexV3ToV4::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
Ok(match self {
CompatIndexV3ToV4::V3(v3) => {


@@ -1,3 +1,5 @@
use std::fs::File;
use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
use super::v5_to_v6::CompatV5ToV6;
use crate::reader::{v4, v5, Document};
@@ -241,6 +243,13 @@ impl CompatIndexV4ToV5 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV4ToV5::V4(v4) => v4.documents_file(),
CompatIndexV4ToV5::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
match self {
CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),


@@ -1,3 +1,4 @@
use std::fs::File;
use std::num::NonZeroUsize;
use std::str::FromStr;
@@ -201,6 +202,10 @@ impl CompatV5ToV6 {
pub fn network(&self) -> Result<Option<&v6::Network>> {
Ok(None)
}
pub fn webhooks(&self) -> Option<&v6::Webhooks> {
None
}
}
pub enum CompatIndexV5ToV6 {
@@ -243,6 +248,13 @@ impl CompatIndexV5ToV6 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV5ToV6::V5(v5) => v5.documents_file(),
CompatIndexV5ToV6::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),


@@ -138,6 +138,13 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.network(),
}
}
pub fn webhooks(&self) -> Option<&v6::Webhooks> {
match self {
DumpReader::Current(current) => current.webhooks(),
DumpReader::Compat(compat) => compat.webhooks(),
}
}
}
impl From<V6Reader> for DumpReader {
@@ -192,6 +199,14 @@ impl DumpIndexReader {
}
}
/// A reference to a file in the NDJSON format containing all the documents of the index
pub fn documents_file(&self) -> &File {
match self {
DumpIndexReader::Current(v6) => v6.documents_file(),
DumpIndexReader::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
DumpIndexReader::Current(v6) => v6.settings(),
@@ -357,6 +372,7 @@ pub(crate) mod test {
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
assert_eq!(dump.network().unwrap(), None);
assert_eq!(dump.webhooks(), None);
}
#[test]
@@ -427,6 +443,43 @@ pub(crate) mod test {
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
}
#[test]
fn import_dump_v6_webhooks() {
let dump = File::open("tests/assets/v6-with-webhooks.dump").unwrap();
let dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2025-07-31 9:21:30.479544 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @r"
Some(
cb887dcc-34b3-48d1-addd-9815ae721a81,
)
");
// webhooks
let webhooks = dump.webhooks().unwrap();
insta::assert_json_snapshot!(webhooks, @r#"
{
"webhooks": {
"627ea538-733d-4545-8d2d-03526eb381ce": {
"url": "https://example.com/authorization-less",
"headers": {}
},
"771b0a28-ef28-4082-b984-536f82958c65": {
"url": "https://example.com/hook",
"headers": {
"authorization": "TOKEN"
}
},
"f3583083-f8a7-4cbf-a5e7-fb3f1e28a7e9": {
"url": "https://third.com",
"headers": {}
}
}
}
"#);
}
#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();


@@ -72,6 +72,10 @@ impl V1IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}


@@ -203,6 +203,10 @@ impl V2IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -215,6 +215,10 @@ impl V3IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -210,6 +210,10 @@ impl V4IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -247,6 +247,10 @@ impl V5IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -25,6 +25,7 @@ pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@@ -59,6 +60,7 @@ pub struct V6Reader {
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
network: Option<Network>,
webhooks: Option<Webhooks>,
}
impl V6Reader {
@@ -93,8 +95,8 @@ impl V6Reader {
Err(e) => return Err(e.into()),
};
let network_file = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(network_file),
let network = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
Err(error) => match error.kind() {
// Allows the file to be missing; this will only result in all experimental features disabled.
ErrorKind::NotFound => {
@@ -104,10 +106,16 @@ impl V6Reader {
_ => return Err(error.into()),
},
};
let network = if let Some(network_file) = network_file {
Some(serde_json::from_reader(&*network_file)?)
} else {
None
let webhooks = match fs::read(dump.path().join("webhooks.json")) {
Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?),
Err(error) => match error.kind() {
ErrorKind::NotFound => {
debug!("`webhooks.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
Ok(V6Reader {
@@ -119,6 +127,7 @@ impl V6Reader {
features,
network,
dump,
webhooks,
})
}
@@ -229,6 +238,10 @@ impl V6Reader {
pub fn network(&self) -> Option<&Network> {
self.network.as_ref()
}
pub fn webhooks(&self) -> Option<&Webhooks> {
self.webhooks.as_ref()
}
}
pub struct UpdateFile {
@@ -284,6 +297,10 @@ impl V6IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
patch_embedders(&mut settings);


@@ -8,6 +8,7 @@ use meilisearch_types::batches::Batch;
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::WebhooksDumpView;
use serde_json::{Map, Value};
use tempfile::TempDir;
use time::OffsetDateTime;
@@ -74,6 +75,13 @@ impl DumpWriter {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
}
pub fn create_webhooks(&self, webhooks: WebhooksDumpView) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("webhooks.json"),
serde_json::to_string(&webhooks)?,
)?)
}
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);

Binary file not shown.


@@ -165,9 +165,9 @@ impl<'a> FilterCondition<'a> {
| Condition::Exists
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. } => None,
Condition::Contains { keyword, word: _ }
| Condition::StartsWith { keyword, word: _ } => Some(keyword),
| Condition::Between { .. }
| Condition::StartsWith { .. } => None,
Condition::Contains { keyword, word: _ } => Some(keyword),
},
FilterCondition::Not(this) => this.use_contains_operator(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
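With `STARTS WITH` stabilized (it no longer counts as a use of the experimental contains operator), it can appear in any filter; a hedged example against a local instance, where the `places` index and `name` field are illustrative:

```sh
curl -s -X POST 'http://localhost:7700/indexes/places/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "", "filter": "name STARTS WITH est" }'
```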


@@ -26,7 +26,7 @@ flate2 = "1.1.2"
indexmap = "2.9.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
memmap2 = "0.9.7"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] }


@@ -85,7 +85,7 @@ impl RoFeatures {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
disabled_action: "Using `CONTAINS` in a filter",
feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763",
}
@@ -182,6 +182,7 @@ impl FeatureData {
..persisted_features
}));
// Once this is stabilized, network should be stored along with webhooks in index-scheduler's persisted database
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();


@@ -71,7 +71,7 @@ pub struct IndexMapper {
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
/// The map size an index is opened with on the first time.
index_base_map_size: usize,
pub(crate) index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.


@@ -20,16 +20,17 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
cleanup_enabled: _,
experimental_no_edition_2024_for_dumps: _,
processing_tasks,
env,
version,
queue,
scheduler,
persisted,
index_mapper,
features: _,
webhook_url: _,
webhook_authorization_header: _,
webhooks: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
@@ -61,6 +62,13 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
}
snap.push_str("\n----------------------------------------------------------------------\n");
let persisted_db_snapshot = snapshot_persisted_db(&rtxn, persisted);
if !persisted_db_snapshot.is_empty() {
snap.push_str("### Persisted:\n");
snap.push_str(&persisted_db_snapshot);
snap.push_str("----------------------------------------------------------------------\n");
}
snap.push_str("### All Tasks:\n");
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
snap.push_str("----------------------------------------------------------------------\n");
@@ -199,6 +207,16 @@ pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec
snap
}
pub fn snapshot_persisted_db(rtxn: &RoTxn, db: &Database<Str, Str>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (key, value) = next.unwrap();
snap.push_str(&format!("{key}: {value}\n"));
}
snap
}
pub fn snapshot_task(task: &Task) -> String {
let mut snap = String::new();
let Task {
@@ -310,6 +328,7 @@ pub fn snapshot_status(
}
snap
}
pub fn snapshot_kind(rtxn: &RoTxn, db: Database<SerdeBincode<Kind>, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
@@ -330,6 +349,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
}
snap
}
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();


@@ -65,13 +65,16 @@ use meilisearch_types::milli::vector::{
use meilisearch_types::milli::{self, Index};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
use queue::Queue;
use roaring::RoaringBitmap;
use scheduler::Scheduler;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use versioning::Versioning;
use crate::index_mapper::IndexMapper;
@@ -80,7 +83,15 @@ use crate::utils::clamp_to_page_size;
pub(crate) type BEI128 = I128<BE>;
const TASK_SCHEDULER_SIZE_THRESHOLD_PERCENT_INT: u64 = 40;
const CHAT_SETTINGS_DB_NAME: &str = "chat-settings";
mod db_name {
pub const CHAT_SETTINGS: &str = "chat-settings";
pub const PERSISTED: &str = "persisted";
}
mod db_keys {
pub const WEBHOOKS: &str = "webhooks";
}
#[derive(Debug)]
pub struct IndexSchedulerOptions {
@@ -98,10 +109,10 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps.
pub dumps_path: PathBuf,
/// The URL on which we must send the tasks statuses
pub webhook_url: Option<String>,
/// The value we will send into the Authorization HTTP header on the webhook URL
pub webhook_authorization_header: Option<String>,
/// The webhook url that was set by the CLI.
pub cli_webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL that was set by the CLI.
pub cli_webhook_authorization: Option<String>,
/// The maximum size, in bytes, of the task index.
pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -168,10 +179,14 @@ pub struct IndexScheduler {
/// Whether we should automatically cleanup the task queue or not.
pub(crate) cleanup_enabled: bool,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// Whether we should use the old document indexer or the new one.
pub(crate) experimental_no_edition_2024_for_dumps: bool,
/// A database to store single-keyed data that is persisted across restarts.
persisted: Database<Str, Str>,
/// Webhook, loaded and stored in the `persisted` database
webhooks: Arc<Webhooks>,
/// A map to retrieve the runtime representation of an embedder depending on its configuration.
///
@@ -210,8 +225,10 @@ impl IndexScheduler {
index_mapper: self.index_mapper.clone(),
cleanup_enabled: self.cleanup_enabled,
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
persisted: self.persisted,
webhooks: self.webhooks.clone(),
embedders: self.embedders.clone(),
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -230,6 +247,7 @@ impl IndexScheduler {
+ IndexMapper::nb_db()
+ features::FeatureData::nb_db()
+ 1 // chat-prompts
+ 1 // persisted
}
/// Create an index scheduler and start its run loop.
@@ -280,10 +298,18 @@ impl IndexScheduler {
let version = versioning::Versioning::new(&env, from_db_version)?;
let mut wtxn = env.write_txn()?;
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
let chat_settings = env.create_database(&mut wtxn, Some(db_name::CHAT_SETTINGS))?;
let persisted = env.create_database(&mut wtxn, Some(db_name::PERSISTED))?;
let webhooks_db = persisted.remap_data_type::<SerdeJson<Webhooks>>();
let mut webhooks = webhooks_db.get(&wtxn, db_keys::WEBHOOKS)?.unwrap_or_default();
webhooks
.with_cli(options.cli_webhook_url.clone(), options.cli_webhook_authorization.clone());
wtxn.commit()?;
// allow unreachable_code to get rids of the warning in the case of a test build.
@@ -296,8 +322,11 @@ impl IndexScheduler {
index_mapper,
env,
cleanup_enabled: options.cleanup_enabled,
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
experimental_no_edition_2024_for_dumps: options
.indexer_config
.experimental_no_edition_2024_for_dumps,
persisted,
webhooks: Arc::new(webhooks),
embedders: Default::default(),
#[cfg(test)]
@@ -594,6 +623,11 @@ impl IndexScheduler {
Ok(nbr_index_processing_tasks > 0)
}
/// Whether the index should use the old document indexer.
pub fn no_edition_2024_for_dumps(&self) -> bool {
self.experimental_no_edition_2024_for_dumps
}
/// Return the tasks matching the query from the user's point of view along
/// with the total number of tasks matching the query, ignoring from and limit.
///
@@ -740,86 +774,92 @@ impl IndexScheduler {
Ok(())
}
/// Once the tasks changes have been committed we must send all the tasks that were updated to our webhook if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
/// Once the task changes have been committed, we must send all the tasks that were updated to our webhooks
fn notify_webhooks(&self, updated: RoaringBitmap) {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
impl Read for TaskReader<'_, '_> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.queue
.tasks
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;
impl Read for TaskReader<'_, '_> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.queue
.tasks
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, Error::CorruptedTaskQueue)
})?;
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
serde_json::to_writer(&mut self.buffer, &TaskView::from_task(&task))?;
self.buffer.push(b'\n');
}
}
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
Ok(wrote as usize)
}
}
let rtxn = self.env.read_txn()?;
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let request = ureq::post(url)
.timeout(Duration::from_secs(30))
.set("Content-Encoding", "gzip")
.set("Content-Type", "application/x-ndjson");
let request = match &self.webhook_authorization_header {
Some(header) => request.set("Authorization", header),
None => request,
};
if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook: {e}");
Ok(wrote as usize)
}
}
Ok(())
let webhooks = self.webhooks.get_all();
if webhooks.is_empty() {
return;
}
let this = self.private_clone();
// We must take the RoTxn before entering the thread::spawn, otherwise another batch
// may be processed before we have time to take our txn.
let rtxn = match self.env.clone().static_read_txn() {
Ok(rtxn) => rtxn,
Err(e) => {
tracing::error!("Couldn't get an rtxn to notify the webhook: {e}");
return;
}
};
std::thread::spawn(move || {
for (uuid, Webhook { url, headers }) in webhooks.iter() {
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: &this,
tasks: &mut updated.iter(),
buffer: Vec::with_capacity(page_size::get()),
written: 0,
};
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let mut request = ureq::post(url)
.timeout(Duration::from_secs(30))
.set("Content-Encoding", "gzip")
.set("Content-Type", "application/x-ndjson");
for (header_name, header_value) in headers.iter() {
request = request.set(header_name, header_value);
}
if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook {uuid}: {e}");
}
}
});
}
pub fn index_stats(&self, index_uid: &str) -> Result<IndexStats> {
@@ -850,6 +890,29 @@ impl IndexScheduler {
self.features.network()
}
pub fn update_runtime_webhooks(&self, runtime: RuntimeWebhooks) -> Result<()> {
let webhooks = Webhooks::from_runtime(runtime);
let mut wtxn = self.env.write_txn()?;
let webhooks_db = self.persisted.remap_data_type::<SerdeJson<Webhooks>>();
webhooks_db.put(&mut wtxn, db_keys::WEBHOOKS, &webhooks)?;
wtxn.commit()?;
self.webhooks.update_runtime(webhooks.into_runtime());
Ok(())
}
pub fn webhooks_dump_view(&self) -> WebhooksDumpView {
// We must not dump the cli api key
WebhooksDumpView { webhooks: self.webhooks.get_runtime() }
}
pub fn webhooks_view(&self) -> WebhooksView {
WebhooksView { webhooks: self.webhooks.get_all() }
}
pub fn retrieve_runtime_webhooks(&self) -> RuntimeWebhooks {
self.webhooks.get_runtime()
}
pub fn embedders(
&self,
index_uid: String,
@@ -978,3 +1041,72 @@ pub struct IndexStats {
/// Internal stats computed from the index.
pub inner_stats: index_mapper::IndexStats,
}
/// These structures are not meant to be exposed to the end user; if needed, use the meilisearch-types::webhooks structures instead.
/// /!\ Every time you deserialize this structure, you should fill in the cli webhook afterwards with the `with_cli` method. /!\
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct Webhooks {
// The cli webhook should *never* be stored in a database.
// It represents state that only exists for this execution of Meilisearch.
#[serde(skip)]
pub cli: Option<CliWebhook>,
#[serde(default)]
pub runtime: RwLock<RuntimeWebhooks>,
}
type RuntimeWebhooks = BTreeMap<Uuid, Webhook>;
impl Webhooks {
pub fn with_cli(&mut self, url: Option<String>, auth: Option<String>) {
if let Some(url) = url {
let webhook = CliWebhook { url, auth };
self.cli = Some(webhook);
}
}
pub fn from_runtime(webhooks: RuntimeWebhooks) -> Self {
Self { cli: None, runtime: RwLock::new(webhooks) }
}
pub fn into_runtime(self) -> RuntimeWebhooks {
// safe because we own self and it cannot be cloned
self.runtime.into_inner().unwrap()
}
pub fn update_runtime(&self, webhooks: RuntimeWebhooks) {
*self.runtime.write().unwrap() = webhooks;
}
/// Returns all the webhooks in a unified view. The cli webhook is represented with the nil (all-zero) uuid
pub fn get_all(&self) -> BTreeMap<Uuid, Webhook> {
self.cli
.as_ref()
.map(|wh| (Uuid::nil(), Webhook::from(wh)))
.into_iter()
.chain(self.runtime.read().unwrap().iter().map(|(uuid, wh)| (*uuid, wh.clone())))
.collect()
}
/// Returns all the runtime webhooks.
pub fn get_runtime(&self) -> BTreeMap<Uuid, Webhook> {
self.runtime.read().unwrap().iter().map(|(uuid, wh)| (*uuid, wh.clone())).collect()
}
}
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
struct CliWebhook {
pub url: String,
pub auth: Option<String>,
}
impl From<&CliWebhook> for Webhook {
fn from(webhook: &CliWebhook) -> Self {
let mut headers = BTreeMap::new();
if let Some(ref auth) = webhook.auth {
headers.insert("Authorization".to_string(), auth.to_string());
}
Self { url: webhook.url.to_string(), headers }
}
}
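Given the request `notify_webhooks` builds above (gzip-compressed ndjson of task views, plus the headers configured on each webhook), a hedged simulation of the payload a receiver should expect; the endpoint, token, and task body are illustrative:

```sh
# One task view JSON object per line, gzip-compressed,
# sent with the webhook's configured headers.
printf '%s\n' '{"uid":0,"status":"succeeded","type":"indexCreation"}' \
  | gzip \
  | curl -s -X POST 'http://localhost:8080/hook' \
      -H 'Content-Encoding: gzip' \
      -H 'Content-Type: application/x-ndjson' \
      -H 'Authorization: TOKEN' \
      --data-binary @-
```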


@@ -108,6 +108,7 @@ make_enum_progress! {
DumpTheBatches,
DumpTheIndexes,
DumpTheExperimentalFeatures,
DumpTheWebhooks,
CompressTheDump,
}
}

View File

@@ -238,7 +238,7 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u64);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj);
process_batch_info = info;
let mut success = 0;
@@ -317,7 +317,7 @@ impl IndexScheduler {
Err(err) => {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u64);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
progress.update_progress(task_progress_obj);
if matches!(err, Error::DatabaseUpgrade(_)) {
@@ -446,8 +446,7 @@ impl IndexScheduler {
Ok(())
})?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&ids);
self.notify_webhooks(ids);
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);


@@ -353,8 +353,8 @@ impl IndexScheduler {
for (step, swap) in swaps.iter().enumerate() {
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
step as u64,
swaps.len() as u64,
step as u32,
swaps.len() as u32,
));
self.apply_index_swap(
&mut wtxn,
@@ -472,7 +472,7 @@ impl IndexScheduler {
// 3. before_name -> new_name in the task's KindWithContent
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u64);
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
progress.update_progress(task_progress);
for task_id in tasks_to_update {
@@ -529,7 +529,7 @@ impl IndexScheduler {
// The tasks that have been removed *per batches*.
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u64);
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
progress.update_progress(task_progress);
for task_id in to_delete_tasks.iter() {
let task =
@@ -575,7 +575,7 @@ impl IndexScheduler {
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
let (atomic_progress, task_progress) = AtomicTaskStep::new(
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u64,
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
);
progress.update_progress(task_progress);
for index in affected_indexes.iter() {
@@ -594,7 +594,7 @@ impl IndexScheduler {
}
progress.update_progress(TaskDeletionProgress::DeletingTasks);
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u64);
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
progress.update_progress(task_progress);
for task in to_delete_tasks.iter() {
self.queue.tasks.all_tasks.delete(wtxn, &task)?;
@@ -611,7 +611,7 @@ impl IndexScheduler {
}
}
progress.update_progress(TaskDeletionProgress::DeletingBatches);
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u64);
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
progress.update_progress(batch_progress);
for (batch_id, to_delete_tasks) in affected_batches {
if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
@@ -786,7 +786,7 @@ impl IndexScheduler {
}
// 3. We now have a list of tasks to cancel, cancel them
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u64);
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);
let mut tasks = self.queue.tasks.get_existing_tasks(
@@ -797,7 +797,7 @@ impl IndexScheduler {
)?;
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u64);
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);
for task in tasks.iter_mut() {
task.status = Status::Canceled;


@@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
use dump::IndexMetadata;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self};
@@ -57,7 +58,7 @@ impl IndexScheduler {
let mut dump_tasks = dump.create_tasks_queue()?;
let (atomic, update_task_progress) =
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u64);
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
progress.update_progress(update_task_progress);
for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
@@ -119,7 +120,7 @@ impl IndexScheduler {
let mut dump_batches = dump.create_batches_queue()?;
let (atomic_batch_progress, update_batch_progress) =
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u64);
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
progress.update_progress(update_batch_progress);
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
@@ -149,7 +150,7 @@ impl IndexScheduler {
// 5. Dump the indexes
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u64;
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
@@ -178,7 +179,7 @@ impl IndexScheduler {
let nb_documents = index
.number_of_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
as u64;
as u32;
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
progress.update_progress(update_document_progress);
let documents = index
@@ -227,12 +228,21 @@ impl IndexScheduler {
return Err(Error::from_milli(user_err, Some(uid.to_string())));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided, all embeddings would be regenerated on any
// settings change or document update. To prevent this, we mark such
// embeddings as non-regenerate in this case.
!has_fragments,
};
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
@@ -260,6 +270,11 @@ impl IndexScheduler {
let network = self.network();
dump.create_network(network)?;
// 7. Dump the webhooks
progress.update_progress(DumpCreationProgress::DumpTheWebhooks);
let webhooks = self.webhooks_dump_view();
dump.create_webhooks(webhooks)?;
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();

View File

@@ -9,6 +9,7 @@ use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
@@ -55,20 +56,21 @@ impl IndexScheduler {
progress.update_progress(VariableNameStep::<ExportIndex>::new(
format!("Exporting index `{uid}`"),
i as u64,
indexes.len() as u64,
i as u32,
indexes.len() as u32,
));
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
@@ -90,8 +92,8 @@ impl IndexScheduler {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
@@ -103,8 +105,8 @@ impl IndexScheduler {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
@@ -122,7 +124,6 @@ impl IndexScheduler {
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
@@ -155,7 +156,7 @@ impl IndexScheduler {
// spawn many threads to process the documents
drop(index_rtxn);
let total_documents = universe.len();
let total_documents = universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
@@ -163,14 +164,14 @@ impl IndexScheduler {
IndexUidPattern::new_unchecked(uid.clone()),
DetailsExportIndexSettings {
settings: (*export_settings).clone(),
matched_documents: Some(total_documents),
matched_documents: Some(total_documents as u64),
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(50 * 1024 * 1024); // defaults to 50 MiB
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
request_threads()
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
@@ -229,12 +230,21 @@ impl IndexScheduler {
));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided, all embeddings would be regenerated on any
// settings change or document update. To prevent this, we mark such
// embeddings as non-regenerate in this case.
!has_fragments,
};
vectors.insert(
embedder_name,
@@ -265,9 +275,8 @@ impl IndexScheduler {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(api_key) = api_key {
request = request
.set("Authorization", &(format!("Bearer {api_key}")));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
@@ -276,7 +285,7 @@ impl IndexScheduler {
}
buffer.extend_from_slice(&tmp_buffer);
if i % 100 == 0 {
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
@@ -284,8 +293,8 @@ impl IndexScheduler {
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(api_key) = api_key {
request = request.set("Authorization", &(format!("Bearer {api_key}")));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
@@ -298,6 +307,9 @@ impl IndexScheduler {
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}


@@ -7,9 +7,73 @@ use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME};
use crate::heed::EnvOpenOptions;
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::queue::TaskQueue;
use crate::{Error, IndexScheduler, Result};
/// # Safety
///
/// See [`EnvOpenOptions::open`].
unsafe fn remove_tasks(
tasks: &[Task],
dst: &std::path::Path,
index_base_map_size: usize,
) -> Result<()> {
let env_options = EnvOpenOptions::new();
let mut env_options = env_options.read_txn_without_tls();
let env = env_options.max_dbs(TaskQueue::nb_db()).map_size(index_base_map_size).open(dst)?;
let mut wtxn = env.write_txn()?;
let task_queue = TaskQueue::new(&env, &mut wtxn)?;
// Destructuring to ensure the code below gets updated if a database gets added in the future.
let TaskQueue {
all_tasks,
status,
kind,
index_tasks: _, // snapshot creation tasks are not index tasks
canceled_by,
enqueued_at,
started_at,
finished_at,
} = task_queue;
for task in tasks {
all_tasks.delete(&mut wtxn, &task.uid)?;
let mut tasks = status.get(&wtxn, &task.status)?.unwrap_or_default();
tasks.remove(task.uid);
status.put(&mut wtxn, &task.status, &tasks)?;
let mut tasks = kind.get(&wtxn, &task.kind.as_kind())?.unwrap_or_default();
tasks.remove(task.uid);
kind.put(&mut wtxn, &task.kind.as_kind(), &tasks)?;
canceled_by.delete(&mut wtxn, &task.uid)?;
let timestamp = task.enqueued_at.unix_timestamp_nanos();
let mut tasks = enqueued_at.get(&wtxn, &timestamp)?.unwrap_or_default();
tasks.remove(task.uid);
enqueued_at.put(&mut wtxn, &timestamp, &tasks)?;
if let Some(task_started_at) = task.started_at {
let timestamp = task_started_at.unix_timestamp_nanos();
let mut tasks = started_at.get(&wtxn, &timestamp)?.unwrap_or_default();
tasks.remove(task.uid);
started_at.put(&mut wtxn, &timestamp, &tasks)?;
}
if let Some(task_finished_at) = task.finished_at {
let timestamp = task_finished_at.unix_timestamp_nanos();
let mut tasks = finished_at.get(&wtxn, &timestamp)?.unwrap_or_default();
tasks.remove(task.uid);
finished_at.put(&mut wtxn, &timestamp, &tasks)?;
}
}
wtxn.commit()?;
Ok(())
}
impl IndexScheduler {
pub(super) fn process_snapshot(
&self,
@@ -48,17 +112,29 @@ impl IndexScheduler {
};
self.env.copy_to_path(dst.join("data.mdb"), compaction_option)?;
// 2.2 Create a read transaction on the index-scheduler
// 2.2 Remove the current snapshot tasks
//
// This is done to ensure that the tasks are not processed again when the snapshot is imported
//
// # Safety
//
// This is safe because we open the env file we just created in a temporary directory.
// We are sure it's not being used by any other process or thread.
unsafe {
remove_tasks(&tasks, &dst, self.index_mapper.index_base_map_size)?;
}
// 2.3 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
// 2.3 Create the update files directory
// 2.4 Create the update files directory
let update_files_dir = temp_snapshot_dir.path().join("update_files");
fs::create_dir_all(&update_files_dir)?;
// 2.4 Only copy the update files of the enqueued tasks
// 2.5 Only copy the update files of the enqueued tasks
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u64);
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);
for task_id in enqueued {
let task =
@@ -74,12 +150,12 @@ impl IndexScheduler {
// 3. Snapshot every indexes
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
let index_mapping = self.index_mapper.index_mapping;
let nb_indexes = index_mapping.len(&rtxn)? as u64;
let nb_indexes = index_mapping.len(&rtxn)? as u32;
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
let (name, uuid) = result?;
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
name, i as u64, nb_indexes,
name, i as u32, nb_indexes,
));
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
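The progress counters above were narrowed from u64 to u32 to match the progress API. A hedged sketch of the counting pattern (the `fetch_add` call is an assumption based on the `atomic` naming):

let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);
for task_id in enqueued {
    // ... copy this task's update file into `update_files_dir` ...
    atomic.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}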

View File

@@ -22,8 +22,8 @@ impl IndexScheduler {
}
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
format!("Upgrading index `{uid}`"),
i as u64,
indexes.len() as u64,
i as u32,
indexes.len() as u32,
));
let index = self.index(uid)?;
let mut index_wtxn = index.write_txn()?;
@@ -65,8 +65,8 @@ impl IndexScheduler {
for (i, uid) in indexes.iter().enumerate() {
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
format!("Rollbacking index `{uid}`"),
i as u64,
indexes.len() as u64,
i as u32,
indexes.len() as u32,
));
let index_schd_rtxn = self.env.read_txn()?;

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
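The fixtures above are insta snapshots whose `upgradeTo` fields track the new version. If regenerating them locally, the usual cargo-insta flow applies (assuming cargo-insta is installed):

cargo insta test --review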

View File

@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use big_s::S;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::milli::vector::SearchQuery;
@@ -220,8 +221,8 @@ fn import_vectors() {
let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -311,9 +312,9 @@ fn import_vectors() {
let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou because set to regenerate
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -497,13 +498,13 @@ fn import_vectors_first_and_embedder_later() {
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
assert!(!embeddings.is_empty(), "{embeddings:?}");
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embeddings, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@@ -558,7 +559,7 @@ fn import_vectors_first_and_embedder_later() {
"###);
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@@ -566,7 +567,7 @@ fn import_vectors_first_and_embedder_later() {
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
}
@@ -696,7 +697,7 @@ fn delete_document_containing_vector() {
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["manual"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");
index_scheduler
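These tests move from positional tuple access to destructuring a named struct. A hypothetical shape for `EmbeddingsWithMetadata`, inferred only from the destructurings above (the metadata fields are elided with `..` in every test, so anything beyond `embeddings` is an assumption):

pub struct EmbeddingsWithMetadata {
    pub embeddings: Vec<Vec<f32>>, // was the `.0` of the old tuple
    // ...metadata fields, never named in this diff
}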

View File

@@ -98,8 +98,8 @@ impl IndexScheduler {
indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
webhook_url: None,
webhook_authorization_header: None,
cli_webhook_url: None,
cli_webhook_authorization: None,
task_db_size: 1000 * 1000 * 10, // 10 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,

View File

@@ -39,6 +39,7 @@ pub fn upgrade_index_scheduler(
(1, 13, _) => 0,
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)

View File

@@ -158,7 +158,7 @@ impl AuthController {
self.store.delete_all_keys()
}
/// Delete all the keys in the DB.
/// Insert a key directly into the store.
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@@ -351,6 +351,7 @@ pub struct IndexSearchRules {
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_chat())?;
store.put_api_key(Key::default_read_only_admin())?;
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;

View File

@@ -88,7 +88,13 @@ impl HeedAuthStore {
let mut actions = HashSet::new();
for action in &key.actions {
match action {
Action::All => actions.extend(enum_iterator::all::<Action>()),
Action::All => {
actions.extend(enum_iterator::all::<Action>());
actions.remove(&Action::AllGet);
}
Action::AllGet => {
actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
}
Action::DocumentsAll => {
actions.extend(
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]
@@ -131,6 +137,14 @@ impl HeedAuthStore {
Action::ChatsSettingsAll => {
actions.extend([Action::ChatsSettingsGet, Action::ChatsSettingsUpdate]);
}
Action::WebhooksAll => {
actions.extend([
Action::WebhooksGet,
Action::WebhooksUpdate,
Action::WebhooksDelete,
Action::WebhooksCreate,
]);
}
other => {
actions.insert(*other);
}
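A minimal sketch of the expansion semantics above, assuming `Action` implements `Sequence` (for `enum_iterator`) and `is_read` as defined later in this diff: `*` expands to everything except `*.get` (which would need to re-expand), while `*.get` keeps only the read actions.

use std::collections::HashSet;

let mut actions = HashSet::new();
actions.extend(enum_iterator::all::<Action>());
actions.remove(&Action::AllGet);

let read_only: HashSet<Action> =
    enum_iterator::all::<Action>().filter(Action::is_read).collect();
assert!(read_only.contains(&Action::WebhooksGet));
assert!(!read_only.contains(&Action::WebhooksCreate));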

View File

@@ -24,7 +24,7 @@ enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
fst = "0.4.7"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
milli = { path = "../milli" }
roaring = { version = "0.10.12", features = ["serde"] }
rustc-hash = "2.1.1"

View File

@@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -415,8 +416,18 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST ;
// Webhooks
InvalidWebhooks , InvalidRequest , BAD_REQUEST ;
InvalidWebhookUrl , InvalidRequest , BAD_REQUEST ;
InvalidWebhookHeaders , InvalidRequest , BAD_REQUEST ;
ImmutableWebhook , InvalidRequest , BAD_REQUEST ;
InvalidWebhookUuid , InvalidRequest , BAD_REQUEST ;
WebhookNotFound , InvalidRequest , NOT_FOUND ;
ImmutableWebhookUuid , InvalidRequest , BAD_REQUEST ;
ImmutableWebhookIsEditable , InvalidRequest , BAD_REQUEST
}
impl ErrorCode for JoinError {
@@ -476,7 +487,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
UserError::InvalidSearchableAttribute { .. } => {
Code::InvalidSearchAttributesToSearchOn
}
@@ -492,7 +504,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}

View File

@@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError};
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAIN keywords, simply put the keywords in the q parameter; do not use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
"Search the database for relevant JSON documents using an optional query.";
"Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -161,18 +162,31 @@ impl ChatCompletionSource {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ChatCompletionPrompts {
#[serde(default)]
pub system: String,
#[serde(default)]
pub search_description: String,
#[serde(default)]
pub search_q_param: String,
#[serde(default = "default_search_filter_param")]
pub search_filter_param: String,
#[serde(default)]
pub search_index_uid_param: String,
}
/// This function is used when the search_filter_param is not
/// provided, which can happen when the database was created in v1.15.
fn default_search_filter_param() -> String {
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string()
}
impl Default for ChatCompletionPrompts {
fn default() -> Self {
Self {
system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
}
}
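A standalone sketch of the back-compat trick above, assuming serde_json on the consumer side: a v1.15 payload that lacks `searchFilterParam` still deserializes, falling back to the default prompt instead of failing.

use serde::Deserialize;

#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct Prompts {
    #[serde(default = "default_search_filter_param")]
    search_filter_param: String,
}

let old: Prompts = serde_json::from_str("{}").unwrap();
assert_eq!(old.search_filter_param, DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT);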

View File

@@ -144,6 +144,21 @@ impl Key {
}
}
pub fn default_read_only_admin() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
Self {
name: Some("Default Read-Only Admin API Key".to_string()),
description: Some("Use it to read information across the whole database. Caution! Do not expose this key on a public frontend".to_string()),
uid,
actions: vec![Action::AllGet, Action::KeysGet],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: now,
updated_at: now,
}
}
pub fn default_search() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
@@ -347,6 +362,24 @@ pub enum Action {
#[serde(rename = "chatsSettings.update")]
#[deserr(rename = "chatsSettings.update")]
ChatsSettingsUpdate,
#[serde(rename = "*.get")]
#[deserr(rename = "*.get")]
AllGet,
#[serde(rename = "webhooks.get")]
#[deserr(rename = "webhooks.get")]
WebhooksGet,
#[serde(rename = "webhooks.update")]
#[deserr(rename = "webhooks.update")]
WebhooksUpdate,
#[serde(rename = "webhooks.delete")]
#[deserr(rename = "webhooks.delete")]
WebhooksDelete,
#[serde(rename = "webhooks.create")]
#[deserr(rename = "webhooks.create")]
WebhooksCreate,
#[serde(rename = "webhooks.*")]
#[deserr(rename = "webhooks.*")]
WebhooksAll,
}
impl Action {
@@ -385,6 +418,7 @@ impl Action {
METRICS_GET => Some(Self::MetricsGet),
DUMPS_ALL => Some(Self::DumpsAll),
DUMPS_CREATE => Some(Self::DumpsCreate),
SNAPSHOTS_ALL => Some(Self::SnapshotsAll),
SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
VERSION => Some(Self::Version),
KEYS_CREATE => Some(Self::KeysAdd),
@@ -393,12 +427,71 @@ impl Action {
KEYS_DELETE => Some(Self::KeysDelete),
EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
EXPORT => Some(Self::Export),
NETWORK_GET => Some(Self::NetworkGet),
NETWORK_UPDATE => Some(Self::NetworkUpdate),
ALL_GET => Some(Self::AllGet),
WEBHOOKS_GET => Some(Self::WebhooksGet),
WEBHOOKS_UPDATE => Some(Self::WebhooksUpdate),
WEBHOOKS_DELETE => Some(Self::WebhooksDelete),
WEBHOOKS_CREATE => Some(Self::WebhooksCreate),
WEBHOOKS_ALL => Some(Self::WebhooksAll),
_otherwise => None,
}
}
/// Whether the action should be included in [Action::AllGet].
pub fn is_read(&self) -> bool {
use Action::*;
// It's using an exhaustive match to force the addition of new actions.
match self {
// Any action that expands to others must return false, as it wouldn't be able to expand recursively.
All | AllGet | DocumentsAll | IndexesAll | ChatsAll | TasksAll | SettingsAll
| StatsAll | MetricsAll | DumpsAll | SnapshotsAll | ChatsSettingsAll | WebhooksAll => {
false
}
Search => true,
DocumentsAdd => false,
DocumentsGet => true,
DocumentsDelete => false,
Export => true,
IndexesAdd => false,
IndexesGet => true,
IndexesUpdate => false,
IndexesDelete => false,
IndexesSwap => false,
TasksCancel => false,
TasksDelete => false,
TasksGet => true,
SettingsGet => true,
SettingsUpdate => false,
StatsGet => true,
MetricsGet => true,
DumpsCreate => false,
SnapshotsCreate => false,
Version => true,
KeysAdd => false,
KeysGet => false, // Disabled in order to prevent privilege escalation
KeysUpdate => false,
KeysDelete => false,
ExperimentalFeaturesGet => true,
ExperimentalFeaturesUpdate => false,
NetworkGet => true,
NetworkUpdate => false,
ChatCompletions => false, // Disabled because it might trigger generation of new chats
ChatsGet => true,
ChatsDelete => false,
ChatsSettingsGet => true,
ChatsSettingsUpdate => false,
WebhooksGet => true,
WebhooksUpdate => false,
WebhooksDelete => false,
WebhooksCreate => false,
}
}
pub const fn repr(&self) -> u8 {
*self as u8
}
@@ -408,6 +501,7 @@ pub mod actions {
use super::Action::*;
pub(crate) const ALL: u8 = All.repr();
pub const ALL_GET: u8 = AllGet.repr();
pub const SEARCH: u8 = Search.repr();
pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
@@ -432,6 +526,7 @@ pub mod actions {
pub const METRICS_GET: u8 = MetricsGet.repr();
pub const DUMPS_ALL: u8 = DumpsAll.repr();
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
pub const SNAPSHOTS_ALL: u8 = SnapshotsAll.repr();
pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
pub const VERSION: u8 = Version.repr();
pub const KEYS_CREATE: u8 = KeysAdd.repr();
@@ -453,4 +548,80 @@ pub mod actions {
pub const CHATS_SETTINGS_ALL: u8 = ChatsSettingsAll.repr();
pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr();
pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr();
pub const WEBHOOKS_GET: u8 = WebhooksGet.repr();
pub const WEBHOOKS_UPDATE: u8 = WebhooksUpdate.repr();
pub const WEBHOOKS_DELETE: u8 = WebhooksDelete.repr();
pub const WEBHOOKS_CREATE: u8 = WebhooksCreate.repr();
pub const WEBHOOKS_ALL: u8 = WebhooksAll.repr();
}
#[cfg(test)]
pub(crate) mod test {
use super::actions::*;
use super::Action::*;
use super::*;
#[test]
fn test_action_repr_and_constants() {
assert!(All.repr() == 0 && ALL == 0);
assert!(Search.repr() == 1 && SEARCH == 1);
assert!(DocumentsAll.repr() == 2 && DOCUMENTS_ALL == 2);
assert!(DocumentsAdd.repr() == 3 && DOCUMENTS_ADD == 3);
assert!(DocumentsGet.repr() == 4 && DOCUMENTS_GET == 4);
assert!(DocumentsDelete.repr() == 5 && DOCUMENTS_DELETE == 5);
assert!(IndexesAll.repr() == 6 && INDEXES_ALL == 6);
assert!(IndexesAdd.repr() == 7 && INDEXES_CREATE == 7);
assert!(IndexesGet.repr() == 8 && INDEXES_GET == 8);
assert!(IndexesUpdate.repr() == 9 && INDEXES_UPDATE == 9);
assert!(IndexesDelete.repr() == 10 && INDEXES_DELETE == 10);
assert!(IndexesSwap.repr() == 11 && INDEXES_SWAP == 11);
assert!(TasksAll.repr() == 12 && TASKS_ALL == 12);
assert!(TasksCancel.repr() == 13 && TASKS_CANCEL == 13);
assert!(TasksDelete.repr() == 14 && TASKS_DELETE == 14);
assert!(TasksGet.repr() == 15 && TASKS_GET == 15);
assert!(SettingsAll.repr() == 16 && SETTINGS_ALL == 16);
assert!(SettingsGet.repr() == 17 && SETTINGS_GET == 17);
assert!(SettingsUpdate.repr() == 18 && SETTINGS_UPDATE == 18);
assert!(StatsAll.repr() == 19 && STATS_ALL == 19);
assert!(StatsGet.repr() == 20 && STATS_GET == 20);
assert!(MetricsAll.repr() == 21 && METRICS_ALL == 21);
assert!(MetricsGet.repr() == 22 && METRICS_GET == 22);
assert!(DumpsAll.repr() == 23 && DUMPS_ALL == 23);
assert!(DumpsCreate.repr() == 24 && DUMPS_CREATE == 24);
assert!(SnapshotsAll.repr() == 25 && SNAPSHOTS_ALL == 25);
assert!(SnapshotsCreate.repr() == 26 && SNAPSHOTS_CREATE == 26);
assert!(Version.repr() == 27 && VERSION == 27);
assert!(KeysAdd.repr() == 28 && KEYS_CREATE == 28);
assert!(KeysGet.repr() == 29 && KEYS_GET == 29);
assert!(KeysUpdate.repr() == 30 && KEYS_UPDATE == 30);
assert!(KeysDelete.repr() == 31 && KEYS_DELETE == 31);
assert!(ExperimentalFeaturesGet.repr() == 32 && EXPERIMENTAL_FEATURES_GET == 32);
assert!(ExperimentalFeaturesUpdate.repr() == 33 && EXPERIMENTAL_FEATURES_UPDATE == 33);
assert!(Export.repr() == 34 && EXPORT == 34);
assert!(NetworkGet.repr() == 35 && NETWORK_GET == 35);
assert!(NetworkUpdate.repr() == 36 && NETWORK_UPDATE == 36);
assert!(ChatCompletions.repr() == 37 && CHAT_COMPLETIONS == 37);
assert!(ChatsAll.repr() == 38 && CHATS_ALL == 38);
assert!(ChatsGet.repr() == 39 && CHATS_GET == 39);
assert!(ChatsDelete.repr() == 40 && CHATS_DELETE == 40);
assert!(ChatsSettingsAll.repr() == 41 && CHATS_SETTINGS_ALL == 41);
assert!(ChatsSettingsGet.repr() == 42 && CHATS_SETTINGS_GET == 42);
assert!(ChatsSettingsUpdate.repr() == 43 && CHATS_SETTINGS_UPDATE == 43);
assert!(AllGet.repr() == 44 && ALL_GET == 44);
assert!(WebhooksGet.repr() == 45 && WEBHOOKS_GET == 45);
assert!(WebhooksUpdate.repr() == 46 && WEBHOOKS_UPDATE == 46);
assert!(WebhooksDelete.repr() == 47 && WEBHOOKS_DELETE == 47);
assert!(WebhooksCreate.repr() == 48 && WEBHOOKS_CREATE == 48);
assert!(WebhooksAll.repr() == 49 && WEBHOOKS_ALL == 49);
}
#[test]
fn test_from_repr() {
for action in enum_iterator::all::<Action>() {
let repr = action.repr();
let action_from_repr = Action::from_repr(repr);
assert_eq!(Some(action), action_from_repr, "Failed for action: {:?}", action);
}
}
}

View File

@@ -15,6 +15,7 @@ pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub mod webhooks;
pub use milli::{heed, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;

View File

@@ -0,0 +1,28 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Webhook {
pub url: String,
#[serde(default)]
pub headers: BTreeMap<String, String>,
}
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebhooksView {
#[serde(default)]
pub webhooks: BTreeMap<Uuid, Webhook>,
}
// Same as WebhooksView, except it must never contain the CLI webhook.
// It's the right structure to use in dumps.
#[derive(Debug, Deserialize, Serialize, Default, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebhooksDumpView {
#[serde(default)]
pub webhooks: BTreeMap<Uuid, Webhook>,
}
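With the derives above, a `WebhooksView` serializes to camelCase JSON keyed by webhook UUID. An illustrative construction (values invented; serde_json assumed):

let mut webhooks = BTreeMap::new();
webhooks.insert(
    Uuid::new_v4(),
    Webhook { url: "https://example.com/hook".into(), headers: BTreeMap::new() },
);
let view = WebhooksView { webhooks };
// prints e.g. {"webhooks":{"<uuid>":{"url":"https://example.com/hook","headers":{}}}}
println!("{}", serde_json::to_string(&view).unwrap());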

View File

@@ -50,6 +50,7 @@ jsonwebtoken = "9.3.1"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.7"
mimalloc = { version = "0.1.47", default-features = false }
mime = "0.3.17"
num_cpus = "1.17.0"
@@ -169,5 +170,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip"
sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.22/build.zip"
sha1 = "b70b2036b5f167da9ea0b637da8b320c7ea88254"

View File

@@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View File

@@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
pub trait Aggregate: 'static + mopa::Any + Send {
/// The name of the event that will be sent to segment.

View File

@@ -203,6 +203,7 @@ struct Infos {
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
experimental_no_snapshot_compaction: bool,
experimental_no_edition_2024_for_dumps: bool,
experimental_no_edition_2024_for_settings: bool,
gpu_enabled: bool,
db_path: bool,
@@ -293,6 +294,7 @@ impl Infos {
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
} = indexer_options;
let RuntimeTogglableFeatures {
@@ -329,6 +331,7 @@ impl Infos {
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_no_edition_2024_for_dumps,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@@ -49,7 +49,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(if *.0 % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal }))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
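The revised attribute keeps round decimal limits in decimal units: a limit that is a multiple of 1024 renders in binary units (MiB), anything else in decimal units (MB). A quick check of the intent, assuming the `byte_unit` crate API used above:

use byte_unit::{Byte, UnitType};

let binary_limit = 10u64 * 1024 * 1024; // 1024-aligned -> "10 MiB"
println!("{}", Byte::from_u64(binary_limit).get_appropriate_unit(UnitType::Binary));
let decimal_limit = 10_000_000u64; // not 1024-aligned -> "10 MB"
println!("{}", Byte::from_u64(decimal_limit).get_appropriate_unit(UnitType::Decimal));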

View File

@@ -30,6 +30,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use bumpalo::Bump;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning;
@@ -38,6 +39,7 @@ use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::{
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
};
@@ -221,8 +223,8 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
cli_webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
cli_webhook_authorization: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
@@ -489,7 +491,12 @@ fn import_dump(
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
};
// 2. Import the `Key`s.
// 2. Import the webhooks
if let Some(webhooks) = dump_reader.webhooks() {
index_scheduler.update_runtime_webhooks(webhooks.webhooks.clone())?;
}
// 3. Import the `Key`s.
let mut keys = Vec::new();
auth.raw_delete_all_keys()?;
for key in dump_reader.keys()? {
@@ -498,20 +505,20 @@ fn import_dump(
keys.push(key);
}
// 3. Import the `ChatCompletionSettings`s.
// 4. Import the `ChatCompletionSettings`s.
for result in dump_reader.chat_completions_settings()? {
let (name, settings) = result?;
index_scheduler.put_chat_settings(&name, &settings)?;
}
// 4. Import the runtime features and network
// 5. Import the runtime features and network
let features = dump_reader.features()?.unwrap_or_default();
index_scheduler.put_runtime_features(features)?;
let network = dump_reader.network()?.cloned().unwrap_or_default();
index_scheduler.put_network(network)?;
// 4.1 Use all cpus to process dump if `max_indexing_threads` not configured
// 5.1 Use all cpus to process dump if `max_indexing_threads` not configured
let backup_config;
let base_config = index_scheduler.indexer_config();
@@ -528,12 +535,12 @@ fn import_dump(
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
// try to process tasks while we're trying to import the indexes.
// 5. Import the indexes.
// 6. Import the indexes.
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
let uid = metadata.uid.clone();
tracing::info!("Importing index `{}`.", metadata.uid);
tracing::info!("Importing index `{uid}`.");
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -541,71 +548,123 @@ fn import_dump(
let mut wtxn = index.write_txn()?;
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
// 5.1 Import the primary key if there is one.
// 6.1 Import the primary key if there is one.
if let Some(ref primary_key) = metadata.primary_key {
builder.set_primary_key(primary_key.to_string());
}
// 5.2 Import the settings.
// 6.2 Import the settings.
tracing::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
let embedder_stats: Arc<EmbedderStats> = Default::default();
builder.execute(&|| false, &progress, embedder_stats.clone())?;
wtxn.commit()?;
// 5.3 Import the documents.
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
let mut wtxn = index.write_txn()?;
let rtxn = index.read_txn()?;
if index_scheduler.no_edition_2024_for_dumps() {
// 6.3 Import the documents.
// 6.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
}
// This flushes the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 6.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
} else {
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
let primary_key = index.primary_key(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer = indexer::DocumentOperation::new();
let embedders = index.embedding_configs().embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(uid.clone(), embedders)?;
let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };
indexer.replace_documents(&mmap)?;
let indexer_config = index_scheduler.indexer_config();
let pool = &indexer_config.thread_pool;
let indexer_alloc = Bump::new();
let (document_changes, mut operation_stats, primary_key) = indexer.into_changes(
&indexer_alloc,
&index,
&rtxn,
primary_key,
&mut new_fields_ids_map,
&|| false, // never stop processing a dump
progress.clone(),
)?;
let operation_stats = operation_stats.pop().unwrap();
if let Some(error) = operation_stats.error {
return Err(error.into());
}
let _congestion = indexer::index(
&mut wtxn,
&index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false, // never stop processing a dump
&progress,
&embedder_stats,
)?;
}
// This flushes the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 5.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
index_scheduler.refresh_index_stats(&uid)?;
}
// 6. Import the queue
// 7. Import the queue
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
// 6.1. Import the batches
// 7.1. Import the batches
for ret in dump_reader.batches()? {
let batch = ret?;
index_scheduler_dump.register_dumped_batch(batch)?;
}
// 6.2. Import the tasks
// 7.2. Import the tasks
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler_dump.register_dumped_task(task, file)?;
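In outline, the document-import section above now branches on the new experimental flag; a condensed view of the control flow (error handling and setup elided):

if index_scheduler.no_edition_2024_for_dumps() {
    // legacy path: rebuild a grenad+obkv batch from the dump's JSON documents
    // and feed it to milli::update::IndexDocuments
} else {
    // edition-2024 path: mmap the dump's raw documents file and run the new
    // indexer::DocumentOperation pipeline on it directly
}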

View File

@@ -15,30 +15,33 @@ lazy_static! {
"Meilisearch number of degraded search requests"
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!(
pub static ref MEILISEARCH_CHAT_SEARCHES_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!(
"meilisearch_chat_search_requests",
"Meilisearch number of search requests performed by the chat route itself"
"meilisearch_chat_searches_total",
"Total number of searches performed by the chat route"
),
&["type"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"),
pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_chat_prompt_tokens_total", "Total number of prompt tokens consumed"),
&["workspace", "model"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec =
pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_TOTAL: IntCounterVec =
register_int_counter_vec!(
opts!(
"meilisearch_chat_completion_tokens_usage",
"Meilisearch Chat Completion Tokens Usage"
"meilisearch_chat_completion_tokens_total",
"Total number of completion tokens consumed"
),
&["workspace", "model"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"),
pub static ref MEILISEARCH_CHAT_TOKENS_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!(
"meilisearch_chat_tokens_total",
"Total number of tokens consumed (prompt + completion)"
),
&["workspace", "model"]
)
.expect("Can't create a metric");
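The renames bring the counters in line with the Prometheus convention that monotonically increasing counters end in `_total`; call sites are otherwise unchanged, e.g. (label values illustrative):

MEILISEARCH_CHAT_SEARCHES_TOTAL.with_label_values(&["internal"]).inc();
MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL
    .with_label_values(&[workspace_uid, model])
    .inc_by(prompt_tokens);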

View File

@@ -68,6 +68,8 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -204,11 +206,13 @@ pub struct Opt {
pub env: String,
/// Called whenever a task finishes so a third party can be notified.
/// See also the dedicated API `/webhooks`.
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
pub task_webhook_url: Option<Url>,
/// The Authorization header to send on the webhook URL whenever
/// a task finishes so a third party can be notified.
/// See also the dedicated API `/webhooks`.
#[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
pub task_webhook_authorization_header: Option<String>,
@@ -759,6 +763,15 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
#[serde(default)]
pub experimental_no_edition_2024_for_settings: bool,
/// Experimental: make dump imports use the old document indexer.
///
/// When enabled, Meilisearch will use the old document indexer when importing dumps.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/851>.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
#[serde(default)]
pub experimental_no_edition_2024_for_dumps: bool,
}
impl IndexerOpts {
@@ -769,6 +782,7 @@ impl IndexerOpts {
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -788,6 +802,12 @@ impl IndexerOpts {
experimental_no_edition_2024_for_settings.to_string(),
);
}
if experimental_no_edition_2024_for_dumps {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS,
experimental_no_edition_2024_for_dumps.to_string(),
);
}
}
}
@@ -808,6 +828,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
skip_index_budget: other.skip_index_budget,
experimental_no_edition_2024_for_settings: other
.experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
chunk_compression_type: Default::default(),
chunk_compression_level: Default::default(),
documents_chunk_size: Default::default(),
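A hedged usage note: with the clap derive above, the long flag follows the field name, so either of the following should enable the old dump indexer (the kebab-case flag spelling is an assumption from clap's convention; the environment variable is confirmed by the constant above):

meilisearch --experimental-no-edition-2024-for-dumps
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS=true meilisearch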

View File

@@ -27,9 +27,10 @@ use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts,
ChatCompletionSource as DbChatCompletionSource, SystemRole,
};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget};
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
@@ -49,8 +50,8 @@ use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
use crate::metrics::{
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE,
MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE,
MEILISEARCH_CHAT_COMPLETION_TOKENS_TOTAL, MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL,
MEILISEARCH_CHAT_SEARCHES_TOTAL, MEILISEARCH_CHAT_TOKENS_TOTAL,
MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
};
use crate::routes::chats::utils::SseEventSender;
@@ -169,6 +170,7 @@ fn setup_search_tool(
let mut index_uids = Vec::new();
let mut function_description = prompts.search_description.clone();
let mut filter_description = prompts.search_filter_param.clone();
index_scheduler.try_for_each_index::<_, ()>(|name, index| {
// Make sure to skip unauthorized indexes
if !filters.is_index_authorized(name) {
@@ -180,16 +182,22 @@ fn setup_search_tool(
let index_description = chat_config.description;
let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
index_uids.push(name.to_string());
let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap
let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
let _ = writeln!(&mut filter_description, "{facet_distributions}");
Ok(())
})?;
tracing::debug!("LLM function description: {function_description}");
tracing::debug!("LLM filter description: {filter_description}");
let tool = ChatCompletionToolArgs::default()
.r#type(ChatCompletionToolType::Function)
.function(
FunctionObjectArgs::default()
.name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
.description(&function_description)
.description(function_description)
.parameters(json!({
"type": "object",
"properties": {
@@ -203,9 +211,13 @@ fn setup_search_tool(
// "type": ["string", "null"],
"type": "string",
"description": prompts.search_q_param,
},
"filter": {
"type": "string",
"description": filter_description,
}
},
"required": ["index_uid", "q"],
"required": ["index_uid", "q", "filter"],
"additionalProperties": false,
}))
.strict(true)
@@ -247,11 +259,19 @@ async fn process_search_request(
auth_token: &str,
index_uid: String,
q: Option<String>,
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) };
let mut query = SearchQuery {
q,
filter: filter.map(serde_json::Value::from),
..SearchQuery::from(search_parameters)
};
tracing::debug!("LLM query: {:?}", query);
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
auth_ctrl,
auth_token,
@@ -280,17 +300,26 @@ async fn process_search_request(
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
search_from_kind(index_uid, search_kind, search)
.map(|(search_results, _)| (rtxn, search_results))
.map_err(ResponseError::from)
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
Err(MeilisearchHttpError::Milli {
error: meilisearch_types::milli::Error::UserError(user_error),
index_name: _,
}) => Ok((rtxn, Err(user_error))),
Err(err) => Err(ResponseError::from(err)),
}
})
.await;
permit.drop().await;
let output = output?;
let output = match output? {
Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)),
Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())),
Err(err) => Err(err),
};
let mut documents = Vec::new();
if let Ok((ref rtxn, ref search_result)) = output {
MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
MEILISEARCH_CHAT_SEARCHES_TOTAL.with_label_values(&["internal"]).inc();
if search_result.degraded {
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
}
@@ -395,16 +424,19 @@ async fn non_streamed_chat(
for call in meili_calls {
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
)
.await
.map_err(|e| e.to_string()),
Ok(SearchInIndexParameters { index_uid, q, filter }) => {
process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
.map_err(|e| e.to_string())
}
Err(err) => Err(err.to_string()),
};
@@ -564,13 +596,13 @@ async fn run_conversation<C: async_openai::config::Config>(
match result {
Ok(resp) => {
if let Some(usage) = resp.usage.as_ref() {
MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE
MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL
.with_label_values(&[workspace_uid, &chat_completion.model])
.inc_by(usage.prompt_tokens as u64);
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE
MEILISEARCH_CHAT_COMPLETION_TOKENS_TOTAL
.with_label_values(&[workspace_uid, &chat_completion.model])
.inc_by(usage.completion_tokens as u64);
MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE
MEILISEARCH_CHAT_TOKENS_TOTAL
.with_label_values(&[workspace_uid, &chat_completion.model])
.inc_by(usage.total_tokens as u64);
}
@@ -719,13 +751,14 @@ async fn handle_meili_tools(
let mut error = None;
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request(
index_scheduler,
auth_ctrl.clone(),
search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
{
@@ -801,4 +834,42 @@ struct SearchInIndexParameters {
index_uid: String,
/// The query parameter to use.
q: Option<String>,
/// The filter parameter to use.
filter: Option<String>,
}
fn format_facet_distributions(
index: &Index,
rtxn: &RoTxn,
max_values_per_facet: usize,
) -> meilisearch_types::milli::Result<String> {
let universe = index.documents_ids(rtxn)?;
let rules = index.filterable_attributes_rules(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_attributes = fields_ids_map
.names()
.filter(|name| rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match)))
.map(|name| (name, OrderBy::Count));
let facets_distribution = index
.facets_distribution(rtxn)
.max_values_per_facet(max_values_per_facet)
.candidates(universe)
.facets(filterable_attributes)
.execute()?;
let mut output = String::new();
for (facet_name, entries) in facets_distribution {
let _ = write!(&mut output, "{}: ", facet_name);
let total_entries = entries.len();
for (i, (value, _count)) in entries.into_iter().enumerate() {
let _ = if total_entries.saturating_sub(1) == i {
write!(&mut output, "{value}.")
} else {
write!(&mut output, "{value}, ")
};
}
let _ = writeln!(&mut output);
}
Ok(output)
}
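For intuition, `format_facet_distributions` renders one line per filterable facet, with up to `max_values_per_facet` comma-separated values ordered by count and a period after the last one. Illustrative output for a movies index (values invented):

genres: Drama, Comedy, Adventure.
year: 2021, 2020, 2018.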

View File

@@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
DEFAULT_CHAT_SYSTEM_PROMPT,
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
@@ -84,6 +84,11 @@ async fn patch_settings(
Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_q_param,
},
search_filter_param: match new_prompts.search_filter_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_filter_param,
},
search_index_uid_param: match new_prompts.search_index_uid_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
@@ -252,6 +257,10 @@ pub struct ChatPrompts {
#[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
pub search_q_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchFilterParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is filter parameter..."))]
pub search_filter_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
pub search_index_uid_param: Setting<String>,
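
Each prompt follows the same three-state merge as the other settings. A minimal sketch of the pattern, using a stand-in enum with the same semantics as milli's `Setting` (the helper name is invented):

    // Stand-in for meilisearch_types::milli::update::Setting (assumed semantics).
    enum Setting<T> {
        Set(T),
        Reset,
        NotSet,
    }

    fn merge_prompt(new: Setting<String>, old: String, default: &str) -> String {
        match new {
            Setting::Set(value) => value,          // a new prompt was provided
            Setting::Reset => default.to_string(), // JSON `null` restores the default
            Setting::NotSet => old,                // field omitted: keep the old value
        }
    }

    fn main() {
        let merged = merge_prompt(Setting::NotSet, "old prompt".into(), "default");
        assert_eq!(merged, "old prompt");
    }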


@@ -1,3 +1,5 @@
use url::Url;
use crate::analytics::Aggregate;
use crate::routes::export::Export;
@@ -5,6 +7,7 @@ use crate::routes::export::Export;
pub struct ExportAnalytics {
total_received: usize,
has_api_key: bool,
sum_exports_meilisearch_cloud: usize,
sum_index_patterns: usize,
sum_patterns_with_filter: usize,
sum_patterns_with_override_settings: usize,
@@ -13,8 +16,14 @@ pub struct ExportAnalytics {
impl ExportAnalytics {
pub fn from_export(export: &Export) -> Self {
let Export { url: _, api_key, payload_size, indexes } = export;
let Export { url, api_key, payload_size, indexes } = export;
let url = Url::parse(url).ok();
let is_meilisearch_cloud = url.as_ref().and_then(Url::host_str).is_some_and(|host| {
host.ends_with("meilisearch.dev")
|| host.ends_with("meilisearch.com")
|| host.ends_with("meilisearch.io")
});
let has_api_key = api_key.is_some();
let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
@@ -33,6 +42,7 @@ impl ExportAnalytics {
Self {
total_received: 1,
has_api_key,
sum_exports_meilisearch_cloud: is_meilisearch_cloud as usize,
sum_index_patterns: index_patterns_count,
sum_patterns_with_filter: patterns_with_filter_count,
sum_patterns_with_override_settings: patterns_with_override_settings_count,
@@ -49,6 +59,7 @@ impl Aggregate for ExportAnalytics {
fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
self.total_received += other.total_received;
self.has_api_key |= other.has_api_key;
self.sum_exports_meilisearch_cloud += other.sum_exports_meilisearch_cloud;
self.sum_index_patterns += other.sum_index_patterns;
self.sum_patterns_with_filter += other.sum_patterns_with_filter;
self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
@@ -63,6 +74,12 @@ impl Aggregate for ExportAnalytics {
Some(self.payload_sizes.iter().sum::<u64>() / self.payload_sizes.len() as u64)
};
let avg_exports_meilisearch_cloud = if self.total_received == 0 {
None
} else {
Some(self.sum_exports_meilisearch_cloud as f64 / self.total_received as f64)
};
let avg_index_patterns = if self.total_received == 0 {
None
} else {
@@ -84,6 +101,7 @@ impl Aggregate for ExportAnalytics {
serde_json::json!({
"total_received": self.total_received,
"has_api_key": self.has_api_key,
"avg_exports_meilisearch_cloud": avg_exports_meilisearch_cloud,
"avg_index_patterns": avg_index_patterns,
"avg_patterns_with_filter": avg_patterns_with_filter,
"avg_patterns_with_override_settings": avg_patterns_with_override_settings,


@@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;
use std::str::FromStr;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@@ -17,9 +18,11 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::documents::sort::recursive_sort;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::milli::{AscDesc, DocumentId};
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
@@ -42,6 +45,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
@@ -135,6 +139,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
per_document_id: bool,
// if a filter was used
per_filter: bool,
// if documents were sorted
sort: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
@@ -151,39 +157,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
marker: std::marker::PhantomData<Method>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
}
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
};
let ids = match query {
DocumentFetchKind::Normal { ids, .. } => *ids,
DocumentFetchKind::PerDocumentId { .. } => 0,
};
Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
max_document_ids: ids,
marker: PhantomData,
}
}
}
impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
@@ -193,6 +166,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
per_filter: self.per_filter | new.per_filter,
sort: self.sort | new.sort,
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset),
@@ -276,6 +250,7 @@ pub async fn get_document(
retrieve_vectors: param_retrieve_vectors.0,
per_document_id: true,
per_filter: false,
sort: false,
max_limit: 0,
max_offset: 0,
max_document_ids: 0,
@@ -406,6 +381,8 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
sort: Option<String>,
}
#[derive(Debug, Deserr, ToSchema)]
@@ -430,6 +407,9 @@ pub struct BrowseQuery {
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
#[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
sort: Option<Vec<String>>,
}
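
An illustrative request body exercising the new `sort` field of `BrowseQuery` — the filter, sort attributes, and limit are made-up values:

    fn main() {
        // Hypothetical POST /indexes/{index_uid}/documents/fetch body.
        let body = serde_json::json!({
            "filter": "popularity > 1000",
            "sort": ["title:asc", "rating:desc"],
            "limit": 20
        });
        println!("{body}");
    }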
/// Get documents with POST
@@ -495,6 +475,7 @@ pub async fn documents_by_query_post(
analytics.publish(
DocumentsFetchAggregator::<DocumentsPOST> {
per_filter: body.filter.is_some(),
sort: body.sort.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
@@ -571,7 +552,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
params.into_inner();
let filter = match filter {
@@ -582,20 +563,20 @@ pub async fn get_documents(
None => None,
};
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter,
ids,
ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
};
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
per_filter: query.filter.is_some(),
sort: query.sort.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
@@ -615,7 +596,7 @@ fn documents_by_query(
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
@@ -633,6 +614,18 @@ fn documents_by_query(
None
};
let sort_criteria = if let Some(sort) = &sort {
let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
}
};
Some(sorts)
} else {
None
};
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(
&index,
@@ -643,6 +636,7 @@ fn documents_by_query(
fields,
retrieve_vectors,
index_scheduler.features(),
sort_criteria,
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
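
Each `sort` entry reuses the `field:asc` / `field:desc` search syntax and is parsed with `AscDesc::from_str`, so an invalid direction surfaces as a `SortError` before any document is fetched. A minimal parse sketch (field names are assumptions):

    use std::str::FromStr;

    use meilisearch_types::milli::AscDesc;

    fn main() {
        let sorts: Result<Vec<AscDesc>, _> =
            ["title:asc", "rating:desc"].iter().map(|s| AscDesc::from_str(s)).collect();
        assert!(sorts.is_ok());
        assert!(AscDesc::from_str("title:sideways").is_err()); // invalid direction
    }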
@@ -1467,9 +1461,13 @@ fn some_documents<'a, 't: 'a>(
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
for (
name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ },
) in index.embeddings(rtxn, key)?
{
let embeddings =
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
@@ -1494,6 +1492,7 @@ fn retrieve_documents<S: AsRef<str>>(
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
sort_criteria: Option<Vec<AscDesc>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
@@ -1526,15 +1525,32 @@ fn retrieve_documents<S: AsRef<str>>(
})?
}
let (it, number_of_documents) = {
let (it, number_of_documents) = if let Some(sort) = sort_criteria {
let number_of_documents = candidates.len();
let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
let iter = facet_sort.iter()?;
let mut documents = Vec::with_capacity(limit);
for result in iter.skip(offset).take(limit) {
documents.push(result?);
}
(
itertools::Either::Left(some_documents(
index,
&rtxn,
documents.into_iter(),
retrieve_vectors,
)?),
number_of_documents,
)
} else {
let number_of_documents = candidates.len();
(
some_documents(
itertools::Either::Right(some_documents(
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
)?),
number_of_documents,
)
};
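
Both branches are wrapped in `itertools::Either` so `retrieve_documents` can hand back a single concrete iterator type whether or not a sort was requested. A standalone sketch of that trick:

    use itertools::Either;

    fn main() {
        let sorted = true;
        let candidates = vec![3u32, 1, 2];
        // Either<L, R> implements Iterator when both sides yield the same item type.
        let it = if sorted {
            let mut docs = candidates.clone();
            docs.sort();
            Either::Left(docs.into_iter())
        } else {
            Either::Right(candidates.into_iter().skip(1).take(2))
        };
        assert_eq!(it.collect::<Vec<_>>(), vec![1, 2, 3]);
    }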


@@ -511,7 +511,7 @@ make_setting_routes!(
},
{
route: "/chat",
update_verb: put,
update_verb: patch,
value_type: ChatSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsIndexChat,


@@ -41,6 +41,7 @@ use crate::routes::indexes::IndexView;
use crate::routes::multi_search::SearchResults;
use crate::routes::network::{Network, Remote};
use crate::routes::swap_indexes::SwapIndexesPayload;
use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata};
use crate::search::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
@@ -70,6 +71,7 @@ mod swap_indexes;
pub mod tasks;
#[cfg(test)]
mod tasks_test;
mod webhooks;
#[derive(OpenApi)]
#[openapi(
@@ -89,6 +91,7 @@ mod tasks_test;
(path = "/experimental-features", api = features::ExperimentalFeaturesApi),
(path = "/export", api = export::ExportApi),
(path = "/network", api = network::NetworkApi),
(path = "/webhooks", api = webhooks::WebhooksApi),
),
paths(get_health, get_version, get_stats),
tags(
@@ -99,7 +102,7 @@ mod tasks_test;
url = "/",
description = "Local server",
)),
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export))
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata))
)]
pub struct MeilisearchApi;
@@ -120,7 +123,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/experimental-features").configure(features::configure))
.service(web::scope("/network").configure(network::configure))
.service(web::scope("/export").configure(export::configure))
.service(web::scope("/chats").configure(chats::configure));
.service(web::scope("/chats").configure(chats::configure))
.service(web::scope("/webhooks").configure(webhooks::configure));
#[cfg(feature = "swagger")]
{


@@ -51,7 +51,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
get,
path = "",
tag = "Network",
security(("Bearer" = ["network.get", "network.*", "*"])),
security(("Bearer" = ["network.get", "*"])),
responses(
(status = OK, description = "Known nodes are returned", body = Network, content_type = "application/json", example = json!(
{
@@ -168,7 +168,7 @@ impl Aggregate for PatchNetworkAnalytics {
path = "",
tag = "Network",
request_body = Network,
security(("Bearer" = ["network.update", "network.*", "*"])),
security(("Bearer" = ["network.update", "*"])),
responses(
(status = OK, description = "New network state is returned", body = Network, content_type = "application/json", example = json!(
{


@@ -0,0 +1,474 @@
use std::collections::BTreeMap;
use std::str::FromStr;
use actix_http::header::{
HeaderName, HeaderValue, InvalidHeaderName as ActixInvalidHeaderName,
InvalidHeaderValue as ActixInvalidHeaderValue,
};
use actix_web::web::{self, Data, Path};
use actix_web::{HttpRequest, HttpResponse};
use core::convert::Infallible;
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError};
use meilisearch_types::error::deserr_codes::{
BadRequest, InvalidWebhookHeaders, InvalidWebhookUrl,
};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::webhooks::Webhook;
use serde::Serialize;
use tracing::debug;
use url::Url;
use utoipa::{OpenApi, ToSchema};
use uuid::Uuid;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use WebhooksError::*;
#[derive(OpenApi)]
#[openapi(
paths(get_webhooks, get_webhook, post_webhook, patch_webhook, delete_webhook),
tags((
name = "Webhooks",
description = "The `/webhooks` route allows you to register endpoints to be called once tasks are processed.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/webhooks"),
)),
)]
pub struct WebhooksApi;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(get_webhooks))
.route(web::post().to(SeqHandler(post_webhook))),
)
.service(
web::resource("/{uuid}")
.route(web::get().to(get_webhook))
.route(web::patch().to(SeqHandler(patch_webhook)))
.route(web::delete().to(SeqHandler(delete_webhook))),
);
}
#[derive(Debug, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_webhook)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub(super) struct WebhookSettings {
#[schema(value_type = Option<String>, example = "https://your.site/on-tasks-completed")]
#[deserr(default, error = DeserrJsonError<InvalidWebhookUrl>)]
#[serde(default)]
url: Setting<String>,
#[schema(value_type = Option<BTreeMap<String, String>>, example = json!({"Authorization":"Bearer a-secret-token"}))]
#[deserr(default, error = DeserrJsonError<InvalidWebhookHeaders>)]
#[serde(default)]
headers: Setting<BTreeMap<String, Setting<String>>>,
}
fn deny_immutable_fields_webhook(
field: &str,
accepted: &[&str],
location: ValuePointerRef,
) -> DeserrJsonError {
match field {
"uuid" => immutable_field_error(field, accepted, Code::ImmutableWebhookUuid),
"isEditable" => immutable_field_error(field, accepted, Code::ImmutableWebhookIsEditable),
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
None,
deserr::ErrorKind::UnknownKey { key: field, accepted },
location,
)),
}
}
#[derive(Debug, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub(super) struct WebhookWithMetadata {
uuid: Uuid,
is_editable: bool,
#[schema(value_type = WebhookSettings)]
#[serde(flatten)]
webhook: Webhook,
}
impl WebhookWithMetadata {
pub fn from(uuid: Uuid, webhook: Webhook) -> Self {
Self { uuid, is_editable: uuid != Uuid::nil(), webhook }
}
}
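
The nil UUID is reserved for the webhook defined on the command line — the one webhook the API must never edit — and `is_editable` falls out of that convention. A small sketch:

    use uuid::Uuid;

    fn is_editable(uuid: Uuid) -> bool {
        // The nil UUID identifies the CLI-defined webhook, which is read-only.
        uuid != Uuid::nil()
    }

    fn main() {
        assert!(!is_editable(Uuid::nil()));
        assert!(is_editable(Uuid::new_v4()));
    }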
#[derive(Debug, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub(super) struct WebhookResults {
results: Vec<WebhookWithMetadata>,
}
#[utoipa::path(
get,
path = "",
tag = "Webhooks",
security(("Bearer" = ["webhooks.get", "webhooks.*", "*.get", "*"])),
responses(
(status = OK, description = "Webhooks are returned", body = WebhookResults, content_type = "application/json", example = json!({
"results": [
{
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
"Authorization": "Bearer a-secret-token"
},
"isEditable": true
},
{
"uuid": "550e8400-e29b-41d4-a716-446655440001",
"url": "https://another.site/on-tasks-completed",
"isEditable": true
}
]
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
async fn get_webhooks(
index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_GET }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> {
let webhooks = index_scheduler.webhooks_view();
let results = webhooks
.webhooks
.into_iter()
.map(|(uuid, webhook)| WebhookWithMetadata::from(uuid, webhook))
.collect::<Vec<_>>();
let results = WebhookResults { results };
debug!(returns = ?results, "Get webhooks");
Ok(HttpResponse::Ok().json(results))
}
#[derive(Serialize, Default)]
pub struct PatchWebhooksAnalytics;
impl Aggregate for PatchWebhooksAnalytics {
fn event_name(&self) -> &'static str {
"Webhooks Updated"
}
fn aggregate(self: Box<Self>, _new: Box<Self>) -> Box<Self> {
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
#[derive(Serialize, Default)]
pub struct PostWebhooksAnalytics;
impl Aggregate for PostWebhooksAnalytics {
fn event_name(&self) -> &'static str {
"Webhooks Created"
}
fn aggregate(self: Box<Self>, _new: Box<Self>) -> Box<Self> {
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
#[derive(Debug, thiserror::Error)]
enum WebhooksError {
#[error("The URL for the webhook `{0}` is missing.")]
MissingUrl(Uuid),
#[error("Defining too many webhooks would crush the server. Please limit the number of webhooks to 20. You may use a third-party proxy server to dispatch events to more than 20 endpoints.")]
TooManyWebhooks,
#[error("Too many headers for the webhook `{0}`. Please limit the number of headers to 200. Hint: To remove an already defined header set its value to `null`")]
TooManyHeaders(Uuid),
#[error("Webhook `{0}` is immutable. The webhook defined from the command line cannot be modified using the API.")]
ImmutableWebhook(Uuid),
#[error("Webhook `{0}` not found.")]
WebhookNotFound(Uuid),
#[error("Invalid header name `{0}`: {1}")]
InvalidHeaderName(String, ActixInvalidHeaderName),
#[error("Invalid header value `{0}`: {1}")]
InvalidHeaderValue(String, ActixInvalidHeaderValue),
#[error("Invalid URL `{0}`: {1}")]
InvalidUrl(String, url::ParseError),
#[error("Invalid UUID: {0}")]
InvalidUuid(uuid::Error),
}
impl ErrorCode for WebhooksError {
fn error_code(&self) -> meilisearch_types::error::Code {
match self {
MissingUrl(_) => meilisearch_types::error::Code::InvalidWebhookUrl,
TooManyWebhooks => meilisearch_types::error::Code::InvalidWebhooks,
TooManyHeaders(_) => meilisearch_types::error::Code::InvalidWebhookHeaders,
ImmutableWebhook(_) => meilisearch_types::error::Code::ImmutableWebhook,
WebhookNotFound(_) => meilisearch_types::error::Code::WebhookNotFound,
InvalidHeaderName(_, _) => meilisearch_types::error::Code::InvalidWebhookHeaders,
InvalidHeaderValue(_, _) => meilisearch_types::error::Code::InvalidWebhookHeaders,
InvalidUrl(_, _) => meilisearch_types::error::Code::InvalidWebhookUrl,
InvalidUuid(_) => meilisearch_types::error::Code::InvalidWebhookUuid,
}
}
}
fn patch_webhook_inner(
uuid: &Uuid,
old_webhook: Webhook,
new_webhook: WebhookSettings,
) -> Result<Webhook, WebhooksError> {
let Webhook { url: old_url, mut headers } = old_webhook;
let url = match new_webhook.url {
Setting::Set(url) => url,
Setting::NotSet => old_url,
Setting::Reset => return Err(MissingUrl(uuid.to_owned())),
};
match new_webhook.headers {
Setting::Set(new_headers) => {
for (name, value) in new_headers {
match value {
Setting::Set(value) => {
headers.insert(name, value);
}
Setting::NotSet => continue,
Setting::Reset => {
headers.remove(&name);
continue;
}
}
}
}
Setting::Reset => headers.clear(),
Setting::NotSet => (),
};
if headers.len() > 200 {
return Err(TooManyHeaders(uuid.to_owned()));
}
Ok(Webhook { url, headers })
}
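
Per-header patching uses the same three states: `Set` overwrites a header, `Reset` (JSON `null`) removes it, and an omitted key is left untouched. A standalone sketch with a stand-in enum in place of milli's `Setting`:

    use std::collections::BTreeMap;

    // Stand-in for meilisearch_types::milli::update::Setting (assumed semantics).
    enum Setting<T> {
        Set(T),
        Reset,
        NotSet,
    }

    fn main() {
        let mut headers = BTreeMap::from([
            ("Authorization".to_string(), "Bearer old".to_string()),
            ("X-Trace".to_string(), "1".to_string()),
        ]);
        let patch = BTreeMap::from([
            ("Authorization".to_string(), Setting::Set("Bearer new".to_string())),
            ("X-Trace".to_string(), Setting::Reset), // `null` removes the header
        ]);
        for (name, value) in patch {
            match value {
                Setting::Set(v) => {
                    headers.insert(name, v);
                }
                Setting::Reset => {
                    headers.remove(&name);
                }
                Setting::NotSet => {}
            }
        }
        assert_eq!(headers.get("Authorization").map(String::as_str), Some("Bearer new"));
        assert!(!headers.contains_key("X-Trace"));
    }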
fn check_changed(uuid: Uuid, webhook: &Webhook) -> Result<(), WebhooksError> {
if uuid.is_nil() {
return Err(ImmutableWebhook(uuid));
}
if webhook.url.is_empty() {
return Err(MissingUrl(uuid));
}
if webhook.headers.len() > 200 {
return Err(TooManyHeaders(uuid));
}
for (header, value) in &webhook.headers {
HeaderName::from_bytes(header.as_bytes())
.map_err(|e| InvalidHeaderName(header.to_owned(), e))?;
HeaderValue::from_str(value).map_err(|e| InvalidHeaderValue(header.to_owned(), e))?;
}
if let Err(e) = Url::parse(&webhook.url) {
return Err(InvalidUrl(webhook.url.to_owned(), e));
}
Ok(())
}
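
Validation in `check_changed` delegates to the actix-web header types and the `url` crate rather than hand-rolled parsing. An isolated sketch of those checks (sample inputs are invented):

    use actix_http::header::{HeaderName, HeaderValue};
    use url::Url;

    fn main() {
        assert!(HeaderName::from_bytes(b"Authorization").is_ok());
        assert!(HeaderName::from_bytes(b"Bad Header").is_err()); // spaces are invalid
        assert!(HeaderValue::from_str("Bearer a-secret-token").is_ok());
        assert!(Url::parse("https://your.site/on-tasks-completed").is_ok());
        assert!(Url::parse("not-a-url").is_err());
    }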
#[utoipa::path(
get,
path = "/{uuid}",
tag = "Webhooks",
security(("Bearer" = ["webhooks.get", "webhooks.*", "*.get", "*"])),
responses(
(status = 200, description = "Webhook found", body = WebhookWithMetadata, content_type = "application/json", example = json!({
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
"Authorization": "Bearer a-secret"
},
"isEditable": true
})),
(status = 404, description = "Webhook not found", body = ResponseError, content_type = "application/json"),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json"),
),
params(
("uuid" = Uuid, Path, description = "The universally unique identifier of the webhook")
)
)]
async fn get_webhook(
index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_GET }>, Data<IndexScheduler>>,
uuid: Path<String>,
) -> Result<HttpResponse, ResponseError> {
let uuid = Uuid::from_str(&uuid.into_inner()).map_err(InvalidUuid)?;
let mut webhooks = index_scheduler.webhooks_view();
let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
let webhook = WebhookWithMetadata::from(uuid, webhook);
debug!(returns = ?webhook, "Get webhook");
Ok(HttpResponse::Ok().json(webhook))
}
#[utoipa::path(
post,
path = "",
tag = "Webhooks",
request_body = WebhookSettings,
security(("Bearer" = ["webhooks.create", "webhooks.*", "*"])),
responses(
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
"Authorization": "Bearer a-secret-token"
},
"isEditable": true
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json"),
(status = 400, description = "Bad request", body = ResponseError, content_type = "application/json"),
)
)]
async fn post_webhook(
index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_CREATE }>, Data<IndexScheduler>>,
webhook_settings: AwebJson<WebhookSettings, DeserrJsonError>,
req: HttpRequest,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let webhook_settings = webhook_settings.into_inner();
debug!(parameters = ?webhook_settings, "Post webhook");
let uuid = Uuid::new_v4();
if webhook_settings.headers.as_ref().set().is_some_and(|h| h.len() > 200) {
return Err(TooManyHeaders(uuid).into());
}
let mut webhooks = index_scheduler.retrieve_runtime_webhooks();
if webhooks.len() >= 20 {
return Err(TooManyWebhooks.into());
}
let webhook = Webhook {
url: webhook_settings.url.set().ok_or(MissingUrl(uuid))?,
headers: webhook_settings
.headers
.set()
.map(|h| h.into_iter().map(|(k, v)| (k, v.set().unwrap_or_default())).collect())
.unwrap_or_default(),
};
check_changed(uuid, &webhook)?;
webhooks.insert(uuid, webhook.clone());
index_scheduler.update_runtime_webhooks(webhooks)?;
analytics.publish(PostWebhooksAnalytics, &req);
let response = WebhookWithMetadata::from(uuid, webhook);
debug!(returns = ?response, "Post webhook");
Ok(HttpResponse::Created().json(response))
}
#[utoipa::path(
patch,
path = "/{uuid}",
tag = "Webhooks",
request_body = WebhookSettings,
security(("Bearer" = ["webhooks.update", "webhooks.*", "*"])),
responses(
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
"Authorization": "Bearer a-secret-token"
},
"isEditable": true
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json"),
(status = 400, description = "Bad request", body = ResponseError, content_type = "application/json"),
),
params(
("uuid" = Uuid, Path, description = "The universally unique identifier of the webhook")
)
)]
async fn patch_webhook(
index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_UPDATE }>, Data<IndexScheduler>>,
uuid: Path<String>,
webhook_settings: AwebJson<WebhookSettings, DeserrJsonError>,
req: HttpRequest,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let uuid = Uuid::from_str(&uuid.into_inner()).map_err(InvalidUuid)?;
let webhook_settings = webhook_settings.into_inner();
debug!(parameters = ?(uuid, &webhook_settings), "Patch webhook");
if uuid.is_nil() {
return Err(ImmutableWebhook(uuid).into());
}
let mut webhooks = index_scheduler.retrieve_runtime_webhooks();
let old_webhook = webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
let webhook = patch_webhook_inner(&uuid, old_webhook, webhook_settings)?;
check_changed(uuid, &webhook)?;
webhooks.insert(uuid, webhook.clone());
index_scheduler.update_runtime_webhooks(webhooks)?;
analytics.publish(PatchWebhooksAnalytics, &req);
let response = WebhookWithMetadata::from(uuid, webhook);
debug!(returns = ?response, "Patch webhook");
Ok(HttpResponse::Ok().json(response))
}
#[utoipa::path(
delete,
path = "/{uuid}",
tag = "Webhooks",
security(("Bearer" = ["webhooks.delete", "webhooks.*", "*"])),
responses(
(status = 204, description = "Webhook deleted successfully"),
(status = 404, description = "Webhook not found", body = ResponseError, content_type = "application/json"),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json"),
),
params(
("uuid" = Uuid, Path, description = "The universally unique identifier of the webhook")
)
)]
async fn delete_webhook(
index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_DELETE }>, Data<IndexScheduler>>,
uuid: Path<String>,
) -> Result<HttpResponse, ResponseError> {
let uuid = Uuid::from_str(&uuid.into_inner()).map_err(InvalidUuid)?;
debug!(parameters = ?uuid, "Delete webhook");
if uuid.is_nil() {
return Err(ImmutableWebhook(uuid).into());
}
let mut webhooks = index_scheduler.retrieve_runtime_webhooks();
webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
index_scheduler.update_runtime_webhooks(webhooks)?;
debug!(returns = "No Content", "Delete webhook");
Ok(HttpResponse::NoContent().finish())
}


@@ -745,10 +745,9 @@ impl SearchByIndex {
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
return Err(milli::SortError::from(asc_desc_error)
.into_search_error()
.into())
}
};
Some(sorts)


@@ -16,7 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli::index::{self, SearchParameters};
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
@@ -1051,6 +1051,7 @@ pub fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.max_total_hits(Some(max_total_hits));
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
@@ -1091,7 +1092,7 @@ pub fn prepare_search<'t>(
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(SortError::from(asc_desc_error)).into())
return Err(SortError::from(asc_desc_error).into_search_error().into())
}
};
@@ -1527,8 +1528,11 @@ impl<'a> HitMaker<'a> {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)? {
let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate };
for (name, EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ }) in
self.index.embeddings(self.rtxn, id)?
{
let embeddings =
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,


@@ -419,14 +419,14 @@ async fn error_add_api_key_invalid_parameters_actions() {
let (response, code) = server.add_api_key(content).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
{
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
}
"###);
"#);
}
#[actix_rt::test]
@@ -790,7 +790,7 @@ async fn list_api_keys() {
meili_snap::snapshot!(code, @"201 Created");
let (response, code) = server.list_api_keys("").await;
meili_snap::snapshot!(meili_snap::json_string!(response, { ".results[].createdAt" => "[ignored]", ".results[].updatedAt" => "[ignored]", ".results[].uid" => "[ignored]", ".results[].key" => "[ignored]" }), @r###"
meili_snap::snapshot!(meili_snap::json_string!(response, { ".results[].createdAt" => "[ignored]", ".results[].updatedAt" => "[ignored]", ".results[].uid" => "[ignored]", ".results[].key" => "[ignored]" }), @r#"
{
"results": [
{
@@ -850,6 +850,22 @@ async fn list_api_keys() {
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Read-Only Admin API Key",
"description": "Use it to read information across the whole database. Caution! Do not expose this key on a public frontend",
"key": "[ignored]",
"uid": "[ignored]",
"actions": [
"*.get",
"keys.get"
],
"indexes": [
"*"
],
"expiresAt": null,
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Chat API Key",
"description": "Use it to chat and search from the frontend",
@@ -869,9 +885,9 @@ async fn list_api_keys() {
],
"offset": 0,
"limit": 20,
"total": 4
"total": 5
}
"###);
"#);
meili_snap::snapshot!(code, @"200 OK");
}


@@ -304,7 +304,7 @@ async fn access_authorized_stats_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@@ -344,7 +344,7 @@ async fn access_authorized_stats_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@@ -384,7 +384,7 @@ async fn list_authorized_indexes_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@@ -425,7 +425,7 @@ async fn list_authorized_indexes_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@@ -507,10 +507,10 @@ async fn access_authorized_index_patterns() {
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
// refer to products_1 with a modified api key.
let index_1 = server.index("products_1");
index_1.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index_1.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -578,19 +578,19 @@ async fn raise_error_non_authorized_index_patterns() {
assert_eq!(202, code, "{:?}", &response);
let task2_id = response["taskUid"].as_u64().unwrap();
// Adding document to test index. Should Fail with 403 -- invalid_api_key
// Adding a document to test index. Should Fail with 403 -- invalid_api_key
let (response, code) = test_index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
// refer to products_1 with a modified api key.
let product_1_index = server.index("products_1");
// refer to products_2 with modified api key.
let product_2_index = server.index("products_2");
// refer to products_2 with a modified api key.
// let product_2_index = server.index("products_2");
product_1_index.wait_task(task1_id).await;
product_2_index.wait_task(task2_id).await;
server.wait_task(task1_id).await;
server.wait_task(task2_id).await;
let (response, code) = product_1_index.get_task(task1_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -603,7 +603,7 @@ async fn raise_error_non_authorized_index_patterns() {
#[actix_rt::test]
async fn pattern_indexes() {
// Create server with master key
// Create a server with master key
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
@@ -650,7 +650,7 @@ async fn list_authorized_tasks_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@@ -690,7 +690,7 @@ async fn list_authorized_tasks_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@@ -757,7 +757,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = index.wait_task(task_id).await;
let response = server.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@@ -768,7 +768,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = index.wait_task(task_id).await;
let response = server.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@@ -778,7 +778,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = index.wait_task(task_id).await;
let response = server.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@@ -830,7 +830,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -844,7 +844,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -856,7 +856,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -911,7 +911,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -929,7 +929,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -949,7 +949,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);


@@ -91,14 +91,14 @@ async fn create_api_key_bad_actions() {
// can't parse
let (response, code) = server.add_api_key(json!({ "actions": ["doggo"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(json_string!(response), @r#"
{
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
}
"###);
"#);
}
#[actix_rt::test]


@@ -100,11 +100,11 @@ macro_rules! compute_authorized_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task1,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task1.uid()).await.succeeded();
server.wait_task(task1.uid()).await.succeeded();
let (task2,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task2.uid()).await.succeeded();
server.wait_task(task2.uid()).await.succeeded();
drop(index);
for key_content in ACCEPTED_KEYS.iter() {
@@ -147,7 +147,7 @@ macro_rules! compute_forbidden_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task, _status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
for key_content in $parent_keys.iter() {


@@ -268,21 +268,21 @@ macro_rules! compute_authorized_single_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (add_task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(add_task.uid()).await.succeeded();
server.wait_task(add_task.uid()).await.succeeded();
let (update_task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(update_task.uid()).await.succeeded();
server.wait_task(update_task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (add_task2,_status_code) = index.add_documents(documents, None).await;
index.wait_task(add_task2.uid()).await.succeeded();
server.wait_task(add_task2.uid()).await.succeeded();
let (update_task2,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(update_task2.uid()).await.succeeded();
server.wait_task(update_task2.uid()).await.succeeded();
drop(index);
@@ -339,21 +339,21 @@ macro_rules! compute_authorized_multiple_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
@@ -423,21 +423,21 @@ macro_rules! compute_forbidden_single_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");
@@ -499,21 +499,21 @@ macro_rules! compute_forbidden_multiple_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");

File diff suppressed because it is too large


@@ -1,15 +1,13 @@
use std::fmt::Write;
use std::marker::PhantomData;
use std::panic::{catch_unwind, resume_unwind, UnwindSafe};
use std::time::Duration;
use actix_web::http::StatusCode;
use tokio::time::sleep;
use urlencoding::encode as urlencode;
use super::encoder::Encoder;
use super::service::Service;
use super::{Owned, Shared, Value};
use super::{Owned, Server, Shared, Value};
use crate::json;
pub struct Index<'a, State = Owned> {
@@ -33,7 +31,7 @@ impl<'a> Index<'a, Owned> {
Index { uid: self.uid.clone(), service: self.service, encoder, marker: PhantomData }
}
pub async fn load_test_set(&self) -> u64 {
pub async fn load_test_set<State>(&self, waiter: &Server<State>) -> u64 {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
let (response, code) = self
.service
@@ -44,12 +42,12 @@ impl<'a> Index<'a, Owned> {
)
.await;
assert_eq!(code, 202);
let update_id = response["taskUid"].as_i64().unwrap();
self.wait_task(update_id as u64).await;
update_id as u64
let update_id = response["taskUid"].as_u64().unwrap();
waiter.wait_task(update_id).await;
update_id
}
pub async fn load_test_set_ndjson(&self) -> u64 {
pub async fn load_test_set_ndjson<State>(&self, waiter: &Server<State>) -> u64 {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
let (response, code) = self
.service
@@ -60,9 +58,9 @@ impl<'a> Index<'a, Owned> {
)
.await;
assert_eq!(code, 202);
let update_id = response["taskUid"].as_i64().unwrap();
self.wait_task(update_id as u64).await;
update_id as u64
let update_id = response["taskUid"].as_u64().unwrap();
waiter.wait_task(update_id).await;
update_id
}
pub async fn create(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
@@ -251,6 +249,11 @@ impl<'a> Index<'a, Owned> {
self.service.put_encoded(url, settings, self.encoder).await
}
pub async fn update_settings_chat(&self, settings: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/settings/chat", urlencode(self.uid.as_ref()));
self.service.patch_encoded(url, settings, self.encoder).await
}
pub async fn delete_settings(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref()));
self.service.delete(url).await
@@ -267,10 +270,14 @@ impl Index<'_, Shared> {
/// You cannot modify the content of a shared index, thus the delete_document_by_filter call
/// must fail. If the task successfully enqueues itself, we'll wait for it to finish,
/// and if it succeeds the function will panic.
pub async fn delete_document_by_filter_fail(&self, body: Value) -> (Value, StatusCode) {
pub async fn delete_document_by_filter_fail<State>(
&self,
body: Value,
waiter: &Server<State>,
) -> (Value, StatusCode) {
let (mut task, code) = self._delete_document_by_filter(body).await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
task = waiter.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_document_by_filter_fail` succeeded: {}",
@@ -281,10 +288,10 @@ impl Index<'_, Shared> {
(task, code)
}
pub async fn delete_index_fail(&self) -> (Value, StatusCode) {
pub async fn delete_index_fail<State>(&self, waiter: &Server<State>) -> (Value, StatusCode) {
let (mut task, code) = self._delete().await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
task = waiter.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_index_fail` succeeded: {}",
@@ -295,10 +302,14 @@ impl Index<'_, Shared> {
(task, code)
}
pub async fn update_index_fail(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
pub async fn update_index_fail<State>(
&self,
primary_key: Option<&str>,
waiter: &Server<State>,
) -> (Value, StatusCode) {
let (mut task, code) = self._update(primary_key).await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
task = waiter.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`update_index_fail` succeeded: {}",
@@ -364,23 +375,6 @@ impl<State> Index<'_, State> {
self.service.delete(url).await
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
for _ in 0..100 {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;
}
// wait 0.5 second.
sleep(Duration::from_millis(500)).await;
}
panic!("Timeout waiting for update id");
}
pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) {
let url = format!("/tasks/{}", update_id);
self.service.get(url).await
@@ -562,5 +556,7 @@ pub struct GetAllDocumentsOptions {
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sort: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
}


@@ -3,8 +3,10 @@ pub mod index;
pub mod server;
pub mod service;
use std::collections::BTreeMap;
use std::fmt::{self, Display};
use actix_http::StatusCode;
#[allow(unused)]
pub use index::GetAllDocumentsOptions;
use meili_snap::json_string;
@@ -13,6 +15,8 @@ use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
use tokio::sync::OnceCell;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use crate::common::index::Index;
@@ -38,6 +42,15 @@ impl Value {
self["uid"].as_u64().is_some() || self["taskUid"].as_u64().is_some()
}
#[track_caller]
pub fn batch_uid(&self) -> u32 {
if let Some(batch_uid) = self["batchUid"].as_u64() {
batch_uid as u32
} else {
panic!("Didn't find `batchUid` in: {self}");
}
}
/// Return `true` if the `status` field is set to `succeeded`.
/// Panics if the `status` field doesn't exist.
#[track_caller]
@@ -181,7 +194,7 @@ pub async fn shared_empty_index() -> &'static Index<'static, Shared> {
let server = Server::new_shared();
let index = server._index("EMPTY_INDEX").to_shared();
let (response, _code) = index._create(None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
})
.await
@@ -229,13 +242,13 @@ pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
let index = server._index("SHARED_DOCUMENTS").to_shared();
let documents = DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
}).await
}
@@ -272,13 +285,13 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shar
let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared();
let documents = SCORE_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
}).await
}
@@ -349,13 +362,13 @@ pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Sha
let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
let documents = NESTED_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["father", "doggos", "cattos"], "sortableAttributes": ["doggos"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
}).await
}
@@ -449,7 +462,7 @@ pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> {
)
.await;
assert_eq!(code, 202);
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
})
.await
@@ -496,15 +509,178 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
let server = Server::new_shared();
let index = server._index("SHARED_GEO_DOCUMENTS").to_shared();
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["_geo"], "sortableAttributes": ["_geo"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
})
.await
}
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
let (server, uid) = INDEX
.get_or_init(|| async {
let (server, uid, _) = init_fragments_index().await;
(server.into_shared(), uid)
})
.await;
server._index(uid).to_shared()
}
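`shared_index_for_fragments` leans on tokio's `OnceCell` so the expensive fixture is built exactly once and later callers get the cached server and uid. The same pattern in isolation, as a minimal sketch with hypothetical names:

    use tokio::sync::OnceCell;

    static FIXTURE: OnceCell<String> = OnceCell::const_new();

    // Built once; concurrent callers all await the same initialization.
    async fn shared_fixture() -> &'static str {
        FIXTURE.get_or_init(|| async { "expensive setup result".to_string() }).await
    }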
async fn fragment_mock_server() -> String {
let text_to_embedding: BTreeMap<_, _> = vec![
("kefir", [0.5, -0.5, 0.0]),
("intel", [1.0, 1.0, 0.0]),
("dustin", [-0.5, 0.5, 0.0]),
("bulldog", [0.0, 0.0, 1.0]),
("labrador", [0.0, 0.0, -1.0]),
("{{ doc.", [-9999.0, -9999.0, -9999.0]), // If a template didn't render
]
.into_iter()
.collect();
let mock_server = Box::leak(Box::new(MockServer::start().await));
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
let text = String::from_utf8_lossy(&req.body).to_string();
let mut data = [0.0, 0.0, 0.0];
for (inner_text, inner_data) in &text_to_embedding {
if text.contains(inner_text) {
for (i, &value) in inner_data.iter().enumerate() {
data[i] += value;
}
}
}
ResponseTemplate::new(200).set_body_json(json!({ "data": data }))
})
.mount(mock_server)
.await;
mock_server.uri()
}
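The mock embedder is additive: it scans the request body for every known token and sums the matching vectors, so the fragment "intel is a labrador" embeds to [1.0, 1.0, 0.0] + [0.0, 0.0, -1.0] = [1.0, 1.0, -1.0], while any unrendered template marker (`{{ doc.`) drowns the result in -9999 components so broken templates fail loudly in snapshots.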
pub async fn init_fragments_index() -> (Server<Owned>, String, crate::common::Value) {
let url = fragment_mock_server().await;
let server = Server::new().await;
let index = server.unique_index();
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);
// Configure the index to use our mock embedder
let settings = json!({
"embedders": {
"rest": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"indexingFragments": {
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
"basic": {"value": "{{ doc.name }} is a dog"},
},
"searchFragments": {
"justBreed": {"value": "It's a {{ media.breed }}"},
"justName": {"value": "{{ media.name }} is a dog"},
"query": {"value": "Some pre-prompt for query {{ q }}"},
}
},
},
});
let (response, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(response.uid()).await.succeeded();
// Send documents
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel", "breed": "labrador"},
{"id": 3, "name": "dustin", "breed": "bulldog"},
]);
let (value, code) = index.add_documents(documents, None).await;
assert_eq!(code, StatusCode::ACCEPTED);
let _task = server.wait_task(value.uid()).await.succeeded();
let uid = index.uid.clone();
(server, uid, settings)
}
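Tests that need a mutable fragments index rather than the shared one can call this helper directly; a minimal sketch, reusing the `_index` accessor seen in `shared_index_for_fragments` above:

    let (server, uid, _settings) = init_fragments_index().await;
    // An owned handle: this test may add documents or change embedder settings freely.
    let index = server._index(&uid);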
pub async fn init_fragments_index_composite() -> (Server<Owned>, String, crate::common::Value) {
let url = fragment_mock_server().await;
let server = Server::new().await;
let index = server.unique_index();
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);
let (_response, code) = server.set_features(json!({"compositeEmbedders": true})).await;
assert_eq!(code, StatusCode::OK);
// Configure the index to use our mock embedder
let settings = json!({
"embedders": {
"rest": {
"source": "composite",
"searchEmbedder": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"searchFragments": {
"query": {"value": "Some pre-prompt for query {{ q }}"},
}
},
"indexingEmbedder": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"indexingFragments": {
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
"basic": {"value": "{{ doc.name }} is a dog"},
}
},
},
},
});
let (response, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(response.uid()).await.succeeded();
// Send documents
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel", "breed": "labrador"},
{"id": 3, "name": "dustin", "breed": "bulldog"},
]);
let (value, code) = index.add_documents(documents, None).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(value.uid()).await.succeeded();
let uid = index.uid.clone();
(server, uid, settings)
}

View File

@@ -35,7 +35,7 @@ pub struct Server<State = Owned> {
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server<Owned> {
fn into_shared(self) -> Server<Shared> {
pub(super) fn into_shared(self) -> Server<Shared> {
Server { service: self.service, _dir: self._dir, _marker: PhantomData }
}
@@ -97,6 +97,7 @@ impl Server<Owned> {
self.use_api_key(master_key);
let (response, code) = self.list_api_keys("").await;
assert_eq!(200, code, "{:?}", response);
// TODO: relying on the order of keys is not ideal, we should use the name instead
let admin_key = &response["results"][1]["key"];
self.use_api_key(admin_key.as_str().unwrap());
}
@@ -181,6 +182,25 @@ impl Server<Owned> {
self.service.patch("/network", value).await
}
pub async fn create_webhook(&self, value: Value) -> (Value, StatusCode) {
self.service.post("/webhooks", value).await
}
pub async fn get_webhook(&self, uuid: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/webhooks/{}", uuid.as_ref());
self.service.get(url).await
}
pub async fn delete_webhook(&self, uuid: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/webhooks/{}", uuid.as_ref());
self.service.delete(url).await
}
pub async fn patch_webhook(&self, uuid: impl AsRef<str>, value: Value) -> (Value, StatusCode) {
let url = format!("/webhooks/{}", uuid.as_ref());
self.service.patch(url, value).await
}
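Together these helpers cover the new `/webhooks` CRUD surface; a round-trip sketch (the payload and response field names, `url` and `uuid`, are assumptions for illustration, not the documented API):

    let (created, _code) =
        server.create_webhook(json!({ "url": "https://example.com/hook" })).await;
    let uuid = created["uuid"].as_str().unwrap(); // response field name assumed
    let (_response, _code) =
        server.patch_webhook(uuid, json!({ "url": "https://example.com/hook-v2" })).await;
    let (_response, _code) = server.delete_webhook(uuid).await;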
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
@@ -408,12 +428,12 @@ impl<State> Server<State> {
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
let max_attempts = 400; // 200 seconds total, 0.5s per attempt
let url = format!("/tasks/{update_id}");
let max_attempts = 400; // 200 seconds in total, 0.5s per attempt
for i in 0..max_attempts {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
let (response, status_code) = self.service.get(url.clone()).await;
assert_eq!(200, status_code, "response: {response}");
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;
@@ -446,6 +466,10 @@ impl<State> Server<State> {
pub async fn get_network(&self) -> (Value, StatusCode) {
self.service.get("/network").await
}
pub async fn get_webhooks(&self) -> (Value, StatusCode) {
self.service.get("/webhooks").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@@ -465,6 +489,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
// Having 2 threads makes the tests way faster
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
experimental_no_edition_2024_for_settings: false,
experimental_no_edition_2024_for_dumps: false,
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)

View File

@@ -1318,7 +1318,7 @@ async fn add_no_documents() {
async fn add_larger_dataset() {
let server = Server::new_shared();
let index = server.unique_index();
let update_id = index.load_test_set().await;
let update_id = index.load_test_set(server).await;
let (response, code) = index.get_task(update_id).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "succeeded");
@@ -1333,7 +1333,7 @@ async fn add_larger_dataset() {
// x-ndjson add large test
let index = server.unique_index();
let update_id = index.load_test_set_ndjson().await;
let update_id = index.load_test_set_ndjson(server).await;
let (response, code) = index.get_task(update_id).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "succeeded");

View File

@@ -7,7 +7,8 @@ use crate::json;
async fn delete_one_document_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) = index.delete_document_by_filter_fail(json!({"filter": "a = b"})).await;
let (task, code) =
index.delete_document_by_filter_fail(json!({"filter": "a = b"}), server).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.failed();

View File

@@ -559,7 +559,7 @@ async fn delete_document_by_filter() {
let index = shared_does_not_exists_index().await;
// index does not exist
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
@@ -589,7 +589,7 @@ async fn delete_document_by_filter() {
// no filterable are set
let index = shared_empty_index().await;
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
@@ -619,7 +619,7 @@ async fn delete_document_by_filter() {
// not filterable while there is a filterable attribute
let index = shared_index_with_documents().await;
let (response, code) =
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"}), server).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await.failed();
snapshot!(response, @r###"

View File

@@ -5,8 +5,8 @@ use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder;
use crate::common::{
shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
GetAllDocumentsOptions, Server, Value,
shared_does_not_exists_index, shared_empty_index, shared_index_with_geo_documents,
shared_index_with_test_set, GetAllDocumentsOptions, Server, Value,
};
use crate::json;
@@ -83,6 +83,311 @@ async fn get_document() {
);
}
#[actix_rt::test]
async fn get_document_sorted() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set(server).await;
let (task, _status_code) =
index.update_settings_sortable_attributes(json!(["age", "email", "gender", "name"])).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "age", "email"]),
sort: Some(vec!["age:asc", "email:desc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 5,
"age": 20,
"email": "warrenwatson@chorizon.com"
},
{
"id": 6,
"age": 20,
"email": "sheliaberry@chorizon.com"
},
{
"id": 57,
"age": 20,
"email": "kaitlinconner@chorizon.com"
},
{
"id": 45,
"age": 20,
"email": "irenebennett@chorizon.com"
},
{
"id": 40,
"age": 21,
"email": "staffordemerson@chorizon.com"
},
{
"id": 41,
"age": 21,
"email": "salinasgamble@chorizon.com"
},
{
"id": 63,
"age": 21,
"email": "knowleshebert@chorizon.com"
},
{
"id": 50,
"age": 21,
"email": "guerramcintyre@chorizon.com"
},
{
"id": 44,
"age": 22,
"email": "jonispears@chorizon.com"
},
{
"id": 56,
"age": 23,
"email": "tuckerbarry@chorizon.com"
},
{
"id": 51,
"age": 23,
"email": "keycervantes@chorizon.com"
},
{
"id": 60,
"age": 23,
"email": "jodyherrera@chorizon.com"
},
{
"id": 70,
"age": 23,
"email": "glassperkins@chorizon.com"
},
{
"id": 75,
"age": 24,
"email": "emmajacobs@chorizon.com"
},
{
"id": 68,
"age": 24,
"email": "angelinadyer@chorizon.com"
},
{
"id": 17,
"age": 25,
"email": "ortegabrennan@chorizon.com"
},
{
"id": 76,
"age": 25,
"email": "claricegardner@chorizon.com"
},
{
"id": 43,
"age": 25,
"email": "arnoldbender@chorizon.com"
},
{
"id": 12,
"age": 25,
"email": "aidakirby@chorizon.com"
},
{
"id": 9,
"age": 26,
"email": "kellimendez@chorizon.com"
}
]
"#);
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "gender", "name"]),
sort: Some(vec!["gender:asc", "name:asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 3,
"name": "Adeline Flynn",
"gender": "female"
},
{
"id": 12,
"name": "Aida Kirby",
"gender": "female"
},
{
"id": 68,
"name": "Angelina Dyer",
"gender": "female"
},
{
"id": 15,
"name": "Aurelia Contreras",
"gender": "female"
},
{
"id": 36,
"name": "Barbra Valenzuela",
"gender": "female"
},
{
"id": 23,
"name": "Blanca Mcclain",
"gender": "female"
},
{
"id": 53,
"name": "Caitlin Burnett",
"gender": "female"
},
{
"id": 71,
"name": "Candace Sawyer",
"gender": "female"
},
{
"id": 65,
"name": "Carole Rowland",
"gender": "female"
},
{
"id": 33,
"name": "Cecilia Greer",
"gender": "female"
},
{
"id": 1,
"name": "Cherry Orr",
"gender": "female"
},
{
"id": 38,
"name": "Christina Short",
"gender": "female"
},
{
"id": 7,
"name": "Chrystal Boyd",
"gender": "female"
},
{
"id": 76,
"name": "Clarice Gardner",
"gender": "female"
},
{
"id": 73,
"name": "Eleanor Shepherd",
"gender": "female"
},
{
"id": 75,
"name": "Emma Jacobs",
"gender": "female"
},
{
"id": 16,
"name": "Estella Bass",
"gender": "female"
},
{
"id": 62,
"name": "Estelle Ramirez",
"gender": "female"
},
{
"id": 20,
"name": "Florence Long",
"gender": "female"
},
{
"id": 42,
"name": "Graciela Russell",
"gender": "female"
}
]
"#);
}
#[actix_rt::test]
async fn get_document_geosorted() {
let index = shared_index_with_geo_documents().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
}
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
]
"#);
}
#[actix_rt::test]
async fn get_document_sort_the_unsortable() {
let index = shared_index_with_test_set().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "name"]),
sort: Some(vec!["name:asc"]),
..Default::default()
})
.await;
snapshot!(json_string!(response), @r#"
{
"message": "Attribute `name` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_document_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_sort"
}
"#);
}
#[actix_rt::test]
async fn error_get_unexisting_index_all_documents() {
let index = shared_does_not_exists_index().await;
@@ -334,7 +639,7 @@ async fn get_document_s_nested_attributes_to_retrieve() {
async fn get_documents_displayed_attributes_is_ignored() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set().await;
index.load_test_set(server).await;
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;

View File

@@ -2366,7 +2366,7 @@ async fn generate_and_import_dump_containing_vectors() {
))
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
let response = server.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = index
.add_documents(
@@ -2381,12 +2381,12 @@ async fn generate_and_import_dump_containing_vectors() {
)
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
let response = server.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = server.create_dump().await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
let response = server.wait_task(response.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// ========= We made a dump, now we should clear the DB and try to import our dump

View File

@@ -161,9 +161,9 @@ async fn test_create_multiple_indexes() {
let (task2, _) = index2.create(None).await;
let (task3, _) = index3.create(None).await;
index1.wait_task(task1.uid()).await.succeeded();
index2.wait_task(task2.uid()).await.succeeded();
index3.wait_task(task3.uid()).await.succeeded();
server.wait_task(task1.uid()).await.succeeded();
server.wait_task(task2.uid()).await.succeeded();
server.wait_task(task3.uid()).await.succeeded();
assert_eq!(index1.get().await.1, 200);
assert_eq!(index2.get().await.1, 200);

Some files were not shown because too many files have changed in this diff.