Compare commits


155 Commits

SHA1 Message Date
776e55d209 Improve code readability 2025-07-22 11:37:21 +02:00
3362fb8476 Remove print 2025-07-22 11:21:06 +02:00
6d93b36279 Format 2025-07-22 11:18:41 +02:00
982e989886 Test regenerate filter 2025-07-22 11:10:05 +02:00
0014ed3114 Apply review suggestions 2025-07-22 10:56:05 +02:00
ab07e9480e Resolve post-merge issues 2025-07-21 18:22:10 +02:00
00e957051e Merge remote-tracking branch 'origin/release-v1.16.0' into fragment-filters 2025-07-21 18:19:45 +02:00
0312fb22b8 Merge pull request #5761 from meilisearch/fix-chat-settings-dumpless-upgrade
Fix chat settings dumpless upgrade
2025-07-17 15:57:39 +00:00
f1d92bfead Make sure the new filter chat setting is set to it's default value if
missing
2025-07-17 15:36:21 +02:00
a005a062da Add security if chat settings parameters are missing 2025-07-17 15:27:53 +02:00
fd8b2451d7 Merge pull request #5754 from kametsun/fix/incorrect-stats-doc-count
Fix incorrect document count in stats after clearing all documents
2025-07-17 06:48:51 +00:00
058f9ffda5 Merge pull request #5734 from meilisearch/request-fragments-test
Tests for multimodal
2025-07-16 11:04:00 +00:00
5d363205a5 Merge pull request #5716 from meilisearch/document-sorting
Allow sorting on the /documents route
2025-07-16 10:26:50 +00:00
a683faa882 Apply review suggestions 2025-07-16 11:03:24 +02:00
8887cbdcd5 Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
2025-07-16 07:15:24 +00:00
634865ff53 Merge pull request #5710 from meilisearch/chat-route-support-filters
Introduce filters in the chat completions
2025-07-15 16:10:49 +00:00
36fccf8525 Merge remote-tracking branch 'origin/release-v1.16.0' into fix-threshold-overcounting-bug 2025-07-15 18:01:29 +02:00
d6bd60d569 Apply review suggestions
Co-Authored-By: Louis Dureuil <louis.dureuil@xinra.net>
2025-07-15 18:00:37 +02:00
48ad959fc1 Merge remote-tracking branch 'origin/release-v1.16.0' into document-sorting 2025-07-15 17:41:46 +02:00
1bc30cb4c8 Restore old benchmark names 2025-07-15 17:34:04 +02:00
77138a42d6 Apply review suggestions
Add preconditions

Fix underflow

Remove unwrap

Turn methods to associated functions

Apply review suggestions
2025-07-15 17:31:11 +02:00
0791506124 Fix some proposals 2025-07-15 17:10:45 +02:00
2a015ac3b8 Implement basic few shot prompting to improve the query capabilities 2025-07-15 14:50:10 +02:00
6f248b78a9 Merge pull request #5751 from meilisearch/fix-searchable-attributes-order
Fix: Preserve order of searchable attributes when modified
2025-07-15 10:38:11 +00:00
d694e312ff Update crates/milli/src/update/settings.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-07-15 11:54:59 +02:00
d76dcc8998 Make clippy happy 2025-07-15 11:49:48 +02:00
e654f66223 Support filtering 2025-07-15 11:49:47 +02:00
34f2ab7093 WIP report search errors to the LLM 2025-07-15 11:49:46 +02:00
1a9dbd364e Fix some issues 2025-07-15 11:49:46 +02:00
662c5d9871 Introduce filters in the chat completions 2025-07-15 11:49:45 +02:00
5cd61b50f9 Fix formatting 2025-07-12 18:19:26 +09:00
9a9be76757 add: verify that the statistics are correctly update assert 2025-07-12 11:15:44 +09:00
cfa6ba6c3b Fix stats showing wrong document count after clear all
Update database stats after clearing documents to ensure
/stats endpoint returns correct numberOfDocuments: 0 instead
of stale count.
2025-07-12 11:15:44 +09:00
f4f333dbf6 Merge pull request #5753 from meilisearch/export-fixes
Various fixes on the export route
2025-07-11 19:15:42 +00:00
1ade76ba10 Remove sneaky debug 2025-07-11 12:27:04 +02:00
ae26658913 Use the most appropriate unit in payload_too_large error 2025-07-11 12:27:03 +02:00
aa09edb3fb Fix errors being silently dropped 2025-07-11 12:27:03 +02:00
3f42f1a036 Get rid of bearer 2025-07-11 12:27:03 +02:00
9bdfdd395b Fix document step overflowing 2025-07-11 12:27:03 +02:00
78d0625a91 Decrease default payload size for exports 2025-07-11 12:27:03 +02:00
3f655ea20e compare user defined searchable fields instead of internal searchable fields 2025-07-10 18:24:23 +02:00
50bc1d55f3 Add test reproducing the bug 2025-07-10 18:23:46 +02:00
f244439b4f Revert "Format"
This reverts commit 30fd546c12.
2025-07-10 16:43:45 +02:00
30fd546c12 Format 2025-07-10 16:43:10 +02:00
a930977460 Fix test 2025-07-10 09:37:58 +02:00
a3b8c2b71f Gate behind multimodal experimental feature 2025-07-09 18:21:52 +02:00
39f808714d Implement a documentTemplate filter 2025-07-09 18:03:32 +02:00
8adf6141e0 Fix old test 2025-07-08 16:55:43 +02:00
df3f282e4d Merge branch 'request-fragments-test' into fragment-filters 2025-07-08 16:35:14 +02:00
d81855015b Add test 2025-07-08 16:23:45 +02:00
feb53104e5 Grammar 2025-07-08 16:19:55 +02:00
881c37393f Add telemetry 2025-07-08 16:06:27 +02:00
9e98a25e45 Fix clippy 2025-07-08 15:56:09 +02:00
0a4f2ef891 Leak mock servers 2025-07-08 15:27:35 +02:00
faa1f7c5b7 Merge pull request #5693 from Mubelotix/default-key
Add a Read-Only Admin API Key by default
2025-07-08 12:38:29 +00:00
3cc5d86598 Format 2025-07-08 13:57:17 +02:00
1ae47bec77 Improve composite test 2025-07-08 13:57:07 +02:00
2f1be0ff86 Ignore faulty test (see #5746) 2025-07-08 13:55:07 +02:00
9cee432255 Fix broken tests 2025-07-08 13:36:26 +02:00
ff8d48d2f1 Merge branch 'main' into default-key 2025-07-08 12:21:46 +02:00
a56c036994 Update crates/meilisearch-types/src/keys.rs
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2025-07-08 12:18:52 +02:00
fb73b83abe Fix performance 2025-07-08 12:14:34 +02:00
29b74424ad Clean code 2025-07-08 12:03:32 +02:00
b4cafec8b3 Add tests for operators along vector filter 2025-07-08 11:56:19 +02:00
d43cd40807 Split tests 2025-07-08 11:48:23 +02:00
0301d8f239 Improve error handling 2025-07-08 11:39:10 +02:00
511c48f520 Merge pull request #5737 from meilisearch/request-fragments-dumpless-upgrade
Fix the dumpless upgrade from v1.15 to v1.16 for request fragments
2025-07-08 08:49:38 +00:00
4623691d1f Don't make the type-that-shall-not-be-written serializable
Following tamo's advice

Co-Authored-By: Tamo <tamo@meilisearch.com>
2025-07-08 10:04:33 +02:00
2d45124d9b Fix parsing 2025-07-08 10:01:50 +02:00
40e7284d70 Add tests 2025-07-08 10:01:35 +02:00
4d8d34cc93 Merge branch 'request-fragments-test' into fragment-filters 2025-07-07 18:45:34 +02:00
5cced0af02 Prevent having both a fragment name and userProvided 2025-07-07 18:41:03 +02:00
9c60e9689f Support not specifying an embedder in the vector filter 2025-07-07 18:34:24 +02:00
3261aadcf2 Add composite test 2025-07-07 16:50:39 +02:00
073e9f2967 Disable similarity check on composite embedders using fragments 2025-07-07 16:46:16 +02:00
5f8f48ec95 Add new snapshot checking for regenerativeness 2025-07-07 16:43:05 +02:00
ed2fe365a0 Fix existing snaps 2025-07-07 16:42:50 +02:00
f7c8a77f89 Update v1.12.0 DB to contain vectors 2025-07-07 16:01:50 +02:00
2052537681 Implement core filter logic 2025-07-07 15:28:35 +02:00
a9bb64c55a Unrelated minor fixes 2025-07-07 15:28:10 +02:00
132065afda Minor improvements 2025-07-07 13:10:16 +02:00
51c298662b Merge branch 'main' into request-fragments-test 2025-07-07 13:00:21 +02:00
70a860a0f0 Merge branch 'main' into fix-threshold-overcounting-bug 2025-07-07 12:26:37 +02:00
a3254d7d7d Implement dumpless upgrade from v1.15 to v1.16 2025-07-07 11:57:08 +02:00
73c9c1ebdc Add compile-time checks for dumpless upgrade 2025-07-07 11:34:18 +02:00
fa3990daf9 Format 2025-07-04 13:33:49 +02:00
c5993196b3 Add test 2025-07-04 13:32:55 +02:00
16234e1313 Add fragment swapping test 2025-07-04 13:25:42 +02:00
be9f4f96df Add experimental feature test 2025-07-04 13:15:15 +02:00
b274106ad3 Add test 2025-07-04 13:05:52 +02:00
48527761e7 Add test 2025-07-04 12:01:15 +02:00
6792d048b8 Test both fragments and document template 2025-07-04 11:47:38 +02:00
8dfded2993 Update tests 2025-07-04 10:49:03 +02:00
3714f16696 Fix bug 2025-07-04 10:40:50 +02:00
d0cd3cacec Add a way to reproduce the bug 2025-07-03 18:18:04 +02:00
caccb51814 Add a complex value test 2025-07-03 16:10:23 +02:00
cf9b311f71 Format 2025-07-03 15:53:09 +02:00
7423243be0 Add test with multiple embedders 2025-07-03 15:52:18 +02:00
5690700601 Add fragment addition test 2025-07-03 15:19:31 +02:00
2faad504c6 Add test 2025-07-03 15:12:47 +02:00
2bcd69750f Add fragment modification test 2025-07-03 15:08:27 +02:00
de24e75be8 Update test 2025-07-03 15:00:11 +02:00
a3af9fe057 new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:44:34 +02:00
90683d0e4e add snapshot of get settings 2025-07-03 14:43:06 +02:00
5c79273748 Add TODOs 2025-07-03 14:42:49 +02:00
b45eea0d3e Add test for fragment deletion 2025-07-03 13:26:44 +02:00
0b89ef1fd7 Make tests use a shared index 2025-07-03 11:32:49 +02:00
65ba7b47af Test search fragments 2025-07-03 11:32:49 +02:00
8af76a65bf Add test_fragment_indexing 2025-07-03 11:32:49 +02:00
f60814b319 Add benchmark 2025-07-02 12:06:00 +02:00
5a675bcb82 Add benchmarks 2025-07-02 11:50:32 +02:00
600178c5ab Still limit to max hits 2025-07-01 18:33:09 +02:00
dedae94102 Fix #5274 2025-07-01 16:22:25 +02:00
7ae9a4afee Add a test for issue #5274 2025-07-01 15:42:43 +02:00
e92b6beb20 Revert making check_sort_criteria usable without a search context 2025-07-01 14:26:55 +02:00
27cc357362 Document code 2025-07-01 14:21:55 +02:00
73dfeefc7c Remove plural form 2025-07-01 14:08:46 +02:00
d85480de89 Move sort code out of facet 2025-07-01 14:05:47 +02:00
9f55708d84 Format 2025-07-01 13:58:56 +02:00
280c3907be Add test to sort the unsortable 2025-07-01 13:58:37 +02:00
8419fd9b3b Ditch usage of check_sort_criteria 2025-07-01 13:42:38 +02:00
283944ea89 Differentiate between document sort error and search sort error 2025-07-01 12:03:50 +02:00
8aacd6374a Optimize geo sort 2025-07-01 11:50:01 +02:00
8326f34ad1 Add analytics 2025-07-01 11:35:28 +02:00
f4a908669c Add tests 2025-07-01 10:02:15 +02:00
eb2c2815b6 Fix panic 2025-07-01 10:00:10 +02:00
29e9c74a49 Merge two ifs 2025-06-30 16:17:04 +02:00
f6803dd7d1 Simplify iterator chaining in facet sort 2025-06-30 14:05:23 +02:00
f86f4f619f Implement geo sort on documents 2025-06-30 13:57:30 +02:00
e35d58b531 Move geosort code out of search 2025-06-30 13:12:00 +02:00
63827bbee0 Move sorting code out of search 2025-06-30 11:59:59 +02:00
340d9e6edc Optimize facet sort
5 to 10x speedup
2025-06-27 14:40:55 +02:00
28adbc0d18 Update tests 2025-06-27 09:47:46 +02:00
e3fba62e13 Fix typo 2025-06-27 09:40:59 +02:00
fb9170b8e3 Keep name consistent with others 2025-06-27 09:40:30 +02:00
c15763f910 Improve key description
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-06-27 09:39:24 +02:00
4534dc2cab Create another deserr error 2025-06-25 16:45:32 +02:00
b05cb80803 Take sort criteria from the request 2025-06-25 16:41:08 +02:00
6e0526090a Implement sorting documents 2025-06-25 15:36:12 +02:00
2090e9ea31 Update test 2025-06-25 10:08:25 +02:00
1c8f1c18f4 Fix constant name and key description 2025-06-25 09:59:34 +02:00
c4a96b40eb Remove KeysGet from AllGet 2025-06-24 17:40:06 +02:00
2d6dc83940 Format the code 2025-06-19 15:55:12 +02:00
ab768f379f Fix comment 2025-06-19 15:49:34 +02:00
705e9a9e5e Make the uuids random again to prevent abuse using rainbow tables 2025-06-19 15:45:09 +02:00
67f2a30d7c Fix test 2025-06-19 13:10:08 +02:00
99732f4084 Fix some tests 2025-06-19 13:04:55 +02:00
5081d837ea Fix AllGet action being included in All 2025-06-19 12:12:30 +02:00
9e1cb792f4 Rename Action::AllRead to AllGet 2025-06-19 11:55:25 +02:00
b6b7ede266 Rename Action *.read to *.get 2025-06-19 11:53:42 +02:00
f50e586a4f Allow management key to read other keys 2025-06-19 11:52:58 +02:00
11fedea788 Set static uuids to keys 2025-06-19 11:42:45 +02:00
032b34c377 Add a default management key 2025-06-19 11:29:32 +02:00
b421c8e7de Add an AllRead key 2025-06-19 11:29:16 +02:00
00eb258a53 Fix comment 2025-06-19 11:16:07 +02:00
116 changed files with 7191 additions and 2450 deletions

View File

@ -1,26 +1,28 @@
---
name: New feature issue
about: ⚠️ Should only be used by the internal Meili team ⚠️
name: New sprint issue
about: ⚠️ Should only be used by the engine team ⚠️
title: ''
labels: 'impacts docs, impacts integrations'
labels: 'missing usage in PRD, impacts docs'
assignees: ''
---
Related product team resources: [PRD]() (_internal only_)
Related product discussion:
## Motivation
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
TBD
## TODO
<!---If necessary, create a list with technical/product steps-->
### Are you modifying a database?
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.
@ -52,5 +54,5 @@ TBD
## Impacted teams
<!---Ping the related teams. Ask on Slack if any hesitation-->
<!---@meilisearch/docs-team and @meilisearch/integration-team when there is any API change, e.g. settings addition-->
<!---Ping the related teams. Ask for the engine manager if any hesitation-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->

View File

@ -1,13 +0,0 @@
## Related issue
Fixes #...
## Requirements
⚠️ Ensure the following requirements before merging ⚠️
- [] Automated tests have been added.
- [] If some tests cannot be automated, manual rigorous tests should be applied.
- [] ⚠️ If there is an change in the DB: it's mandatory to manually test the `--experimental-dumpless-upgrade` on a DB of the previous Meilisearch minor version (e.g. v1.13 for the v1.14 release).
- [] If necessary, the feature have been tested in the Cloud production environment (with [prototypes](./documentation/prototypes.md)) and the Cloud UI is ready.
- [] If necessary, the [documentation](https://github.com/meilisearch/documentation) related to the implemented feature in the PR is ready.
- [] If necessary, the [integrations](https://github.com/meilisearch/integration-guides) related to the implemented feature in the PR are ready.

View File

@ -1,23 +0,0 @@
name-template: 'v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'
exclude-labels:
- 'skip changelog'
version-resolver:
major:
labels:
- 'breaking-change'
minor:
labels:
- 'enhancement'
default: patch
template: |
$CHANGES
Thanks again to $CONTRIBUTORS! 🎉
no-changes-template: 'Changes are coming soon 😎'
sort-direction: 'ascending'
replacers:
- search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
replace: ''
- search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
replace: ''

View File

@ -1,22 +0,0 @@
This issue is about updating Meilisearch dependencies:
- [ ] Update Meilisearch dependencies with the help of `cargo +nightly udeps --all-targets` (remove unused dependencies) and `cargo upgrade` (upgrade dependencies versions) - ⚠️ Some repositories may contain subdirectories (like heed, charabia, or deserr). Take care of updating these in the main crate as well. This won't be done automatically by `cargo upgrade`.
- [ ] [deserr](https://github.com/meilisearch/deserr)
- [ ] [charabia](https://github.com/meilisearch/charabia/)
- [ ] [heed](https://github.com/meilisearch/heed/)
- [ ] [roaring-rs](https://github.com/RoaringBitmap/roaring-rs/)
- [ ] [obkv](https://github.com/meilisearch/obkv)
- [ ] [grenad](https://github.com/meilisearch/grenad/)
- [ ] [arroy](https://github.com/meilisearch/arroy/)
- [ ] [segment](https://github.com/meilisearch/segment)
- [ ] [bumparaw-collections](https://github.com/meilisearch/bumparaw-collections)
- [ ] [bbqueue](https://github.com/meilisearch/bbqueue)
- [ ] Finally, [Meilisearch](https://github.com/meilisearch/MeiliSearch)
- [ ] If new Rust versions have been released, update the minimal Rust version in use at Meilisearch:
- [ ] in this [GitHub Action file](https://github.com/meilisearch/meilisearch/blob/main/.github/workflows/test-suite.yml), by changing the `toolchain` field of the `rustfmt` job to the latest available nightly (of the day before or the current day).
- [ ] in every [GitHub Action files](https://github.com/meilisearch/meilisearch/blob/main/.github/workflows), by changing all the `dtolnay/rust-toolchain@` references to use the latest stable version.
- [ ] in this [`rust-toolchain.toml`](https://github.com/meilisearch/meilisearch/blob/main/rust-toolchain.toml), by changing the `channel` field to the latest stable version.
- [ ] in the [Dockerfile](https://github.com/meilisearch/meilisearch/blob/main/Dockerfile), by changing the base image to `rust:<target_rust_version>-alpine<alpine_version>`. Check that the image exists on [Dockerhub](https://hub.docker.com/_/rust/tags?page=1&name=alpine). Also, build and run the image to check everything still works!
⚠️ This issue should be prioritized to avoid any deprecation and vulnerability issues.
The GitHub action dependencies are managed by [Dependabot](https://github.com/meilisearch/meilisearch/blob/main/.github/dependabot.yml), so no need to update them when solving this issue.
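As a rough illustration of the two commands named in the checklist above, a minimal shell session might look like this (assuming the cargo-udeps and cargo-edit helper subcommands are installed locally; exact flags can vary between versions):

# install the helper subcommands if they are not already available
cargo install cargo-udeps cargo-edit

# list dependencies that are declared but never used, across all targets (nightly only)
cargo +nightly udeps --all-targets

# bump dependency versions in Cargo.toml (cargo-edit's upgrade subcommand)
cargo upgrade

# rebuild and run the test suite to confirm the upgrades did not break anything
cargo test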

View File

@ -0,0 +1,100 @@
name: PR Milestone Check
on:
pull_request:
types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
branches:
- "main"
- "release-v*.*.*"
jobs:
check-milestone:
name: Check PR Milestone
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Validate PR milestone
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
// Get PR number directly from the event payload
const prNumber = context.payload.pull_request.number;
// Get PR details
const { data: prData } = await github.rest.pulls.get({
owner: 'meilisearch',
repo: 'meilisearch',
pull_number: prNumber
});
// Get base branch name
const baseBranch = prData.base.ref;
console.log(`Base branch: ${baseBranch}`);
// Get PR milestone
const prMilestone = prData.milestone;
if (!prMilestone) {
core.setFailed('PR must have a milestone assigned');
return;
}
console.log(`PR milestone: ${prMilestone.title}`);
// Validate milestone format: vx.y.z
const milestoneRegex = /^v\d+\.\d+\.\d+$/;
if (!milestoneRegex.test(prMilestone.title)) {
core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
return;
}
// For main branch PRs, check if the milestone is the highest one
if (baseBranch === 'main') {
// Get all milestones
const { data: milestones } = await github.rest.issues.listMilestones({
owner: 'meilisearch',
repo: 'meilisearch',
state: 'open',
sort: 'due_on',
direction: 'desc'
});
// Sort milestones by version number (vx.y.z)
const sortedMilestones = milestones
.filter(m => milestoneRegex.test(m.title))
.sort((a, b) => {
const versionA = a.title.substring(1).split('.').map(Number);
const versionB = b.title.substring(1).split('.').map(Number);
// Compare major version
if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
// Compare minor version
if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
// Compare patch version
return versionB[2] - versionA[2];
});
if (sortedMilestones.length === 0) {
core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
return;
}
const highestMilestone = sortedMilestones[0];
console.log(`Highest milestone: ${highestMilestone.title}`);
if (prMilestone.title !== highestMilestone.title) {
core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
return;
}
} else {
// For release branches, the milestone should match the branch version
const branchVersion = baseBranch.substring(8); // remove 'release-'
if (prMilestone.title !== branchVersion) {
core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
return;
}
}
console.log('PR milestone validation passed!');

View File

@ -15,7 +15,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/meilisearch/main/.github/templates/dependency-issue.md > $ISSUE_TEMPLATE
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
- name: Create issue
run: |
gh issue create \

View File

@ -3,7 +3,7 @@ name: Look for flaky tests
on:
workflow_dispatch:
schedule:
- cron: '0 4 * * *' # Every day at 4:00AM
- cron: "0 12 * * FRI" # Every Friday at 12:00PM
jobs:
flaky:

224
.github/workflows/milestone-workflow.yml vendored Normal file
View File

@ -0,0 +1,224 @@
name: Milestone's workflow
# /!\ No git flow are handled here
# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
# - the roadmap issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/changelog-issue.md
# - update the ruleset to add the current release version to the list of allowed versions and be able to use the merge queue.
# For each Milestone closed
# - the `release_version` label is created
# - this label is applied to all issues/PRs in the Milestone
on:
milestone:
types: [created, closed]
env:
MILESTONE_VERSION: ${{ github.event.milestone.title }}
MILESTONE_URL: ${{ github.event.milestone.html_url }}
MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }}
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
# -----------------
# MILESTONE CREATED
# -----------------
get-release-version:
if: github.event.action == 'created'
runs-on: ubuntu-latest
outputs:
is-patch: ${{ steps.check-patch.outputs.is-patch }}
steps:
- uses: actions/checkout@v3
- name: Check if this release is a patch release only
id: check-patch
run: |
echo version: $MILESTONE_VERSION
if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then
echo 'This is NOT a patch release'
echo "is-patch=false" >> $GITHUB_OUTPUT
elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo 'This is a patch release'
echo "is-patch=true" >> $GITHUB_OUTPUT
else
echo "Not a valid format of release, check the Milestone's title."
echo 'Should be vX.Y.Z'
exit 1
fi
create-roadmap-issue:
needs: get-release-version
# Create the roadmap issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
- name: Replace all empty occurrences in the templates
run: |
# Replace all <<version>> occurrences
sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
# Replace all <<milestone_id>> occurrences
milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
# Replace release date if exists
if [[ ! -z $MILESTONE_DUE_ON ]]; then
date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE
fi
- name: Create the issue
run: |
gh issue create \
--title "$MILESTONE_VERSION ROADMAP" \
--label 'epic,impacts docs,impacts integrations,impacts cloud' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
create-changelog-issue:
needs: get-release-version
# Create the changelog issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
- name: Replace all empty occurrences in the templates
run: |
# Replace all <<version>> occurrences
sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
# Replace all <<milestone_id>> occurrences
milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Create release changelogs for $MILESTONE_VERSION" \
--label 'impacts docs,documentation' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION \
--assignee curquiza
create-update-version-issue:
needs: get-release-version
# Create the update-version issue even if the release is a patch release
if: github.event.action == 'created'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update version in Cargo.toml for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
create-update-openapi-issue:
needs: get-release-version
# Create the openAPI issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-openapi-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update Open API file for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
update-ruleset:
runs-on: ubuntu-latest
if: github.event.action == 'created'
steps:
- uses: actions/checkout@v3
- name: Install jq
run: |
sudo apt-get update
sudo apt-get install -y jq
- name: Update ruleset
env:
# gh api repos/meilisearch/meilisearch/rulesets --jq '.[] | {name: .name, id: .id}'
RULESET_ID: 4253297
BRANCH_NAME: ${{ github.event.inputs.branch_name }}
run: |
echo "RULESET_ID: ${{ env.RULESET_ID }}"
echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
# Get current ruleset conditions
CONDITIONS=$(gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} --jq '{ conditions: .conditions }')
# Update the conditions by appending the milestone version
UPDATED_CONDITIONS=$(echo $CONDITIONS | jq '.conditions.ref_name.include += ["refs/heads/release-'${{ env.MILESTONE_VERSION }}'"]')
# Update the ruleset from stdin (-)
echo $UPDATED_CONDITIONS |
gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} \
--method PUT \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
--input -
# ----------------
# MILESTONE CLOSED
# ----------------
create-release-label:
if: github.event.action == 'closed'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Create the ${{ env.MILESTONE_VERSION }} label
run: |
label_description="PRs/issues solved in $MILESTONE_VERSION"
if [[ ! -z $MILESTONE_DUE_ON ]]; then
date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
label_description="$label_description released on $date"
fi
gh api repos/meilisearch/meilisearch/labels \
--method POST \
-H "Accept: application/vnd.github+json" \
-f name="$MILESTONE_VERSION" \
-f description="$label_description" \
-f color='ff5ba3'
labelize-all-milestone-content:
if: github.event.action == 'closed'
needs: create-release-label
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone
run: |
prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
for pr in $prs; do
gh pr edit $pr --add-label $MILESTONE_VERSION
done
- name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone
run: |
issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
for issue in $issues; do
gh issue edit $issue --add-label $MILESTONE_VERSION
done

View File

@ -16,8 +16,6 @@ on:
jobs:
docker:
runs-on: docker
permissions:
id-token: write # This is needed to use Cosign in keyless mode
steps:
- uses: actions/checkout@v3
@ -64,9 +62,6 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install cosign
uses: sigstore/cosign-installer@3454372f43399081ed03b604cb2d021dabca52bb # tag=v3.8.2
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
@ -90,7 +85,6 @@ jobs:
- name: Build and push
uses: docker/build-push-action@v6
id: build-and-push
with:
push: true
platforms: linux/amd64,linux/arm64
@ -100,17 +94,6 @@ jobs:
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
- name: Sign the images with GitHub OIDC Token
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
TAGS: ${{ steps.meta.outputs.tags }}
run: |
images=""
for tag in ${TAGS}; do
images+="${tag}@${DIGEST} "
done
cosign sign --yes ${images}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)

View File

@ -1,16 +0,0 @@
name: Release Drafter
on:
push:
branches:
- main
jobs:
update_release_draft:
runs-on: ubuntu-latest
steps:
- uses: release-drafter/release-drafter@v6
with:
config-name: release-draft-template.yml
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFTER_TOKEN }}

View File

@ -9,7 +9,7 @@ on:
required: false
default: nightly
schedule:
- cron: '0 6 * * *' # Every day at 6:00am
- cron: "0 6 * * MON" # Every Monday at 6:00AM
env:
MEILI_MASTER_KEY: 'masterKey'
@ -344,23 +344,15 @@ jobs:
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
env:
RAILS_VERSION: '7.0'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Install SQLite dependencies
run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
- name: Set up Ruby
- name: Set up Ruby 3
uses: ruby/setup-ruby@v1
with:
ruby-version: 3
bundler-cache: true
- name: Start MongoDB
uses: supercharge/mongodb-github-action@1.12.0
with:
mongodb-version: 8.0
- name: Run tests
run: bundle exec rspec

View File

@ -3,7 +3,7 @@ name: Test suite
on:
workflow_dispatch:
schedule:
# Every day at 5:00am
# Everyday at 5:00am
- cron: "0 5 * * *"
pull_request:
merge_group:

8
.gitignore vendored
View File

@ -11,12 +11,18 @@
/bench
/_xtask_benchmark.ms
/benchmarks
.DS_Store
# Snapshots
## ... large
*.full.snap
## ... unreviewed
*.snap.new
## ... pending
*.pending-snap
# Tmp files
.tmp*
# Database snapshot
crates/meilisearch/db.snapshot

View File

@ -106,13 +106,7 @@ Run `cargo xtask --help` from the root of the repository to find out what is ava
#### Update the openAPI file if the API changed
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api):
- Pull the latest version of the latest rc of Meilisearch `git checkout release-vX.Y.Z; git pull`
- Starts Meilisearch with the `swagger` feature flag: `cargo run --features swagger`
- On a browser, open the following URL: http://localhost:7700/scalar
- Click the « Download openAPI file »
- Open a PR replacing [this file](https://github.com/meilisearch/open-api/blob/main/open-api.json) with the one downloaded
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
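The removed bullet list above still describes the manual procedure; as a hedged sketch (assuming the swagger feature flag and the /scalar page behave as those old instructions state), it boils down to roughly:

# check out and update the latest release-candidate branch
git checkout release-vX.Y.Z && git pull

# start Meilisearch with the OpenAPI endpoint enabled
cargo run --features swagger

# then, in a browser, open http://localhost:7700/scalar, use the
# « Download openAPI file » button, and open a PR replacing open-api.json
# in the meilisearch/open-api repository with the downloaded file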
### Logging
@ -166,37 +160,25 @@ Some notes on GitHub PRs:
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
## Merging PRs
This project uses GitHub Merge Queues that helps us manage pull requests merging.
Before merging a PR, the maintainer should ensure the following requirements are met
- Automated tests have been added.
- If some tests cannot be automated, manual rigorous tests should be applied.
- ⚠️ If there is an change in the DB: it's mandatory to manually test the `--experimental-dumpless-upgrade` on a DB of the previous Meilisearch minor version (e.g. v1.13 for the v1.14 release).
- If necessary, the feature have been tested in the Cloud production environment (with [prototypes](./documentation/prototypes.md)) and the Cloud UI is ready.
- If necessary, the [documentation](https://github.com/meilisearch/documentation) related to the implemented feature in the PR is ready.
- If necessary, the [integrations](https://github.com/meilisearch/integration-guides) related to the implemented feature in the PR are ready.
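For the dumpless-upgrade requirement in the list above, a minimal sketch of the manual check (assuming a data directory created by the previous minor version sits at ./data.ms, and using the flag exactly as named in the checklist) could be:

# start the new build against a database produced by the previous minor release,
# opting into the experimental dumpless upgrade path
cargo run --release -- --db-path ./data.ms --experimental-dumpless-upgrade

# afterwards, hit the usual endpoints (stats, search, settings) to verify that
# the upgraded database still behaves as expected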
## Publish Process (for internal team only)
## Release Process (for internal team only)
Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org/).
### How to publish a new release
### Automation to rebase and Merge the PRs
The full Meilisearch release process is described in [this guide](./documentation/release.md).
This project uses GitHub Merge Queues that helps us manage pull requests merging.
### How to Publish a new Release
The full Meilisearch release process is described in [this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md). Please follow it carefully before doing any release.
### How to publish a prototype
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
This happens in two steps:
- [Release the prototype](./documentation/prototypes.md#how-to-publish-a-prototype)
- [Communicate about it](./documentation/prototypes.md#communication)
### How to implement and publish an experimental feature
Here is our [guidelines and process](./documentation/experimental-features.md) to implement and publish an experimental feature.
- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
### Release assets

View File

@ -119,6 +119,6 @@ Meilisearch is, and will always be, open-source! If you want to contribute to th
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](./documentation/versioning-policy.md).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).
Differently from the binaries, crates in this repository are not currently available on [crates.io](https://crates.io/) and do not follow [SemVer conventions](https://semver.org).

View File

@ -51,3 +51,8 @@ harness = false
[[bench]]
name = "indexing"
harness = false
[[bench]]
name = "sort"
harness = false
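With the [[bench]] target declared above, the new sort benchmark can presumably be run like any other Criterion bench in this crate; a minimal sketch (run from the crate that declares the target, and assuming the benchmark datasets referenced by datasets_paths are available):

# run only the new sort benchmark
cargo bench --bench sort

# or narrow it down to a single group, e.g. the geo sort runs
cargo bench --bench sort -- "geo sort"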

View File

@ -0,0 +1,114 @@
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
//!
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields =
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_displayed_fields(displayed_fields);
let sortable_fields =
["_geo", "name", "population", "elevation", "timezone", "modification-date"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_sortable_fields(sortable_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
configure: base_conf,
primary_key: Some("geonameid"),
queries: &[""],
offsets: &[
Some((0, 20)), // The most common query in the real world
Some((0, 500)), // A query that ranges over many documents
Some((980, 20)), // The worst query that could happen in the real world
Some((800_000, 20)) // The worst query
],
get_documents: true,
..Conf::BASE
};
fn bench_sort(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
utils::Conf {
group_name: "without sort",
sort: None,
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different values",
sort: Some(vec!["name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar values",
sort: Some(vec!["timezone:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar then different values",
sort: Some(vec!["timezone:desc", "name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different then similar values",
sort: Some(vec!["timezone:desc", "name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "geo sort",
sample_size: Some(10),
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar values then geo sort",
sample_size: Some(50),
sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different values then geo sort",
sample_size: Some(50),
sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many fields",
sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_sort);
criterion_main!(benches);

View File

@ -9,6 +9,7 @@ use anyhow::Context;
use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::documents::sort::recursive_sort;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
@ -35,6 +36,12 @@ pub struct Conf<'a> {
pub configure: fn(&mut Settings),
pub filter: Option<&'a str>,
pub sort: Option<Vec<&'a str>>,
/// set to skip documents (offset, limit)
pub offsets: &'a [Option<(usize, usize)>],
/// enable if you want to bench getting documents without querying
pub get_documents: bool,
/// configure the benchmark sample size
pub sample_size: Option<usize>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key, if there is None we'll auto-generate docids for every documents
@ -52,6 +59,9 @@ impl Conf<'_> {
configure: |_| (),
filter: None,
sort: None,
offsets: &[None],
get_documents: false,
sample_size: None,
optional_words: true,
primary_key: None,
};
@ -145,25 +155,79 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
let name = format!("{}: {}", file_name, conf.group_name);
let mut group = c.benchmark_group(&name);
if let Some(sample_size) = conf.sample_size {
group.sample_size(sample_size);
}
for &query in conf.queries {
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
let _ids = search.execute().unwrap();
});
});
for offset in conf.offsets {
let parameter = match offset {
None => query.to_string(),
Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
};
group.bench_with_input(
BenchmarkId::from_parameter(parameter),
&query,
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
if let Some((offset, limit)) = offset {
search.offset(*offset).limit(*limit);
}
let _ids = search.execute().unwrap();
});
},
);
}
}
if conf.get_documents {
for offset in conf.offsets {
let parameter = match offset {
None => String::from("get_documents"),
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
};
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
let all_docs = index.documents_ids(&rtxn).unwrap();
let facet_sort =
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
let iter = facet_sort.iter().unwrap();
if let Some((offset, limit)) = offset {
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = iter.collect::<Vec<_>>();
}
} else {
let all_docs = index.documents_ids(&rtxn).unwrap();
if let Some((offset, limit)) = offset {
let _results =
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = all_docs.iter().collect::<Vec<_>>();
}
}
});
});
}
}
group.finish();
index.prepare_for_closing().wait();

View File

@ -60,7 +60,7 @@ use nom::combinator::{cut, eof, map, opt};
use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, terminated, tuple};
use nom::Finish;
use nom::{Finish, Slice};
use nom_locate::LocatedSpan;
pub(crate) use value::parse_value;
use value::word_exact;
@ -121,6 +121,16 @@ impl<'a> Token<'a> {
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
}
}
/// Split the token by a delimiter and return an iterator of tokens.
/// Each token in the iterator will have its own span that corresponds to a slice of the original token's span.
pub fn split(&self, delimiter: &'a str) -> impl Iterator<Item = Token<'a>> + '_ {
let original_addr = self.value().as_ptr() as usize;
self.value().split(delimiter).map(move |part| {
let offset = part.as_ptr() as usize - original_addr;
Token::new(self.span.slice(offset..offset + part.len()), Some(part.to_string()))
})
}
}
impl<'a> From<Span<'a>> for Token<'a> {
@ -179,6 +189,25 @@ impl<'a> FilterCondition<'a> {
}
}
pub fn use_vector_filter(&self) -> Option<&Token> {
match self {
FilterCondition::Condition { fid, op: _ } => {
if fid.value().starts_with("_vectors.") || fid.value() == "_vectors" {
Some(fid)
} else {
None
}
}
FilterCondition::Not(this) => this.use_vector_filter(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
seq.iter().find_map(|filter| filter.use_vector_filter())
}
FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::In { .. } => None,
}
}
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
if depth == 0 {
return Box::new(std::iter::empty());
@ -1043,4 +1072,103 @@ pub mod tests {
let token: Token = s.into();
assert_eq!(token.value(), s);
}
#[test]
fn split() {
let s = "test string that should not be parsed\n newline";
let token: Token = s.into();
let parts: Vec<_> = token.split(" ").collect();
insta::assert_snapshot!(format!("{parts:#?}"), @r#"
[
Token {
span: LocatedSpan {
offset: 0,
line: 1,
fragment: "test",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"test",
),
},
Token {
span: LocatedSpan {
offset: 5,
line: 1,
fragment: "string",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"string",
),
},
Token {
span: LocatedSpan {
offset: 12,
line: 1,
fragment: "that",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"that",
),
},
Token {
span: LocatedSpan {
offset: 17,
line: 1,
fragment: "should",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"should",
),
},
Token {
span: LocatedSpan {
offset: 24,
line: 1,
fragment: "not",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"not",
),
},
Token {
span: LocatedSpan {
offset: 28,
line: 1,
fragment: "be",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"be",
),
},
Token {
span: LocatedSpan {
offset: 31,
line: 1,
fragment: "parsed\n",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"parsed\n",
),
},
Token {
span: LocatedSpan {
offset: 39,
line: 2,
fragment: "newline",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"newline",
),
},
]
"#);
}
}

View File

@ -62,13 +62,14 @@ impl IndexScheduler {
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
@ -90,8 +91,8 @@ impl IndexScheduler {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
@ -103,8 +104,8 @@ impl IndexScheduler {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
@ -122,7 +123,6 @@ impl IndexScheduler {
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
@ -167,10 +167,10 @@ impl IndexScheduler {
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(50 * 1024 * 1024); // defaults to 50 MiB
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
request_threads()
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
@ -265,9 +265,8 @@ impl IndexScheduler {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(api_key) = api_key {
request = request
.set("Authorization", &(format!("Bearer {api_key}")));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
@ -276,7 +275,7 @@ impl IndexScheduler {
}
buffer.extend_from_slice(&tmp_buffer);
if i % 100 == 0 {
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
@ -284,8 +283,8 @@ impl IndexScheduler {
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(api_key) = api_key {
request = request.set("Authorization", &(format!("Bearer {api_key}")));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
@ -298,6 +297,9 @@ impl IndexScheduler {
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}

View File

@ -736,7 +736,7 @@ fn test_document_addition_mixed_rights_with_index() {
#[test]
fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
// We're going to autobatch multiple document addition.
// - The index does not exists
// - The index does not exist
// - The first document addition don't have the right to create an index
// - The second do. They should not batch together.
// - The second should batch with everything else as it's going to create an index.

View File

@ -158,7 +158,7 @@ impl AuthController {
self.store.delete_all_keys()
}
/// Delete all the keys in the DB.
/// Insert a key directly into the store.
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@ -351,6 +351,7 @@ pub struct IndexSearchRules {
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_chat())?;
store.put_api_key(Key::default_read_only_admin())?;
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;

View File

@ -88,7 +88,13 @@ impl HeedAuthStore {
let mut actions = HashSet::new();
for action in &key.actions {
match action {
Action::All => actions.extend(enum_iterator::all::<Action>()),
Action::All => {
actions.extend(enum_iterator::all::<Action>());
actions.remove(&Action::AllGet);
}
Action::AllGet => {
actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
}
Action::DocumentsAll => {
actions.extend(
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]

View File

@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@ -415,6 +416,7 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
}
@ -476,7 +478,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
UserError::InvalidSearchableAttribute { .. } => {
Code::InvalidSearchAttributesToSearchOn
}
@ -492,7 +495,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}

View File

@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError};
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
"Search the database for relevant JSON documents using an optional query.";
"Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@ -161,18 +162,31 @@ impl ChatCompletionSource {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ChatCompletionPrompts {
#[serde(default)]
pub system: String,
#[serde(default)]
pub search_description: String,
#[serde(default)]
pub search_q_param: String,
#[serde(default = "default_search_filter_param")]
pub search_filter_param: String,
#[serde(default)]
pub search_index_uid_param: String,
}
/// Used when `search_filter_param` is not provided, which can happen
/// when upgrading a database created with v1.15.
fn default_search_filter_param() -> String {
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string()
}
impl Default for ChatCompletionPrompts {
fn default() -> Self {
Self {
system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
}
}
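For readers unfamiliar with serde's `default` attribute, here is a minimal, self-contained sketch of why `default_search_filter_param` matters for dumpless upgrades: settings written by v1.15 have no `searchFilterParam` field, and the attribute makes deserialization fall back to the default prompt instead of failing. The struct and constant below are stand-ins for illustration, not the real Meilisearch types.

use serde::Deserialize;

const DEFAULT_FILTER_PROMPT: &str = "default filter prompt";

fn default_filter_prompt() -> String {
    DEFAULT_FILTER_PROMPT.to_string()
}

#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Prompts {
    system: String,
    // Absent from payloads written before the field existed, so fall back to the default.
    #[serde(default = "default_filter_prompt")]
    search_filter_param: String,
}

fn main() {
    // An "old" payload with no `searchFilterParam` key at all.
    let old = r#"{ "system": "You are a helpful assistant." }"#;
    let prompts: Prompts = serde_json::from_str(old).unwrap();
    assert_eq!(prompts.search_filter_param, DEFAULT_FILTER_PROMPT);
}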

View File

@ -144,6 +144,21 @@ impl Key {
}
}
pub fn default_read_only_admin() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
Self {
name: Some("Default Read-Only Admin API Key".to_string()),
description: Some("Use it to read information across the whole database. Caution! Do not expose this key on a public frontend".to_string()),
uid,
actions: vec![Action::AllGet, Action::KeysGet],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: now,
updated_at: now,
}
}
pub fn default_search() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
@ -218,6 +233,9 @@ pub enum Action {
#[serde(rename = "*")]
#[deserr(rename = "*")]
All = 0,
#[serde(rename = "*.get")]
#[deserr(rename = "*.get")]
AllGet,
#[serde(rename = "search")]
#[deserr(rename = "search")]
Search,
@ -399,6 +417,52 @@ impl Action {
}
}
/// Whether the action should be included in [Action::AllRead].
pub fn is_read(&self) -> bool {
use Action::*;
// An exhaustive match is used so that adding a new action forces an update of this list.
match self {
// Any action that expands to others must return false, as it wouldn't be able to expand recursively.
All | AllGet | DocumentsAll | IndexesAll | ChatsAll | TasksAll | SettingsAll
| StatsAll | MetricsAll | DumpsAll | SnapshotsAll | ChatsSettingsAll => false,
Search => true,
DocumentsAdd => false,
DocumentsGet => true,
DocumentsDelete => false,
Export => true,
IndexesAdd => false,
IndexesGet => true,
IndexesUpdate => false,
IndexesDelete => false,
IndexesSwap => false,
TasksCancel => false,
TasksDelete => false,
TasksGet => true,
SettingsGet => true,
SettingsUpdate => false,
StatsGet => true,
MetricsGet => true,
DumpsCreate => false,
SnapshotsCreate => false,
Version => true,
KeysAdd => false,
KeysGet => false, // Disabled in order to prevent privilege escalation
KeysUpdate => false,
KeysDelete => false,
ExperimentalFeaturesGet => true,
ExperimentalFeaturesUpdate => false,
NetworkGet => true,
NetworkUpdate => false,
ChatCompletions => false, // Disabled because it might trigger generation of new chats
ChatsGet => true,
ChatsDelete => false,
ChatsSettingsGet => true,
ChatsSettingsUpdate => false,
}
}
pub const fn repr(&self) -> u8 {
*self as u8
}
@ -408,6 +472,7 @@ pub mod actions {
use super::Action::*;
pub(crate) const ALL: u8 = All.repr();
pub const ALL_GET: u8 = AllGet.repr();
pub const SEARCH: u8 = Search.repr();
pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
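To make the relationship between `*.get` and `is_read()` concrete: a key holding `AllGet` is effectively granted every action whose `is_read()` returns true, with `keys.get` handled separately to avoid privilege escalation. The helper below is hypothetical, written only to illustrate that intent; it is not the actual authorization code path.

// Hypothetical illustration, not part of Meilisearch.
fn expand_all_get(all_actions: Vec<Action>) -> Vec<Action> {
    all_actions
        .into_iter()
        .filter(|action| action.is_read()) // e.g. Search, DocumentsGet, StatsGet, ...
        .collect()
}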

Binary file not shown.

View File

@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View File

@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
pub trait Aggregate: 'static + mopa::Any + Send {
/// The name of the event that will be sent to segment.

View File

@ -49,7 +49,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(if *.0 % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal }))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
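The intent of the `UnitType` change above: payload limits that are exact multiples of 1024 read naturally in binary units, anything else in decimal. A rough sketch, assuming the `byte_unit` crate exposes `Byte::from_u64` and `get_appropriate_unit` exactly as they are used in the error attribute:

use byte_unit::{Byte, UnitType};

// e.g. 10 * 1024 * 1024 formats as "10 MiB", while 10_000_000 formats as "10 MB".
fn human_readable_limit(limit: usize) -> String {
    let unit = if limit % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal };
    Byte::from_u64(limit as u64).get_appropriate_unit(unit).to_string()
}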

View File

@ -27,9 +27,10 @@ use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts,
ChatCompletionSource as DbChatCompletionSource, SystemRole,
};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget};
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
@ -169,6 +170,7 @@ fn setup_search_tool(
let mut index_uids = Vec::new();
let mut function_description = prompts.search_description.clone();
let mut filter_description = prompts.search_filter_param.clone();
index_scheduler.try_for_each_index::<_, ()>(|name, index| {
// Make sure to skip unauthorized indexes
if !filters.is_index_authorized(name) {
@ -180,16 +182,22 @@ fn setup_search_tool(
let index_description = chat_config.description;
let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
index_uids.push(name.to_string());
let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap
let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
let _ = writeln!(&mut filter_description, "{facet_distributions}");
Ok(())
})?;
tracing::debug!("LLM function description: {function_description}");
tracing::debug!("LLM filter description: {filter_description}");
let tool = ChatCompletionToolArgs::default()
.r#type(ChatCompletionToolType::Function)
.function(
FunctionObjectArgs::default()
.name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
.description(&function_description)
.description(function_description)
.parameters(json!({
"type": "object",
"properties": {
@ -203,9 +211,13 @@ fn setup_search_tool(
// "type": ["string", "null"],
"type": "string",
"description": prompts.search_q_param,
},
"filter": {
"type": "string",
"description": filter_description,
}
},
"required": ["index_uid", "q"],
"required": ["index_uid", "q", "filter"],
"additionalProperties": false,
}))
.strict(true)
@ -247,11 +259,19 @@ async fn process_search_request(
auth_token: &str,
index_uid: String,
q: Option<String>,
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) };
let mut query = SearchQuery {
q,
filter: filter.map(serde_json::Value::from),
..SearchQuery::from(search_parameters)
};
tracing::debug!("LLM query: {:?}", query);
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
auth_ctrl,
auth_token,
@ -280,14 +300,23 @@ async fn process_search_request(
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
search_from_kind(index_uid, search_kind, search)
.map(|(search_results, _)| (rtxn, search_results))
.map_err(ResponseError::from)
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
Err(MeilisearchHttpError::Milli {
error: meilisearch_types::milli::Error::UserError(user_error),
index_name: _,
}) => Ok((rtxn, Err(user_error))),
Err(err) => Err(ResponseError::from(err)),
}
})
.await;
permit.drop().await;
let output = output?;
let output = match output? {
Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)),
Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())),
Err(err) => Err(err),
};
let mut documents = Vec::new();
if let Ok((ref rtxn, ref search_result)) = output {
MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
@ -395,16 +424,19 @@ async fn non_streamed_chat(
for call in meili_calls {
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
)
.await
.map_err(|e| e.to_string()),
Ok(SearchInIndexParameters { index_uid, q, filter }) => {
process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
.map_err(|e| e.to_string())
}
Err(err) => Err(err.to_string()),
};
@ -719,13 +751,14 @@ async fn handle_meili_tools(
let mut error = None;
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request(
index_scheduler,
auth_ctrl.clone(),
search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
{
@ -801,4 +834,42 @@ struct SearchInIndexParameters {
index_uid: String,
/// The query parameter to use.
q: Option<String>,
/// The filter parameter to use.
filter: Option<String>,
}
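For context, the arguments the LLM is now expected to produce for the search tool carry all three fields, echoing the few-shot examples in the prompts above. An illustrative value (field contents made up, in-module struct literal shown only as a sketch):

let example_call = SearchInIndexParameters {
    index_uid: "movies".to_string(),
    q: Some("Superman".to_string()),
    filter: Some("genres IN [Adventure, Fiction] AND year >= 2018".to_string()),
};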
fn format_facet_distributions(
index: &Index,
rtxn: &RoTxn,
max_values_per_facet: usize,
) -> meilisearch_types::milli::Result<String> {
let universe = index.documents_ids(rtxn)?;
let rules = index.filterable_attributes_rules(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_attributes = fields_ids_map
.names()
.filter(|name| rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match)))
.map(|name| (name, OrderBy::Count));
let facets_distribution = index
.facets_distribution(rtxn)
.max_values_per_facet(max_values_per_facet)
.candidates(universe)
.facets(filterable_attributes)
.execute()?;
let mut output = String::new();
for (facet_name, entries) in facets_distribution {
let _ = write!(&mut output, "{}: ", facet_name);
let total_entries = entries.len();
for (i, (value, _count)) in entries.into_iter().enumerate() {
let _ = if total_entries.saturating_sub(1) == i {
write!(&mut output, "{value}.")
} else {
write!(&mut output, "{value}, ")
};
}
let _ = writeln!(&mut output);
}
Ok(output)
}
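To make the runtime behaviour concrete: for each authorized index, `setup_search_tool` appends a facet-distribution section to the filter parameter description, so the text handed to the LLM ends up looking roughly like the sketch below. The index name and values are invented for illustration; the base text is `DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT`, which ends with "The following is a list of fields that can be filtered on: ".

## Facet distributions of the movies index
genres: Drama, Comedy, Adventure.
year: 2021, 2020, 2018.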

View File

@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
DEFAULT_CHAT_SYSTEM_PROMPT,
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
@ -84,6 +84,11 @@ async fn patch_settings(
Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_q_param,
},
search_filter_param: match new_prompts.search_filter_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_filter_param,
},
search_index_uid_param: match new_prompts.search_index_uid_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
@ -252,6 +257,10 @@ pub struct ChatPrompts {
#[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
pub search_q_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchFilterParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is filter parameter..."))]
pub search_filter_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
pub search_index_uid_param: Setting<String>,
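A hedged sketch of how a client could set or reset the new prompt through this settings route. The nesting under a `prompts` object and the convention that `null` maps to `Setting::Reset` follow the usual Meilisearch settings pattern suggested by the code above, but the payloads are illustrative, not taken from documentation:

// Illustrative payloads only.
let set_filter_prompt = json!({
    "prompts": {
        "searchFilterParam": "The filter string, e.g. price > 100 AND category = 'electronics'."
    }
});
let reset_filter_prompt = json!({
    "prompts": {
        // Assuming null resets to DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, as for other settings.
        "searchFilterParam": null
    }
});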

View File

@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;
use std::str::FromStr;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@ -17,9 +18,10 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::documents::sort::recursive_sort;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::milli::{AscDesc, DocumentId};
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
@ -42,6 +44,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
@ -135,6 +138,10 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
per_document_id: bool,
// if a filter was used
per_filter: bool,
with_vector_filter: bool,
// if documents were sorted
sort: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
@ -151,39 +158,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
marker: std::marker::PhantomData<Method>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
}
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
};
let ids = match query {
DocumentFetchKind::Normal { ids, .. } => *ids,
DocumentFetchKind::PerDocumentId { .. } => 0,
};
Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
max_document_ids: ids,
marker: PhantomData,
}
}
}
impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
@ -193,6 +167,8 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
per_filter: self.per_filter | new.per_filter,
with_vector_filter: self.with_vector_filter | new.with_vector_filter,
sort: self.sort | new.sort,
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset),
@ -276,6 +252,8 @@ pub async fn get_document(
retrieve_vectors: param_retrieve_vectors.0,
per_document_id: true,
per_filter: false,
with_vector_filter: false,
sort: false,
max_limit: 0,
max_offset: 0,
max_document_ids: 0,
@ -406,6 +384,8 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
sort: Option<String>,
}
#[derive(Debug, Deserr, ToSchema)]
@ -430,6 +410,9 @@ pub struct BrowseQuery {
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
#[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
sort: Option<Vec<String>>,
}
/// Get documents with POST
@ -495,6 +478,11 @@ pub async fn documents_by_query_post(
analytics.publish(
DocumentsFetchAggregator::<DocumentsPOST> {
per_filter: body.filter.is_some(),
with_vector_filter: body
.filter
.as_ref()
.is_some_and(|f| f.to_string().contains("_vectors")),
sort: body.sort.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
@ -571,7 +559,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
params.into_inner();
let filter = match filter {
@ -582,20 +570,24 @@ pub async fn get_documents(
None => None,
};
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter,
ids,
ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
};
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
per_filter: query.filter.is_some(),
with_vector_filter: query
.filter
.as_ref()
.is_some_and(|f| f.to_string().contains("_vectors")),
sort: query.sort.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
@ -615,7 +607,7 @@ fn documents_by_query(
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
@ -633,6 +625,18 @@ fn documents_by_query(
None
};
let sort_criteria = if let Some(sort) = &sort {
let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
}
};
Some(sorts)
} else {
None
};
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(
&index,
@ -643,6 +647,7 @@ fn documents_by_query(
fields,
retrieve_vectors,
index_scheduler.features(),
sort_criteria,
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -1461,8 +1466,6 @@ fn some_documents<'a, 't: 'a>(
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
@ -1494,6 +1497,7 @@ fn retrieve_documents<S: AsRef<str>>(
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
sort_criteria: Option<Vec<AscDesc>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
@ -1526,15 +1530,32 @@ fn retrieve_documents<S: AsRef<str>>(
})?
}
let (it, number_of_documents) = {
let (it, number_of_documents) = if let Some(sort) = sort_criteria {
let number_of_documents = candidates.len();
let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
let iter = facet_sort.iter()?;
let mut documents = Vec::with_capacity(limit);
for result in iter.skip(offset).take(limit) {
documents.push(result?);
}
(
itertools::Either::Left(some_documents(
index,
&rtxn,
documents.into_iter(),
retrieve_vectors,
)?),
number_of_documents,
)
} else {
let number_of_documents = candidates.len();
(
some_documents(
itertools::Either::Right(some_documents(
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
)?),
number_of_documents,
)
};
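Because the document route reuses the search sort grammar, the accepted values are the familiar `field:asc` / `field:desc` strings shown in the schema example above (`["title:asc", "rating:desc"]`), passed either as a `sort` array in the POST body or as a single query parameter on GET, which `fix_sort_query_parameters` splits into individual expressions. A minimal sketch of the parsing step, using the same `milli::AscDesc::from_str` call as the code above (error handling elided):

use std::str::FromStr;

// Sketch against the milli types used above.
let sort = vec!["title:asc".to_string(), "rating:desc".to_string()];
let sort_criteria: Vec<milli::AscDesc> = sort
    .iter()
    .map(|s| milli::AscDesc::from_str(s))
    .collect::<Result<_, _>>()
    .expect("valid asc/desc expressions");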

View File

@ -40,6 +40,7 @@ pub struct SearchAggregator<Method: AggregateMethod> {
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
filter_on_vectors: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
@ -163,6 +164,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
ret.filter_on_vectors = stringified_filters.contains("_vectors");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
@ -260,6 +262,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_on_vectors,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
@ -314,6 +317,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
self.filter_on_vectors |= filter_on_vectors;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria =
@ -388,6 +392,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_on_vectors,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
@ -445,6 +450,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"on_vectors": filter_on_vectors,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},

View File

@ -336,7 +336,7 @@ impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "Task :taskUid not found.",
"code": "task_not_found",
@ -430,7 +430,7 @@ async fn cancel_tasks(
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "Task :taskUid not found.",
"code": "task_not_found",
@ -611,7 +611,7 @@ async fn get_tasks(
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "Task :taskUid not found.",
"code": "task_not_found",
@ -665,7 +665,7 @@ async fn get_task(
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "Task :taskUid not found.",
"code": "task_not_found",

View File

@ -745,10 +745,9 @@ impl SearchByIndex {
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
return Err(milli::SortError::from(asc_desc_error)
.into_search_error()
.into())
}
};
Some(sorts)

View File

@ -1051,6 +1051,7 @@ pub fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.max_total_hits(Some(max_total_hits));
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
@ -1091,7 +1092,7 @@ pub fn prepare_search<'t>(
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(SortError::from(asc_desc_error)).into())
return Err(SortError::from(asc_desc_error).into_search_error().into())
}
};
@ -2077,7 +2078,7 @@ pub(crate) fn parse_filter(
})?;
if let Some(ref filter) = filter {
// If the contains operator is used while the contains filter features is not enabled, errors out
// If the contains operator is used while the contains filter feature is not enabled, errors out
if let Some((token, error)) =
filter.use_contains_operator().zip(features.check_contains_filter().err())
{
@ -2088,6 +2089,18 @@ pub(crate) fn parse_filter(
}
}
if let Some(ref filter) = filter {
// If a vector filter is used while the multi modal feature is not enabled, errors out
if let Some((token, error)) =
filter.use_vector_filter().zip(features.check_multimodal("using a vector filter").err())
{
return Err(ResponseError::from_msg(
token.as_external_error(error).to_string(),
Code::FeatureNotEnabled,
));
}
}
Ok(filter)
}
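A small test-style sketch of the gate above, reusing the feature toggle that the fragment fixtures later in this diff also call; the exact vector-filter syntax is not shown here, so only the feature switch is illustrated:

// Enable the experimental `multimodal` feature before using a filter that references `_vectors`;
// with the feature disabled, such a filter hits the FeatureNotEnabled error above.
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);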

View File

@ -419,14 +419,14 @@ async fn error_add_api_key_invalid_parameters_actions() {
let (response, code) = server.add_api_key(content).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
{
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
}
"###);
"#);
}
#[actix_rt::test]
@ -790,7 +790,7 @@ async fn list_api_keys() {
meili_snap::snapshot!(code, @"201 Created");
let (response, code) = server.list_api_keys("").await;
meili_snap::snapshot!(meili_snap::json_string!(response, { ".results[].createdAt" => "[ignored]", ".results[].updatedAt" => "[ignored]", ".results[].uid" => "[ignored]", ".results[].key" => "[ignored]" }), @r###"
meili_snap::snapshot!(meili_snap::json_string!(response, { ".results[].createdAt" => "[ignored]", ".results[].updatedAt" => "[ignored]", ".results[].uid" => "[ignored]", ".results[].key" => "[ignored]" }), @r#"
{
"results": [
{
@ -850,6 +850,22 @@ async fn list_api_keys() {
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Read-Only Admin API Key",
"description": "Use it to read information across the whole database. Caution! Do not expose this key on a public frontend",
"key": "[ignored]",
"uid": "[ignored]",
"actions": [
"*.get",
"keys.get"
],
"indexes": [
"*"
],
"expiresAt": null,
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Chat API Key",
"description": "Use it to chat and search from the frontend",
@ -869,9 +885,9 @@ async fn list_api_keys() {
],
"offset": 0,
"limit": 20,
"total": 4
"total": 5
}
"###);
"#);
meili_snap::snapshot!(code, @"200 OK");
}

View File

@ -304,7 +304,7 @@ async fn access_authorized_stats_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@ -344,7 +344,7 @@ async fn access_authorized_stats_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@ -384,7 +384,7 @@ async fn list_authorized_indexes_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@ -425,7 +425,7 @@ async fn list_authorized_indexes_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@ -507,10 +507,10 @@ async fn access_authorized_index_patterns() {
server.use_api_key(MASTER_KEY);
// refer to products_1 with a modified api key.
// refer to products_1 with modified api key.
let index_1 = server.index("products_1");
server.wait_task(task_id).await;
index_1.wait_task(task_id).await;
let (response, code) = index_1.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -578,19 +578,19 @@ async fn raise_error_non_authorized_index_patterns() {
assert_eq!(202, code, "{:?}", &response);
let task2_id = response["taskUid"].as_u64().unwrap();
// Adding a document to test index. Should Fail with 403 -- invalid_api_key
// Adding document to test index. Should Fail with 403 -- invalid_api_key
let (response, code) = test_index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with a modified api key.
// refer to products_1 with modified api key.
let product_1_index = server.index("products_1");
// refer to products_2 with a modified api key.
// let product_2_index = server.index("products_2");
// refer to products_2 with modified api key.
let product_2_index = server.index("products_2");
server.wait_task(task1_id).await;
server.wait_task(task2_id).await;
product_1_index.wait_task(task1_id).await;
product_2_index.wait_task(task2_id).await;
let (response, code) = product_1_index.get_task(task1_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -603,7 +603,7 @@ async fn raise_error_non_authorized_index_patterns() {
#[actix_rt::test]
async fn pattern_indexes() {
// Create a server with master key
// Create server with master key
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
@ -650,7 +650,7 @@ async fn list_authorized_tasks_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@ -690,7 +690,7 @@ async fn list_authorized_tasks_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@ -757,7 +757,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = server.wait_task(task_id).await;
let response = index.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@ -768,7 +768,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = server.wait_task(task_id).await;
let response = index.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@ -778,7 +778,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = server.wait_task(task_id).await;
let response = index.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@ -830,7 +830,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -844,7 +844,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -856,7 +856,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -911,7 +911,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -929,7 +929,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@ -949,7 +949,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
server.wait_task(task_id).await;
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);

View File

@ -91,14 +91,14 @@ async fn create_api_key_bad_actions() {
// can't parse
let (response, code) = server.add_api_key(json!({ "actions": ["doggo"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(json_string!(response), @r#"
{
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
}
"###);
"#);
}
#[actix_rt::test]

View File

@ -100,11 +100,11 @@ macro_rules! compute_authorized_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task1,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task1.uid()).await.succeeded();
index.wait_task(task1.uid()).await.succeeded();
let (task2,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
server.wait_task(task2.uid()).await.succeeded();
index.wait_task(task2.uid()).await.succeeded();
drop(index);
for key_content in ACCEPTED_KEYS.iter() {
@ -147,7 +147,7 @@ macro_rules! compute_forbidden_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
for key_content in $parent_keys.iter() {

View File

@ -268,21 +268,21 @@ macro_rules! compute_authorized_single_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (add_task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(add_task.uid()).await.succeeded();
index.wait_task(add_task.uid()).await.succeeded();
let (update_task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
server.wait_task(update_task.uid()).await.succeeded();
index.wait_task(update_task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (add_task2,_status_code) = index.add_documents(documents, None).await;
server.wait_task(add_task2.uid()).await.succeeded();
index.wait_task(add_task2.uid()).await.succeeded();
let (update_task2,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
server.wait_task(update_task2.uid()).await.succeeded();
index.wait_task(update_task2.uid()).await.succeeded();
drop(index);
@ -339,21 +339,21 @@ macro_rules! compute_authorized_multiple_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
@ -423,21 +423,21 @@ macro_rules! compute_forbidden_single_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");
@ -499,21 +499,21 @@ macro_rules! compute_forbidden_multiple_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");

File diff suppressed because it is too large

View File

@ -1,13 +1,15 @@
use std::fmt::Write;
use std::marker::PhantomData;
use std::panic::{catch_unwind, resume_unwind, UnwindSafe};
use std::time::Duration;
use actix_web::http::StatusCode;
use tokio::time::sleep;
use urlencoding::encode as urlencode;
use super::encoder::Encoder;
use super::service::Service;
use super::{Owned, Server, Shared, Value};
use super::{Owned, Shared, Value};
use crate::json;
pub struct Index<'a, State = Owned> {
@ -31,7 +33,7 @@ impl<'a> Index<'a, Owned> {
Index { uid: self.uid.clone(), service: self.service, encoder, marker: PhantomData }
}
pub async fn load_test_set<State>(&self, waiter: &Server<State>) -> u64 {
pub async fn load_test_set(&self) -> u64 {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
let (response, code) = self
.service
@ -42,12 +44,12 @@ impl<'a> Index<'a, Owned> {
)
.await;
assert_eq!(code, 202);
let update_id = response["taskUid"].as_u64().unwrap();
waiter.wait_task(update_id).await;
update_id
let update_id = response["taskUid"].as_i64().unwrap();
self.wait_task(update_id as u64).await;
update_id as u64
}
pub async fn load_test_set_ndjson<State>(&self, waiter: &Server<State>) -> u64 {
pub async fn load_test_set_ndjson(&self) -> u64 {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
let (response, code) = self
.service
@ -58,9 +60,9 @@ impl<'a> Index<'a, Owned> {
)
.await;
assert_eq!(code, 202);
let update_id = response["taskUid"].as_u64().unwrap();
waiter.wait_task(update_id).await;
update_id
let update_id = response["taskUid"].as_i64().unwrap();
self.wait_task(update_id as u64).await;
update_id as u64
}
pub async fn create(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
@ -265,14 +267,10 @@ impl Index<'_, Shared> {
/// You cannot modify the content of a shared index, thus the delete_document_by_filter call
/// must fail. If the task successfully enqueues itself, we'll wait for it to finish,
/// and if it succeeds the function will panic.
pub async fn delete_document_by_filter_fail<State>(
&self,
body: Value,
waiter: &Server<State>,
) -> (Value, StatusCode) {
pub async fn delete_document_by_filter_fail(&self, body: Value) -> (Value, StatusCode) {
let (mut task, code) = self._delete_document_by_filter(body).await;
if code.is_success() {
task = waiter.wait_task(task.uid()).await;
task = self.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_document_by_filter_fail` succeeded: {}",
@ -283,10 +281,10 @@ impl Index<'_, Shared> {
(task, code)
}
pub async fn delete_index_fail<State>(&self, waiter: &Server<State>) -> (Value, StatusCode) {
pub async fn delete_index_fail(&self) -> (Value, StatusCode) {
let (mut task, code) = self._delete().await;
if code.is_success() {
task = waiter.wait_task(task.uid()).await;
task = self.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_index_fail` succeeded: {}",
@ -297,14 +295,10 @@ impl Index<'_, Shared> {
(task, code)
}
pub async fn update_index_fail<State>(
&self,
primary_key: Option<&str>,
waiter: &Server<State>,
) -> (Value, StatusCode) {
pub async fn update_index_fail(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
let (mut task, code) = self._update(primary_key).await;
if code.is_success() {
task = waiter.wait_task(task.uid()).await;
task = self.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`update_index_fail` succeeded: {}",
@ -370,6 +364,23 @@ impl<State> Index<'_, State> {
self.service.delete(url).await
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
for _ in 0..100 {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;
}
// wait 0.5 second.
sleep(Duration::from_millis(500)).await;
}
panic!("Timeout waiting for update id");
}
pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) {
let url = format!("/tasks/{}", update_id);
self.service.get(url).await
@ -551,5 +562,7 @@ pub struct GetAllDocumentsOptions {
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sort: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
}

View File

@ -3,8 +3,12 @@ pub mod index;
pub mod server;
pub mod service;
use std::fmt::{self, Display};
use std::{
collections::BTreeMap,
fmt::{self, Display},
};
use actix_http::StatusCode;
#[allow(unused)]
pub use index::GetAllDocumentsOptions;
use meili_snap::json_string;
@ -13,6 +17,10 @@ use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
use tokio::sync::OnceCell;
use wiremock::{
matchers::{method, path},
Mock, MockServer, Request, ResponseTemplate,
};
use crate::common::index::Index;
@ -38,15 +46,6 @@ impl Value {
self["uid"].as_u64().is_some() || self["taskUid"].as_u64().is_some()
}
#[track_caller]
pub fn batch_uid(&self) -> u32 {
if let Some(batch_uid) = self["batchUid"].as_u64() {
batch_uid as u32
} else {
panic!("Didn't find `batchUid` in: {self}");
}
}
/// Return `true` if the `status` field is set to `succeeded`.
/// Panic if the `status` field doesn't exists.
#[track_caller]
@ -190,7 +189,7 @@ pub async fn shared_empty_index() -> &'static Index<'static, Shared> {
let server = Server::new_shared();
let index = server._index("EMPTY_INDEX").to_shared();
let (response, _code) = index._create(None).await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
index
})
.await
@ -238,13 +237,13 @@ pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
let index = server._index("SHARED_DOCUMENTS").to_shared();
let documents = DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
index
}).await
}
@ -281,13 +280,13 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shar
let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared();
let documents = SCORE_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
index
}).await
}
@ -358,13 +357,13 @@ pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Sha
let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
let documents = NESTED_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["father", "doggos", "cattos"], "sortableAttributes": ["doggos"]}),
)
.await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
index
}).await
}
@ -458,7 +457,7 @@ pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> {
)
.await;
assert_eq!(code, 202);
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
index
})
.await
@ -505,15 +504,178 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
let server = Server::new_shared();
let index = server._index("SHARED_GEO_DOCUMENTS").to_shared();
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None).await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["_geo"], "sortableAttributes": ["_geo"]}),
)
.await;
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
index
})
.await
}
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
let (server, uid) = INDEX
.get_or_init(|| async {
let (server, uid, _) = init_fragments_index().await;
(server.into_shared(), uid)
})
.await;
server._index(uid).to_shared()
}
async fn fragment_mock_server() -> String {
let text_to_embedding: BTreeMap<_, _> = vec![
("kefir", [0.5, -0.5, 0.0]),
("intel", [1.0, 1.0, 0.0]),
("dustin", [-0.5, 0.5, 0.0]),
("bulldog", [0.0, 0.0, 1.0]),
("labrador", [0.0, 0.0, -1.0]),
("{{ doc.", [-9999.0, -9999.0, -9999.0]), // If a template didn't render
]
.into_iter()
.collect();
let mock_server = Box::leak(Box::new(MockServer::start().await));
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
let text = String::from_utf8_lossy(&req.body).to_string();
let mut data = [0.0, 0.0, 0.0];
for (inner_text, inner_data) in &text_to_embedding {
if text.contains(inner_text) {
for (i, &value) in inner_data.iter().enumerate() {
data[i] += value;
}
}
}
ResponseTemplate::new(200).set_body_json(json!({ "data": data }))
})
.mount(mock_server)
.await;
mock_server.uri()
}
pub async fn init_fragments_index() -> (Server<Owned>, String, crate::common::Value) {
let url = fragment_mock_server().await;
let server = Server::new().await;
let index = server.unique_index();
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);
// Configure the index to use our mock embedder
let settings = json!({
"embedders": {
"rest": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"indexingFragments": {
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
"basic": {"value": "{{ doc.name }} is a dog"},
},
"searchFragments": {
"justBreed": {"value": "It's a {{ media.breed }}"},
"justName": {"value": "{{ media.name }} is a dog"},
"query": {"value": "Some pre-prompt for query {{ q }}"},
}
},
},
});
let (response, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(response.uid()).await.succeeded();
// Send documents
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel", "breed": "labrador"},
{"id": 3, "name": "dustin", "breed": "bulldog"},
]);
let (value, code) = index.add_documents(documents, None).await;
assert_eq!(code, StatusCode::ACCEPTED);
let _task = index.wait_task(value.uid()).await.succeeded();
let uid = index.uid.clone();
(server, uid, settings)
}
pub async fn init_fragments_index_composite() -> (Server<Owned>, String, crate::common::Value) {
let url = fragment_mock_server().await;
let server = Server::new().await;
let index = server.unique_index();
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);
let (_response, code) = server.set_features(json!({"compositeEmbedders": true})).await;
assert_eq!(code, StatusCode::OK);
// Configure the index to use our mock embedder
let settings = json!({
"embedders": {
"rest": {
"source": "composite",
"searchEmbedder": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"searchFragments": {
"query": {"value": "Some pre-prompt for query {{ q }}"},
}
},
"indexingEmbedder": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"indexingFragments": {
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
"basic": {"value": "{{ doc.name }} is a dog"},
}
},
},
},
});
let (response, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(response.uid()).await.succeeded();
// Send documents
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel", "breed": "labrador"},
{"id": 3, "name": "dustin", "breed": "bulldog"},
]);
let (value, code) = index.add_documents(documents, None).await;
assert_eq!(code, StatusCode::ACCEPTED);
index.wait_task(value.uid()).await.succeeded();
let uid = index.uid.clone();
(server, uid, settings)
}

View File

@ -35,7 +35,7 @@ pub struct Server<State = Owned> {
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server<Owned> {
fn into_shared(self) -> Server<Shared> {
pub(super) fn into_shared(self) -> Server<Shared> {
Server { service: self.service, _dir: self._dir, _marker: PhantomData }
}
@ -97,6 +97,7 @@ impl Server<Owned> {
self.use_api_key(master_key);
let (response, code) = self.list_api_keys("").await;
assert_eq!(200, code, "{:?}", response);
// TODO: relying on the order of keys is not ideal, we should use the name instead
let admin_key = &response["results"][1]["key"];
self.use_api_key(admin_key.as_str().unwrap());
}
@ -408,12 +409,12 @@ impl<State> Server<State> {
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{update_id}");
let max_attempts = 400; // 200 seconds in total, 0.5secs per attempt
let url = format!("/tasks/{}", update_id);
let max_attempts = 400; // 200 seconds total, 0.5s per attempt
for i in 0..max_attempts {
let (response, status_code) = self.service.get(url.clone()).await;
assert_eq!(200, status_code, "response: {response}");
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;

View File

@ -1318,7 +1318,7 @@ async fn add_no_documents() {
async fn add_larger_dataset() {
let server = Server::new_shared();
let index = server.unique_index();
let update_id = index.load_test_set(server).await;
let update_id = index.load_test_set().await;
let (response, code) = index.get_task(update_id).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "succeeded");
@ -1333,7 +1333,7 @@ async fn add_larger_dataset() {
// x-ndjson add large test
let index = server.unique_index();
let update_id = index.load_test_set_ndjson(server).await;
let update_id = index.load_test_set_ndjson().await;
let (response, code) = index.get_task(update_id).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "succeeded");

View File

@ -7,8 +7,7 @@ use crate::json;
async fn delete_one_document_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) =
index.delete_document_by_filter_fail(json!({"filter": "a = b"}), server).await;
let (task, code) = index.delete_document_by_filter_fail(json!({"filter": "a = b"})).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.failed();

View File

@ -557,9 +557,9 @@ async fn delete_document_by_filter() {
"###);
let index = shared_does_not_exists_index().await;
// index does not exists
// index does not exist
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
@ -589,7 +589,7 @@ async fn delete_document_by_filter() {
// no filterable are set
let index = shared_empty_index().await;
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
@ -619,7 +619,7 @@ async fn delete_document_by_filter() {
// not filterable while there is a filterable attribute
let index = shared_index_with_documents().await;
let (response, code) =
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"}), server).await;
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await.failed();
snapshot!(response, @r###"

View File

@ -5,8 +5,8 @@ use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder;
use crate::common::{
shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
GetAllDocumentsOptions, Server, Value,
shared_does_not_exists_index, shared_empty_index, shared_index_with_geo_documents,
shared_index_with_test_set, GetAllDocumentsOptions, Server, Value,
};
use crate::json;
@ -83,6 +83,311 @@ async fn get_document() {
);
}
#[actix_rt::test]
async fn get_document_sorted() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set().await;
let (task, _status_code) =
index.update_settings_sortable_attributes(json!(["age", "email", "gender", "name"])).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "age", "email"]),
sort: Some(vec!["age:asc", "email:desc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 5,
"age": 20,
"email": "warrenwatson@chorizon.com"
},
{
"id": 6,
"age": 20,
"email": "sheliaberry@chorizon.com"
},
{
"id": 57,
"age": 20,
"email": "kaitlinconner@chorizon.com"
},
{
"id": 45,
"age": 20,
"email": "irenebennett@chorizon.com"
},
{
"id": 40,
"age": 21,
"email": "staffordemerson@chorizon.com"
},
{
"id": 41,
"age": 21,
"email": "salinasgamble@chorizon.com"
},
{
"id": 63,
"age": 21,
"email": "knowleshebert@chorizon.com"
},
{
"id": 50,
"age": 21,
"email": "guerramcintyre@chorizon.com"
},
{
"id": 44,
"age": 22,
"email": "jonispears@chorizon.com"
},
{
"id": 56,
"age": 23,
"email": "tuckerbarry@chorizon.com"
},
{
"id": 51,
"age": 23,
"email": "keycervantes@chorizon.com"
},
{
"id": 60,
"age": 23,
"email": "jodyherrera@chorizon.com"
},
{
"id": 70,
"age": 23,
"email": "glassperkins@chorizon.com"
},
{
"id": 75,
"age": 24,
"email": "emmajacobs@chorizon.com"
},
{
"id": 68,
"age": 24,
"email": "angelinadyer@chorizon.com"
},
{
"id": 17,
"age": 25,
"email": "ortegabrennan@chorizon.com"
},
{
"id": 76,
"age": 25,
"email": "claricegardner@chorizon.com"
},
{
"id": 43,
"age": 25,
"email": "arnoldbender@chorizon.com"
},
{
"id": 12,
"age": 25,
"email": "aidakirby@chorizon.com"
},
{
"id": 9,
"age": 26,
"email": "kellimendez@chorizon.com"
}
]
"#);
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "gender", "name"]),
sort: Some(vec!["gender:asc", "name:asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 3,
"name": "Adeline Flynn",
"gender": "female"
},
{
"id": 12,
"name": "Aida Kirby",
"gender": "female"
},
{
"id": 68,
"name": "Angelina Dyer",
"gender": "female"
},
{
"id": 15,
"name": "Aurelia Contreras",
"gender": "female"
},
{
"id": 36,
"name": "Barbra Valenzuela",
"gender": "female"
},
{
"id": 23,
"name": "Blanca Mcclain",
"gender": "female"
},
{
"id": 53,
"name": "Caitlin Burnett",
"gender": "female"
},
{
"id": 71,
"name": "Candace Sawyer",
"gender": "female"
},
{
"id": 65,
"name": "Carole Rowland",
"gender": "female"
},
{
"id": 33,
"name": "Cecilia Greer",
"gender": "female"
},
{
"id": 1,
"name": "Cherry Orr",
"gender": "female"
},
{
"id": 38,
"name": "Christina Short",
"gender": "female"
},
{
"id": 7,
"name": "Chrystal Boyd",
"gender": "female"
},
{
"id": 76,
"name": "Clarice Gardner",
"gender": "female"
},
{
"id": 73,
"name": "Eleanor Shepherd",
"gender": "female"
},
{
"id": 75,
"name": "Emma Jacobs",
"gender": "female"
},
{
"id": 16,
"name": "Estella Bass",
"gender": "female"
},
{
"id": 62,
"name": "Estelle Ramirez",
"gender": "female"
},
{
"id": 20,
"name": "Florence Long",
"gender": "female"
},
{
"id": 42,
"name": "Graciela Russell",
"gender": "female"
}
]
"#);
}
#[actix_rt::test]
async fn get_document_geosorted() {
let index = shared_index_with_geo_documents().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
}
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
]
"#);
}
#[actix_rt::test]
async fn get_document_sort_the_unsortable() {
let index = shared_index_with_test_set().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "name"]),
sort: Some(vec!["name:asc"]),
..Default::default()
})
.await;
snapshot!(json_string!(response), @r#"
{
"message": "Attribute `name` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_document_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_sort"
}
"#);
}
#[actix_rt::test]
async fn error_get_unexisting_index_all_documents() {
let index = shared_does_not_exists_index().await;
@ -334,7 +639,7 @@ async fn get_document_s_nested_attributes_to_retrieve() {
async fn get_documents_displayed_attributes_is_ignored() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set(server).await;
index.load_test_set().await;
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;

View File

@ -2366,7 +2366,7 @@ async fn generate_and_import_dump_containing_vectors() {
))
.await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
let response = index.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = index
.add_documents(
@ -2381,12 +2381,12 @@ async fn generate_and_import_dump_containing_vectors() {
)
.await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
let response = index.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = server.create_dump().await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
let response = index.wait_task(response.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// ========= We made a dump, now we should clear the DB and try to import our dump

View File

@ -161,9 +161,9 @@ async fn test_create_multiple_indexes() {
let (task2, _) = index2.create(None).await;
let (task3, _) = index3.create(None).await;
server.wait_task(task1.uid()).await.succeeded();
server.wait_task(task2.uid()).await.succeeded();
server.wait_task(task3.uid()).await.succeeded();
index1.wait_task(task1.uid()).await.succeeded();
index2.wait_task(task2.uid()).await.succeeded();
index3.wait_task(task3.uid()).await.succeeded();
assert_eq!(index1.get().await.1, 200);
assert_eq!(index2.get().await.1, 200);

View File

@ -26,7 +26,7 @@ async fn create_and_delete_index() {
async fn error_delete_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) = index.delete_index_fail(server).await;
let (task, code) = index.delete_index_fail().await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.failed();

View File

@ -60,8 +60,8 @@ async fn list_multiple_indexes() {
let index_with_key = server.unique_index();
let (response_with_key, _status_code) = index_with_key.create(Some("key")).await;
server.wait_task(response_without_key.uid()).await.succeeded();
server.wait_task(response_with_key.uid()).await.succeeded();
index_without_key.wait_task(response_without_key.uid()).await.succeeded();
index_with_key.wait_task(response_with_key.uid()).await.succeeded();
let (response, code) = server.list_indexes(None, Some(1000)).await;
assert_eq!(code, 200);
@ -81,9 +81,8 @@ async fn get_and_paginate_indexes() {
let server = Server::new().await;
const NB_INDEXES: usize = 50;
for i in 0..NB_INDEXES {
let (task, code) = server.index(format!("test_{i:02}")).create(None).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await;
server.index(format!("test_{i:02}")).create(None).await;
server.index(format!("test_{i:02}")).wait_task(i as u64).await;
}
// basic

View File

@ -1,5 +1,4 @@
use crate::common::{shared_does_not_exists_index, Server};
use crate::json;
#[actix_rt::test]

View File

@ -72,7 +72,7 @@ async fn error_update_existing_primary_key() {
let server = Server::new_shared();
let index = shared_index_with_documents().await;
let (update_task, code) = index.update_index_fail(Some("primary"), server).await;
let (update_task, code) = index.update_index_fail(Some("primary")).await;
assert_eq!(code, 202);
let response = server.wait_task(update_task.uid()).await.failed();
@ -91,7 +91,7 @@ async fn error_update_existing_primary_key() {
async fn error_update_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) = index.update_index_fail(Some("my-primary-key"), server).await;
let (task, code) = index.update_index_fail(Some("my-primary-key")).await;
assert_eq!(code, 202);

View File

@ -304,7 +304,7 @@ async fn search_bad_filter() {
let server = Server::new_shared();
let index = server.unique_index();
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
// index does not exists error.
// index does not exist error.
let (response, _code) = index.create(None).await;
server.wait_task(response.uid()).await.succeeded();
@ -1263,7 +1263,7 @@ async fn search_with_contains_without_enabling_the_feature() {
let server = Server::new_shared();
let index = server.unique_index();
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
// index does not exists error.
// index does not exist error.
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();

View File

@ -4,8 +4,8 @@ use tempfile::TempDir;
use super::test_settings_documents_indexing_swapping_and_search;
use crate::common::{
default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
DOCUMENTS, NESTED_DOCUMENTS,
default_settings, shared_index_for_fragments, shared_index_with_documents,
shared_index_with_nested_documents, Server, DOCUMENTS, NESTED_DOCUMENTS,
};
use crate::json;
@ -731,3 +731,457 @@ async fn test_filterable_attributes_priority() {
)
.await;
}
#[actix_rt::test]
async fn vector_filter_all_embedders() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "echo"
},
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"#);
}
#[actix_rt::test]
async fn vector_filter_missing_fragment() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.\n15:24 _vectors.rest.fragments EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
}
#[actix_rt::test]
async fn vector_filter_non_existant_embedder() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.other EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
}
#[actix_rt::test]
async fn vector_filter_all_embedders_user_provided() {
let index = shared_index_for_fragments().await;
// This one is counterintuitive, but it is the same as the previous one.
// It's because userProvided is interpreted as an embedder name
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.userProvided EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
}
#[actix_rt::test]
async fn vector_filter_specific_embedder() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "echo"
},
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"#);
}
#[actix_rt::test]
async fn vector_filter_user_provided() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.userProvided EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "echo"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"#);
}
#[actix_rt::test]
async fn vector_filter_specific_fragment() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.withBreed EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
"#);
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.basic EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "echo"
},
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"#);
}
#[actix_rt::test]
async fn vector_filter_non_existant_fragment() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.other EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: The fragment `other` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`.\n25:30 _vectors.rest.fragments.other EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
}
#[actix_rt::test]
async fn vector_filter_specific_fragment_user_provided() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.other.userProvided EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: Vector filter cannot have both `other` and `userProvided`.\n31:43 _vectors.rest.fragments.other.userProvided EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
}
#[actix_rt::test]
async fn vector_filter_document_template_but_fragments_used() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.documentTemplate EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0
}
"#);
}
#[actix_rt::test]
async fn vector_filter_document_template() {
let (_mock, setting) = crate::vector::create_mock().await;
let server = crate::vector::get_server_vector().await;
let index = server.index("doggo");
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
snapshot!(code, @"200 OK");
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel"},
{"id": 3, "name": "iko" }
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.documentTemplate EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "intel"
},
{
"name": "iko"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"#);
}
#[actix_rt::test]
async fn vector_filter_feature_gate() {
let index = shared_index_with_documents().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"message": "using a vector filter requires enabling the `multimodal` experimental feature. See https://github.com/orgs/meilisearch/discussions/846\n1:9 _vectors EXISTS",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"#);
}
#[actix_rt::test]
async fn vector_filter_negation() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.userProvided NOT EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"#);
}
#[actix_rt::test]
async fn vector_filter_or_combination() {
let index = shared_index_for_fragments().await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.withBreed EXISTS OR _vectors.rest.userProvided EXISTS",
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "echo"
},
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"#);
}
#[actix_rt::test]
async fn vector_filter_regenerate() {
let index = shared_index_for_fragments().await;
for selector in ["_vectors.rest.regenerate", "_vectors.rest.fragments.basic.regenerate"] {
let (value, _code) = index
.search_post(json!({
"filter": format!("{selector} EXISTS"),
"attributesToRetrieve": ["name"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "intel"
},
{
"name": "dustin"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"#);
}
}

View File

@ -158,11 +158,11 @@ async fn remote_sharding() {
let index1 = ms1.index("test");
let index2 = ms2.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index2.add_documents(json!(documents[3..5]), None).await;
ms2.wait_task(task.uid()).await.succeeded();
index2.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -454,9 +454,9 @@ async fn error_unregistered_remote() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -572,9 +572,9 @@ async fn error_no_weighted_score() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -705,9 +705,9 @@ async fn error_bad_response() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -842,9 +842,9 @@ async fn error_bad_request() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -972,10 +972,10 @@ async fn error_bad_request_facets_by_index() {
let index0 = ms0.index("test0");
let index1 = ms1.index("test1");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -1113,13 +1113,13 @@ async fn error_bad_request_facets_by_index_facet() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index0.update_settings_filterable_attributes(json!(["id"])).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -1224,7 +1224,6 @@ async fn error_bad_request_facets_by_index_facet() {
}
#[actix_rt::test]
#[ignore]
async fn error_remote_does_not_answer() {
let ms0 = Server::new().await;
let ms1 = Server::new().await;
@ -1263,9 +1262,9 @@ async fn error_remote_does_not_answer() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -1464,9 +1463,9 @@ async fn error_remote_404() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -1659,9 +1658,9 @@ async fn error_remote_sharding_auth() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
ms1.clear_api_key();
@ -1819,9 +1818,9 @@ async fn remote_sharding_auth() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
ms1.clear_api_key();
@ -1974,9 +1973,9 @@ async fn error_remote_500() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -2153,9 +2152,9 @@ async fn error_remote_500_once() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -2336,9 +2335,9 @@ async fn error_remote_timeout() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
ms0.wait_task(task.uid()).await.succeeded();
index0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
ms1.wait_task(task.uid()).await.succeeded();
index1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@ -2500,7 +2499,7 @@ pub struct LocalMeiliParams {
/// A server that exploits [`MockServer`] to provide a URL for testing the network feature.
pub struct LocalMeili {
mock_server: MockServer,
mock_server: &'static MockServer,
}
impl LocalMeili {
@ -2509,7 +2508,7 @@ impl LocalMeili {
}
pub async fn with_params(server: Arc<Server>, params: LocalMeiliParams) -> Self {
let mock_server = MockServer::start().await;
let mock_server = Box::leak(Box::new(MockServer::start().await));
// tokio won't let us execute asynchronous code from a sync function inside of an async test,
// so instead we spawn another thread that will call the service on a brand new tokio runtime
@ -2573,7 +2572,7 @@ impl LocalMeili {
response.set_body_json(value)
}
})
.mount(&mock_server)
.mount(mock_server)
.await;
Self { mock_server }
}

View File

@ -1,6 +1,7 @@
use super::shared_index_with_documents;
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};
#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {
@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() {
.await;
}
}
#[actix_rt::test]
async fn test_issue_5274() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = json!([
{
"id": 1,
"title": "Document 1",
"content": "This is the first."
},
{
"id": 2,
"title": "Document 2",
"content": "This is the second doc."
}
]);
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
// Find out the lowest ranking score among the documents
let (rep, _status) = index
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true}))
.await;
let hits = rep["hits"].as_array().expect("Missing hits array");
let second_hit = hits.get(1).expect("Missing second hit");
let ranking_score = second_hit
.get("_rankingScore")
.expect("Missing _rankingScore field")
.as_f64()
.expect("Expected _rankingScore to be a f64");
// Search with a ranking score threshold just above it and expect a single hit
let (rep, _status) = index
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001}))
.await;
snapshot!(json_string!(rep, {
".processingTimeMs" => "[ignored]",
}), @r#"
{
"hits": [
{
"id": 2,
"title": "Document 2",
"content": "This is the second doc."
}
],
"query": "doc",
"processingTimeMs": "[ignored]",
"hitsPerPage": 1,
"page": 1,
"totalPages": 1,
"totalHits": 1
}
"#);
}

View File

@ -692,3 +692,68 @@ async fn granular_filterable_attributes() {
]
"###);
}
#[actix_rt::test]
async fn test_searchable_attributes_order() {
let server = Server::new_shared();
let index = server.unique_index();
// 1) Create an index with settings "searchableAttributes": ["title", "overview"]
let (response, code) = index.create(None).await;
assert_eq!(code, 202, "{response}");
server.wait_task(response.uid()).await.succeeded();
let (task, code) = index
.update_settings(json!({
"searchableAttributes": ["title", "overview"]
}))
.await;
assert_eq!(code, 202, "{task}");
server.wait_task(task.uid()).await.succeeded();
// 2) Add documents in the index
let documents = json!([
{
"id": 1,
"title": "The Matrix",
"overview": "A computer hacker learns from mysterious rebels about the true nature of his reality."
},
{
"id": 2,
"title": "Inception",
"overview": "A thief who steals corporate secrets through dream-sharing technology."
}
]);
let (response, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202, "{response}");
server.wait_task(response.uid()).await.succeeded();
// 3) Modify the settings "searchableAttributes": ["overview", "title"] (overview is put first)
let (task, code) = index
.update_settings(json!({
"searchableAttributes": ["overview", "title"]
}))
.await;
assert_eq!(code, 202, "{task}");
server.wait_task(task.uid()).await.succeeded();
// 4) Check if it has been applied
let (response, code) = index.settings().await;
assert_eq!(code, 200, "{response}");
assert_eq!(response["searchableAttributes"], json!(["overview", "title"]));
// 5) Re-modify the settings "searchableAttributes": ["title", "overview"] (title is put first)
let (task, code) = index
.update_settings(json!({
"searchableAttributes": ["title", "overview"]
}))
.await;
assert_eq!(code, 202, "{task}");
server.wait_task(task.uid()).await.succeeded();
// 6) Check if it has been applied
let (response, code) = index.settings().await;
assert_eq!(code, 200, "{response}");
assert_eq!(response["searchableAttributes"], json!(["title", "overview"]));
}

View File

@ -298,7 +298,7 @@ async fn similar_bad_filter() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let (response, code) =
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
@ -335,7 +335,7 @@ async fn filter_invalid_syntax_object() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
index
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
@ -373,7 +373,7 @@ async fn filter_invalid_syntax_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
index
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
@ -411,7 +411,7 @@ async fn filter_invalid_syntax_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
@ -451,7 +451,7 @@ async fn filter_invalid_attribute_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
index
.similar(
@ -492,7 +492,7 @@ async fn filter_invalid_attribute_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
index
.similar(
@ -533,7 +533,7 @@ async fn filter_reserved_geo_attribute_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
@ -573,7 +573,7 @@ async fn filter_reserved_geo_attribute_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
@ -613,7 +613,7 @@ async fn filter_reserved_attribute_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
@ -653,7 +653,7 @@ async fn filter_reserved_attribute_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
@ -693,7 +693,7 @@ async fn filter_reserved_geo_point_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
@ -733,7 +733,7 @@ async fn filter_reserved_geo_point_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
@ -825,7 +825,7 @@ async fn similar_bad_embedder() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await;
index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Cannot find embedder with name `auto`.",

View File

@ -51,12 +51,12 @@ async fn perform_snapshot() {
}))
.await;
index.load_test_set(&server).await;
index.load_test_set().await;
let (task, code) = server.index("test1").create(Some("prim")).await;
meili_snap::snapshot!(code, @"202 Accepted");
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
// wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
@ -128,13 +128,13 @@ async fn perform_on_demand_snapshot() {
}))
.await;
index.load_test_set(&server).await;
index.load_test_set().await;
let (task, _status_code) = server.index("doggo").create(Some("bone")).await;
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = server.index("doggo").create(Some("bone")).await;
server.wait_task(task.uid()).await.failed();
index.wait_task(task.uid()).await.failed();
let (task, code) = server.create_snapshot().await;
snapshot!(code, @"202 Accepted");
@ -147,7 +147,7 @@ async fn perform_on_demand_snapshot() {
"enqueuedAt": "[date]"
}
"###);
let task = server.wait_task(task.uid()).await;
let task = index.wait_task(task.uid()).await;
snapshot!(json_string!(task, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 4,

View File

@ -32,7 +32,7 @@ async fn stats() {
let (task, code) = index.create(Some("id")).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.succeeded();
index.wait_task(task.uid()).await.succeeded();
let (response, code) = server.stats().await;
@ -58,7 +58,7 @@ async fn stats() {
assert_eq!(code, 202, "{response}");
assert_eq!(response["taskUid"], 1);
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let timestamp = OffsetDateTime::now_utc();
let (response, code) = server.stats().await;
@ -107,7 +107,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@ -135,7 +135,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@ -163,7 +163,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@ -192,7 +192,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@ -245,7 +245,7 @@ async fn add_remove_embedded_documents() {
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@ -269,7 +269,7 @@ async fn add_remove_embedded_documents() {
// delete one embedded document, leaving 1 embedded document with 3 embeddings in total
let (response, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@ -305,7 +305,7 @@ async fn update_embedder_settings() {
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {

View File

@ -61,7 +61,16 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"pagination": {
"maxTotalHits": 15
},
"embedders": {},
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"pooling": "forceMean",
"documentTemplate": "{{doc.description}}",
"documentTemplateMaxBytes": 400
}
},
"searchCutoffMs": 8000,
"localizedAttributes": [
{

View File

@ -0,0 +1,40 @@
---
source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
---
[
{
"id": 1,
"name": "kefir",
"surname": [
"kef",
"kefkef",
"kefirounet",
"boubou"
],
"age": 1.4,
"description": "kefir est un petit chien blanc très mignon",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 2,
"name": "intel",
"surname": [
"untel",
"tétel",
"iouiou"
],
"age": 11.5,
"description": "intel est un grand beagle très mignon",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": false
}
}
}
]

View File

@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@ -348,179 +497,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z",
"batchStrategy": "unspecified"
},
{
"uid": 10,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007391353S",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z",
"batchStrategy": "unspecified"
},
{
"uid": 9,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007445825S",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z",
"batchStrategy": "unspecified"
},
{
"uid": 8,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.012020083S",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z",
"batchStrategy": "unspecified"
},
{
"uid": 7,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007440092S",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z",
"batchStrategy": "unspecified"
},
{
"uid": 6,
"progress": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007565161S",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z",
"batchStrategy": "unspecified"
},
{
"uid": 5,
"progress": null,
"details": {
"stopWords": [
"le",
"un"
]
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.016307263S",
"startedAt": "2025-01-16T16:53:19.913351957Z",
"finishedAt": "2025-01-16T16:53:19.92965922Z",
"batchStrategy": "unspecified"
}
],
"total": 23,
"total": 29,
"limit": 20,
"from": 24,
"next": 4
"from": 30,
"next": 10
}

View File

@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@ -348,179 +497,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z",
"batchStrategy": "unspecified"
},
{
"uid": 10,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007391353S",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z",
"batchStrategy": "unspecified"
},
{
"uid": 9,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007445825S",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z",
"batchStrategy": "unspecified"
},
{
"uid": 8,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.012020083S",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z",
"batchStrategy": "unspecified"
},
{
"uid": 7,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007440092S",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z",
"batchStrategy": "unspecified"
},
{
"uid": 6,
"progress": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007565161S",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z",
"batchStrategy": "unspecified"
},
{
"uid": 5,
"progress": null,
"details": {
"stopWords": [
"le",
"un"
]
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.016307263S",
"startedAt": "2025-01-16T16:53:19.913351957Z",
"finishedAt": "2025-01-16T16:53:19.92965922Z",
"batchStrategy": "unspecified"
}
],
"total": 23,
"total": 29,
"limit": 20,
"from": 24,
"next": 4
"from": 30,
"next": 10
}

View File

@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@ -348,179 +497,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z",
"batchStrategy": "unspecified"
},
{
"uid": 10,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007391353S",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z",
"batchStrategy": "unspecified"
},
{
"uid": 9,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007445825S",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z",
"batchStrategy": "unspecified"
},
{
"uid": 8,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.012020083S",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z",
"batchStrategy": "unspecified"
},
{
"uid": 7,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007440092S",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z",
"batchStrategy": "unspecified"
},
{
"uid": 6,
"progress": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007565161S",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z",
"batchStrategy": "unspecified"
},
{
"uid": 5,
"progress": null,
"details": {
"stopWords": [
"le",
"un"
]
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.016307263S",
"startedAt": "2025-01-16T16:53:19.913351957Z",
"finishedAt": "2025-01-16T16:53:19.92965922Z",
"batchStrategy": "unspecified"
}
],
"total": 23,
"total": 29,
"limit": 20,
"from": 24,
"next": 4
"from": 30,
"next": 10
}

View File

@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@ -264,134 +376,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"enqueuedAt": "2025-01-16T17:02:52.527382964Z",
"startedAt": "2025-01-16T17:02:52.539749853Z",
"finishedAt": "2025-01-16T17:02:52.547390016Z"
},
{
"uid": 11,
"batchUid": 11,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchCutoffMs": 8000
},
"error": null,
"duration": "PT0.007307840S",
"enqueuedAt": "2025-01-16T17:01:14.100316617Z",
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z"
},
{
"uid": 10,
"batchUid": 10,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"error": null,
"duration": "PT0.007391353S",
"enqueuedAt": "2025-01-16T17:00:29.188815062Z",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z"
},
{
"uid": 9,
"batchUid": 9,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"error": null,
"duration": "PT0.007445825S",
"enqueuedAt": "2025-01-16T17:00:15.759501709Z",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z"
},
{
"uid": 8,
"batchUid": 8,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"error": null,
"duration": "PT0.012020083S",
"enqueuedAt": "2025-01-16T16:59:42.727292501Z",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z"
},
{
"uid": 7,
"batchUid": 7,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"error": null,
"duration": "PT0.007440092S",
"enqueuedAt": "2025-01-16T16:58:41.203145044Z",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z"
},
{
"uid": 6,
"batchUid": 6,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"error": null,
"duration": "PT0.007565161S",
"enqueuedAt": "2025-01-16T16:54:51.927866243Z",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z"
}
],
"total": 24,
"total": 30,
"limit": 20,
"from": 25,
"next": 5
"from": 31,
"next": 11
}

View File

@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@ -264,134 +376,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"enqueuedAt": "2025-01-16T17:02:52.527382964Z",
"startedAt": "2025-01-16T17:02:52.539749853Z",
"finishedAt": "2025-01-16T17:02:52.547390016Z"
},
{
"uid": 11,
"batchUid": 11,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchCutoffMs": 8000
},
"error": null,
"duration": "PT0.007307840S",
"enqueuedAt": "2025-01-16T17:01:14.100316617Z",
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z"
},
{
"uid": 10,
"batchUid": 10,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"error": null,
"duration": "PT0.007391353S",
"enqueuedAt": "2025-01-16T17:00:29.188815062Z",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z"
},
{
"uid": 9,
"batchUid": 9,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"error": null,
"duration": "PT0.007445825S",
"enqueuedAt": "2025-01-16T17:00:15.759501709Z",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z"
},
{
"uid": 8,
"batchUid": 8,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"error": null,
"duration": "PT0.012020083S",
"enqueuedAt": "2025-01-16T16:59:42.727292501Z",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z"
},
{
"uid": 7,
"batchUid": 7,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"error": null,
"duration": "PT0.007440092S",
"enqueuedAt": "2025-01-16T16:58:41.203145044Z",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z"
},
{
"uid": 6,
"batchUid": 6,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"error": null,
"duration": "PT0.007565161S",
"enqueuedAt": "2025-01-16T16:54:51.927866243Z",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z"
}
],
"total": 24,
"total": 30,
"limit": 20,
"from": 25,
"next": 5
"from": 31,
"next": 11
}

View File

@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@ -264,134 +376,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"enqueuedAt": "2025-01-16T17:02:52.527382964Z",
"startedAt": "2025-01-16T17:02:52.539749853Z",
"finishedAt": "2025-01-16T17:02:52.547390016Z"
},
{
"uid": 11,
"batchUid": 11,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchCutoffMs": 8000
},
"error": null,
"duration": "PT0.007307840S",
"enqueuedAt": "2025-01-16T17:01:14.100316617Z",
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z"
},
{
"uid": 10,
"batchUid": 10,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"error": null,
"duration": "PT0.007391353S",
"enqueuedAt": "2025-01-16T17:00:29.188815062Z",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z"
},
{
"uid": 9,
"batchUid": 9,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"error": null,
"duration": "PT0.007445825S",
"enqueuedAt": "2025-01-16T17:00:15.759501709Z",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z"
},
{
"uid": 8,
"batchUid": 8,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"error": null,
"duration": "PT0.012020083S",
"enqueuedAt": "2025-01-16T16:59:42.727292501Z",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z"
},
{
"uid": 7,
"batchUid": 7,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"error": null,
"duration": "PT0.007440092S",
"enqueuedAt": "2025-01-16T16:58:41.203145044Z",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z"
},
{
"uid": 6,
"batchUid": 6,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"error": null,
"duration": "PT0.007565161S",
"enqueuedAt": "2025-01-16T16:54:51.927866243Z",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z"
}
],
"total": 24,
"total": 30,
"limit": 20,
"from": 25,
"next": 5
"from": 31,
"next": 11
}

View File

@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@ -642,8 +791,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"batchStrategy": "unspecified"
}
],
"total": 25,
"total": 31,
"limit": 1000,
"from": 24,
"from": 30,
"next": null
}

View File

@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@ -497,8 +609,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "2025-01-16T16:45:16.131303739Z"
}
],
"total": 26,
"total": 32,
"limit": 1000,
"from": 25,
"from": 31,
"next": null
}

View File

@ -114,13 +114,13 @@ async fn check_the_index_scheduler(server: &Server) {
// All the indexes are still present
let (indexes, _) = server.list_indexes(None, None).await;
snapshot!(indexes, @r#"
snapshot!(indexes, @r###"
{
"results": [
{
"uid": "kefir",
"createdAt": "2025-01-16T16:45:16.020663157Z",
"updatedAt": "2025-01-23T11:36:22.634859166Z",
"updatedAt": "2025-07-07T13:43:08.835381Z",
"primaryKey": "id"
}
],
@ -128,7 +128,7 @@ async fn check_the_index_scheduler(server: &Server) {
"limit": 20,
"total": 1
}
"#);
"###);
// And their metadata are still right
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {
@ -141,21 +141,21 @@ async fn check_the_index_scheduler(server: &Server) {
{
"databaseSize": "[bytes]",
"usedDatabaseSize": "[bytes]",
"lastUpdate": "2025-01-23T11:36:22.634859166Z",
"lastUpdate": "2025-07-07T13:43:08.835381Z",
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"numberOfDocuments": 2,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"age": 1,
"description": 1,
"id": 1,
"name": 1,
"surname": 1
"age": 2,
"description": 2,
"id": 2,
"name": 2,
"surname": 2
}
}
}
@ -227,21 +227,21 @@ async fn check_the_index_scheduler(server: &Server) {
{
"databaseSize": "[bytes]",
"usedDatabaseSize": "[bytes]",
"lastUpdate": "2025-01-23T11:36:22.634859166Z",
"lastUpdate": "2025-07-07T13:43:08.835381Z",
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"numberOfDocuments": 2,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"age": 1,
"description": 1,
"id": 1,
"name": 1,
"surname": 1
"age": 2,
"description": 2,
"id": 2,
"name": 2,
"surname": 2
}
}
}
@ -254,18 +254,18 @@ async fn check_the_index_scheduler(server: &Server) {
".avgDocumentSize" => "[bytes]",
}), @r###"
{
"numberOfDocuments": 1,
"numberOfDocuments": 2,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"age": 1,
"description": 1,
"id": 1,
"name": 1,
"surname": 1
"age": 2,
"description": 2,
"id": 2,
"name": 2,
"surname": 2
}
}
"###);
@ -295,4 +295,8 @@ async fn check_the_index_features(server: &Server) {
let (results, _status) =
kefir.search_post(json!({ "sort": ["age:asc"], "filter": "surname = kefirounet" })).await;
snapshot!(results, name: "search_with_sort_and_filter");
// ensure we can still retrieve the vectors and that their `regenerate` flag is preserved.
let (results, _status) = kefir.search_post(json!({"retrieveVectors": true})).await;
snapshot!(json_string!(results["hits"], {"[]._vectors.doggo_embedder.embeddings" => "[vector]"}), name: "search_with_retrieve_vectors");
}

View File

@ -88,7 +88,7 @@ async fn binary_quantize_before_sending_documents() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents are binary quantized
let (documents, _code) = index
@ -161,7 +161,7 @@ async fn binary_quantize_after_sending_documents() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
@ -305,7 +305,7 @@ async fn binary_quantize_clear_documents() {
server.wait_task(response.uid()).await.succeeded();
let (value, _code) = index.clear_all_documents().await;
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents DB has been cleared
let (documents, _code) = index

File diff suppressed because it is too large

View File

@ -1,4 +1,5 @@
mod binary_quantized;
mod fragments;
#[cfg(feature = "test-ollama")]
mod ollama;
mod openai;
@ -13,8 +14,9 @@ use meilisearch::option::MaxThreads;
use crate::common::index::Index;
use crate::common::{default_settings, GetAllDocumentsOptions, Server};
use crate::json;
pub use rest::create_mock;
async fn get_server_vector() -> Server {
pub async fn get_server_vector() -> Server {
Server::new().await
}
@ -42,7 +44,7 @@ async fn add_remove_user_provided() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
@ -95,7 +97,7 @@ async fn add_remove_user_provided() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
@ -138,7 +140,7 @@ async fn add_remove_user_provided() {
let (value, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
@ -187,7 +189,7 @@ async fn user_provide_mismatched_embedding_dimension() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -218,7 +220,7 @@ async fn user_provide_mismatched_embedding_dimension() {
]);
let (response, code) = index.add_documents(new_document, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
let task = index.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -270,7 +272,7 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
index
}
@ -285,7 +287,7 @@ async fn user_provided_embeddings_error() {
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -315,7 +317,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -346,7 +348,7 @@ async fn user_provided_embeddings_error() {
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -375,7 +377,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true, "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -404,7 +406,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -433,7 +435,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]], "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -462,20 +464,20 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -504,7 +506,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -533,7 +535,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -574,7 +576,7 @@ async fn user_provided_vectors_error() {
let documents = json!([{"id": 40, "name": "kefir"}, {"id": 41, "name": "intel"}, {"id": 42, "name": "max"}, {"id": 43, "name": "venus"}, {"id": 44, "name": "eva"}]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -604,7 +606,7 @@ async fn user_provided_vectors_error() {
let documents = json!({"id": 42, "name": "kefir", "_vector": { "manaul": [0, 0, 0] }});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -634,7 +636,7 @@ async fn user_provided_vectors_error() {
let documents = json!({"id": 42, "name": "kefir", "_vectors": { "manaul": [0, 0, 0] }});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -667,7 +669,7 @@ async fn clear_documents() {
let index = generate_default_user_provided_documents(&server).await;
let (value, _code) = index.clear_all_documents().await;
server.wait_task(value.uid()).await.succeeded();
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents DB has been cleared
let (documents, _code) = index
@ -723,7 +725,7 @@ async fn add_remove_one_vector_4588() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-added");
let documents = json!([
@ -731,7 +733,7 @@ async fn add_remove_one_vector_4588() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
let (documents, _code) = index

View File

@ -117,7 +117,7 @@ async fn test_both_apis() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",

View File

@ -136,7 +136,7 @@ fn long_text() -> &'static str {
})
}
async fn create_mock_tokenized() -> (MockServer, Value) {
async fn create_mock_tokenized() -> (&'static MockServer, Value) {
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false, false).await
}
@ -145,8 +145,8 @@ async fn create_mock_with_template(
model_dimensions: ModelDimensions,
fallible: bool,
slow: bool,
) -> (MockServer, Value) {
let mock_server = MockServer::start().await;
) -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
const API_KEY: &str = "my-api-key";
const API_KEY_BEARER: &str = "Bearer my-api-key";
@ -299,7 +299,7 @@ async fn create_mock_with_template(
}
}))
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -321,27 +321,27 @@ const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée
Un chien nommé {{doc.name}}, né en {{doc.birthyear}}
{%- endif %}, de race {{doc.breed}}."#;
async fn create_mock() -> (MockServer, Value) {
async fn create_mock() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false, false).await
}
async fn create_mock_dimensions() -> (MockServer, Value) {
async fn create_mock_dimensions() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false, false).await
}
async fn create_mock_small_embedding_model() -> (MockServer, Value) {
async fn create_mock_small_embedding_model() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false, false).await
}
async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
async fn create_mock_legacy_embedding_model() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false, false).await
}
async fn create_fallible_mock() -> (MockServer, Value) {
async fn create_fallible_mock() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, false).await
}
async fn create_slow_mock() -> (MockServer, Value) {
async fn create_slow_mock() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, true).await
}
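These constructors now hand back a `&'static MockServer` by leaking the server instead of returning it by value. A minimal standalone sketch of the underlying pattern, assuming wiremock's `MockServer`; the `leaked_mock` name is illustrative and not part of the diff:

use wiremock::MockServer;

// Box::leak turns the Box<MockServer> into a &'static mut MockServer by never
// freeing the allocation, which then coerces to &'static MockServer. The server
// is intentionally leaked so the reference can outlive the function that built it.
async fn leaked_mock() -> &'static MockServer {
    Box::leak(Box::new(MockServer::start().await))
}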
@ -370,7 +370,7 @@ async fn it_works() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -601,7 +601,7 @@ async fn tokenize_long_text() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -657,7 +657,7 @@ async fn bad_api_key() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
@ -805,7 +805,7 @@ async fn bad_model() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
@ -883,7 +883,7 @@ async fn bad_dimensions() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
@ -992,7 +992,7 @@ async fn smaller_dimensions() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1224,7 +1224,7 @@ async fn small_embedding_model() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1455,7 +1455,7 @@ async fn legacy_embedding_model() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1687,7 +1687,7 @@ async fn it_still_works() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1916,7 +1916,7 @@ async fn timeout() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",

View File

@ -12,8 +12,8 @@ use crate::common::Value;
use crate::json;
use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
pub async fn create_mock() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@ -32,7 +32,7 @@ async fn create_mock() -> (MockServer, Value) {
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -50,8 +50,8 @@ async fn create_mock() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_default_template() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_default_template() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@ -73,7 +73,7 @@ async fn create_mock_default_template() -> (MockServer, Value) {
.set_body_json(json!({"error": "text not found", "text": text})),
}
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -106,8 +106,8 @@ struct SingleResponse {
embedding: Vec<f32>,
}
async fn create_mock_multiple() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_multiple() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@ -146,7 +146,7 @@ async fn create_mock_multiple() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(response)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -176,8 +176,8 @@ struct SingleRequest {
input: String,
}
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_single_response_in_array() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@ -212,7 +212,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(response)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -236,8 +236,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_raw_with_custom_header() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@ -277,7 +277,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(output)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -293,8 +293,8 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_raw() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_raw() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@ -321,7 +321,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(output)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -337,8 +337,8 @@ async fn create_mock_raw() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let count = AtomicUsize::new(0);
Mock::given(method("POST"))
@ -355,7 +355,7 @@ async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value)
ResponseTemplate::new(500).set_body_string("Service Unavailable")
}
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@ -1099,7 +1099,7 @@ async fn add_vector_and_user_provided() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1616,7 +1616,7 @@ async fn server_returns_multiple() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1722,7 +1722,7 @@ async fn server_single_input_returns_in_array() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@ -1828,7 +1828,7 @@ async fn server_raw() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(value.uid()).await;
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",

View File

@ -101,14 +101,7 @@ async fn reset_embedder_documents() {
server.wait_task(response.uid()).await;
// Make sure the documents are still present
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: None,
offset: None,
retrieve_vectors: false,
fields: None,
})
.await;
let (documents, _code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(json_string!(documents), @r###"
{
"results": [

View File

@ -168,6 +168,16 @@ pub enum SortError {
ReservedNameForFilter { name: String },
}
impl SortError {
pub fn into_search_error(self) -> Error {
Error::UserError(UserError::SortError { error: self, search: true })
}
pub fn into_document_error(self) -> Error {
Error::UserError(UserError::SortError { error: self, search: false })
}
}
impl From<AscDescError> for SortError {
fn from(error: AscDescError) -> Self {
match error {
@ -190,12 +200,6 @@ impl From<AscDescError> for SortError {
}
}
impl From<SortError> for Error {
fn from(error: SortError) -> Self {
Self::UserError(UserError::SortError(error))
}
}
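With the blanket `From<SortError> for Error` impl removed above, call sites now choose the conversion explicitly, so the resulting `UserError::SortError` records whether the sort came from a search or from the documents route. A minimal sketch of such a call site, assuming `SortError` and `Error` are in scope; the `convert_sort_error` helper and its `from_search` flag are illustrative names, not part of this diff:

// Hypothetical helper: route the sort error through the matching constructor.
fn convert_sort_error(error: SortError, from_search: bool) -> Error {
    if from_search {
        error.into_search_error()
    } else {
        error.into_document_error()
    }
}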
#[cfg(test)]
mod tests {
use big_s::S;

View File

@ -0,0 +1,294 @@
use crate::{
distance_between_two_points,
heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec},
lat_lng_to_xyz,
search::new::{facet_string_values, facet_values_prefix_key},
GeoPoint, Index,
};
use heed::{
types::{Bytes, Unit},
RoPrefix, RoTxn,
};
use roaring::RoaringBitmap;
use rstar::RTree;
use std::collections::VecDeque;
#[derive(Debug, Clone, Copy)]
pub struct GeoSortParameter {
// Define the strategy used by the geo sort
pub strategy: GeoSortStrategy,
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
pub max_bucket_size: u64,
// To account for GPS and geographical calculation errors, distances smaller than distance_error_margin are treated as equal
pub distance_error_margin: f64,
}
impl Default for GeoSortParameter {
fn default() -> Self {
Self {
strategy: GeoSortStrategy::default(),
max_bucket_size: 1000,
distance_error_margin: 1.0,
}
}
}
/// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
#[derive(Debug, Clone, Copy)]
pub enum GeoSortStrategy {
AlwaysIterative(usize),
AlwaysRtree(usize),
Dynamic(usize),
}
impl Default for GeoSortStrategy {
fn default() -> Self {
GeoSortStrategy::Dynamic(1000)
}
}
impl GeoSortStrategy {
pub fn use_rtree(&self, candidates: usize) -> bool {
match self {
GeoSortStrategy::AlwaysIterative(_) => false,
GeoSortStrategy::AlwaysRtree(_) => true,
GeoSortStrategy::Dynamic(i) => candidates >= *i,
}
}
pub fn cache_size(&self) -> usize {
match self {
GeoSortStrategy::AlwaysIterative(i)
| GeoSortStrategy::AlwaysRtree(i)
| GeoSortStrategy::Dynamic(i) => *i,
}
}
}
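As a hedged illustration (not part of the change itself): the doc comment above says the strategy's parameter is both the cache size and, for Dynamic, the switch-over point to the rtree. A minimal test-style sketch, assuming it lives in the same crate where GeoSortStrategy is in scope:
#[cfg(test)]
mod geo_sort_strategy_sketch {
    use super::GeoSortStrategy;

    #[test]
    fn dynamic_threshold_selects_the_backend() {
        // The default is Dynamic(1000): small candidate sets use the iterative
        // scan, large ones switch to the rtree.
        let strategy = GeoSortStrategy::default();
        assert!(!strategy.use_rtree(10));
        assert!(strategy.use_rtree(50_000));
        // The same parameter bounds the docid cache size.
        assert_eq!(strategy.cache_size(), 1000);

        // The fixed strategies ignore the candidate count entirely.
        assert!(!GeoSortStrategy::AlwaysIterative(100).use_rtree(1_000_000));
        assert!(GeoSortStrategy::AlwaysRtree(100).use_rtree(1));
    }
}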
#[allow(clippy::too_many_arguments)]
pub fn fill_cache(
index: &Index,
txn: &RoTxn<heed::AnyTls>,
strategy: GeoSortStrategy,
ascending: bool,
target_point: [f64; 2],
field_ids: &Option<[u16; 2]>,
rtree: &mut Option<RTree<GeoPoint>>,
geo_candidates: &RoaringBitmap,
cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>,
) -> crate::Result<()> {
debug_assert!(cached_sorted_docids.is_empty());
// lazily initialize the rtree if needed by the strategy, and cache it in `self.rtree`
let rtree = if strategy.use_rtree(geo_candidates.len() as usize) {
if let Some(rtree) = rtree.as_ref() {
// get rtree from cache
Some(rtree)
} else {
let rtree2 = index.geo_rtree(txn)?.expect("geo candidates but no rtree");
// insert the rtree into the cache and return it.
// Can't use `get_or_insert_with` because getting the rtree from the DB is a fallible operation.
Some(&*rtree.insert(rtree2))
}
} else {
None
};
let cache_size = strategy.cache_size();
if let Some(rtree) = rtree {
if ascending {
let point = lat_lng_to_xyz(&target_point);
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
cached_sorted_docids.push_back(point.data);
if cached_sorted_docids.len() >= cache_size {
break;
}
}
}
} else {
// in the case of the desc geo sort we look for the closest point to the opposite of the queried point
// and we insert the points in reverse order; they get reversed again when emptying the cache later on
let point = lat_lng_to_xyz(&opposite_of(target_point));
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
cached_sorted_docids.push_front(point.data);
if cached_sorted_docids.len() >= cache_size {
break;
}
}
}
}
} else {
// the iterative version
let [lat, lng] = field_ids.expect("fill_buffer can't be called without the lat&lng");
let mut documents = geo_candidates
.iter()
.map(|id| -> crate::Result<_> { Ok((id, geo_value(id, lat, lng, index, txn)?)) })
.collect::<crate::Result<Vec<(u32, [f64; 2])>>>()?;
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&target_point, p) as usize);
cached_sorted_docids.extend(documents);
};
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn next_bucket(
index: &Index,
txn: &RoTxn<heed::AnyTls>,
universe: &RoaringBitmap,
ascending: bool,
target_point: [f64; 2],
field_ids: &Option<[u16; 2]>,
rtree: &mut Option<RTree<GeoPoint>>,
cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>,
geo_candidates: &RoaringBitmap,
parameter: GeoSortParameter,
) -> crate::Result<Option<(RoaringBitmap, Option<[f64; 2]>)>> {
let mut geo_candidates = geo_candidates & universe;
if geo_candidates.is_empty() {
return Ok(Some((universe.clone(), None)));
}
let next = |cache: &mut VecDeque<_>| {
if ascending {
cache.pop_front()
} else {
cache.pop_back()
}
};
let put_back = |cache: &mut VecDeque<_>, x: _| {
if ascending {
cache.push_front(x)
} else {
cache.push_back(x)
}
};
let mut current_bucket = RoaringBitmap::new();
// current_distance stores the first point and distance in current bucket
let mut current_distance: Option<([f64; 2], f64)> = None;
loop {
// The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
if let Some((id, point)) = next(cached_sorted_docids) {
if geo_candidates.contains(id) {
let distance = distance_between_two_points(&target_point, &point);
if let Some((point0, bucket_distance)) = current_distance.as_ref() {
if (bucket_distance - distance).abs() > parameter.distance_error_margin {
// different distance, point belongs to next bucket
put_back(cached_sorted_docids, (id, point));
return Ok(Some((current_bucket, Some(point0.to_owned()))));
} else {
// same distance, point belongs to current bucket
current_bucket.insert(id);
// remove from candidates to prevent it from being added to the cache again
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == parameter.max_bucket_size {
return Ok(Some((current_bucket, Some(point0.to_owned()))));
}
}
} else {
// first doc in current bucket
current_distance = Some((point, distance));
current_bucket.insert(id);
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == parameter.max_bucket_size {
return Ok(Some((current_bucket, Some(point.to_owned()))));
}
}
}
} else {
// cache exhausted, we need to refill it
fill_cache(
index,
txn,
parameter.strategy,
ascending,
target_point,
field_ids,
rtree,
&geo_candidates,
cached_sorted_docids,
)?;
if cached_sorted_docids.is_empty() {
// candidates exhausted, exit
if let Some((point0, _)) = current_distance.as_ref() {
return Ok(Some((current_bucket, Some(point0.to_owned()))));
} else {
return Ok(Some((universe.clone(), None)));
}
}
}
}
}
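The bucketing rule in next_bucket keeps adding documents to the current bucket as long as their distance to the target differs from the bucket's first distance by at most distance_error_margin. A simplified, self-contained sketch of just that rule (illustrative only; the real function also refills the cache and enforces max_bucket_size):
/// Illustrative only: group pre-sorted (docid, distance) pairs into buckets in
/// which every distance stays within `margin` of the bucket's first distance.
fn bucket_by_distance(sorted: &[(u32, f64)], margin: f64) -> Vec<Vec<u32>> {
    let mut buckets: Vec<Vec<u32>> = Vec::new();
    let mut first_distance: Option<f64> = None;
    for &(id, distance) in sorted {
        match first_distance {
            Some(d0) if (d0 - distance).abs() <= margin => {
                // same distance (within the margin): same bucket
                buckets.last_mut().unwrap().push(id);
            }
            _ => {
                // first document, or a different distance: start a new bucket
                first_distance = Some(distance);
                buckets.push(vec![id]);
            }
        }
    }
    buckets
}
// With a margin of 1.0, distances 10.0 and 10.4 share a bucket and 12.0 starts a new one:
// bucket_by_distance(&[(1, 10.0), (2, 10.4), (3, 12.0)], 1.0) == vec![vec![1, 2], vec![3]]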
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
) -> crate::Result<RoPrefix<'a, FieldDocIdFacetCodec<OrderedF64Codec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
Ok(iter)
}
/// Extracts the lat and long values from a single document.
///
/// If it is not able to find it in the facet number index it will extract it
/// from the facet string index and parse it as f64 (as the geo extraction behaves).
pub(crate) fn geo_value(
docid: u32,
field_lat: u16,
field_lng: u16,
index: &Index,
rtxn: &RoTxn<'_>,
) -> crate::Result<[f64; 2]> {
let extract_geo = |geo_field: u16| -> crate::Result<f64> {
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok(((_, _, geo), ()))) => Ok(geo),
Some(Err(e)) => Err(e.into()),
None => match facet_string_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok((_, geo))) => {
Ok(geo.parse::<f64>().expect("cannot parse geo field as f64"))
}
Some(Err(e)) => Err(e.into()),
None => panic!("A geo faceted document doesn't contain any lat or lng"),
},
}
};
let lat = extract_geo(field_lat)?;
let lng = extract_geo(field_lng)?;
Ok([lat, lng])
}
/// Compute the antipodal coordinate of `coord`
pub(crate) fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] {
coord[0] *= -1.;
// in the case of x,0 we want to return x,180
if coord[1] > 0. {
coord[1] -= 180.;
} else {
coord[1] += 180.;
}
coord
}
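A couple of hedged example values for opposite_of, written as a test that could sit next to the function (the coordinates are arbitrary):
#[cfg(test)]
mod opposite_of_sketch {
    use super::opposite_of;

    #[test]
    fn antipodes() {
        // Longitude 0 maps to 180, as the comment above describes.
        assert_eq!(opposite_of([0.0, 0.0]), [0.0, 180.0]);
        // Positive longitudes lose 180 degrees, negative ones gain 180.
        assert_eq!(opposite_of([48.0, 2.0]), [-48.0, -178.0]);
        assert_eq!(opposite_of([10.0, -45.0]), [-10.0, 135.0]);
    }
}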

View File

@ -1,8 +1,10 @@
mod builder;
mod enriched;
pub mod geo_sort;
mod primary_key;
mod reader;
mod serde_impl;
pub mod sort;
use std::fmt::Debug;
use std::io;
@ -19,6 +21,7 @@ pub use primary_key::{
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};
pub use self::geo_sort::{GeoSortParameter, GeoSortStrategy};
use crate::error::{FieldIdMapMissingEntry, InternalError};
use crate::{FieldId, Object, Result};

View File

@ -0,0 +1,444 @@
use std::collections::{BTreeSet, VecDeque};
use crate::{
constants::RESERVED_GEO_FIELD_NAME,
documents::{geo_sort::next_bucket, GeoSortParameter},
heed_codec::{
facet::{FacetGroupKeyCodec, FacetGroupValueCodec},
BytesRefCodec,
},
is_faceted,
search::facet::{ascending_facet_sort, descending_facet_sort},
AscDesc, DocumentId, Member, UserError,
};
use heed::Database;
use roaring::RoaringBitmap;
#[derive(Debug, Clone, Copy)]
enum AscDescId {
Facet { field_id: u16, ascending: bool },
Geo { field_ids: [u16; 2], target_point: [f64; 2], ascending: bool },
}
/// A [`SortedDocumentsIterator`] allows efficient access to a continuous range of sorted documents.
/// This is ideal in the context of paginated queries in which only a small number of documents are needed at a time.
/// Search operations will only be performed upon access.
pub enum SortedDocumentsIterator<'ctx> {
Leaf {
/// The exact number of documents remaining
size: usize,
values: Box<dyn Iterator<Item = DocumentId> + 'ctx>,
},
Branch {
/// The current child, got from the children iterator
current_child: Option<Box<SortedDocumentsIterator<'ctx>>>,
/// The exact number of documents remaining, excluding documents in the current child
next_children_size: usize,
/// Iterators to become the current child once it is exhausted
next_children:
Box<dyn Iterator<Item = crate::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx>,
},
}
impl SortedDocumentsIterator<'_> {
/// Takes care of updating the current child if it is `None`, and also updates the size
fn update_current<'ctx>(
current_child: &mut Option<Box<SortedDocumentsIterator<'ctx>>>,
next_children_size: &mut usize,
next_children: &mut Box<
dyn Iterator<Item = crate::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx,
>,
) -> crate::Result<()> {
if current_child.is_none() {
*current_child = match next_children.next() {
Some(Ok(builder)) => {
let next_child = Box::new(builder.build()?);
*next_children_size -= next_child.size_hint().0;
Some(next_child)
}
Some(Err(e)) => return Err(e),
None => return Ok(()),
};
}
Ok(())
}
}
impl Iterator for SortedDocumentsIterator<'_> {
type Item = crate::Result<DocumentId>;
/// Implementing the `nth` method allows for efficient access to the nth document in the sorted order.
/// It's used by `skip` internally.
/// The default implementation of `nth` would iterate over all children, which is inefficient for large datasets.
/// This implementation will jump over whole chunks of children until it gets close.
fn nth(&mut self, n: usize) -> Option<Self::Item> {
if n == 0 {
return self.next();
}
// If it's at the leaf level, just forward the call to the values iterator
let (current_child, next_children, next_children_size) = match self {
SortedDocumentsIterator::Leaf { values, size } => {
*size = size.saturating_sub(n);
return values.nth(n).map(Ok);
}
SortedDocumentsIterator::Branch {
current_child,
next_children,
next_children_size,
} => (current_child, next_children, next_children_size),
};
// Otherwise don't directly iterate over children, skip them if we know we will go further
let mut to_skip = n - 1;
while to_skip > 0 {
if let Err(e) = SortedDocumentsIterator::update_current(
current_child,
next_children_size,
next_children,
) {
return Some(Err(e));
}
let Some(inner) = current_child else {
return None; // No more inner iterators, everything has been consumed.
};
if to_skip >= inner.size_hint().0 {
// The current child isn't large enough to contain the nth element.
// Skip it and continue with the next one.
to_skip -= inner.size_hint().0;
*current_child = None;
continue;
} else {
// The current iterator is large enough, so we can forward the call to it.
return inner.nth(to_skip + 1);
}
}
self.next()
}
/// Iterators need to keep track of their size so that they can be skipped efficiently by the `nth` method.
fn size_hint(&self) -> (usize, Option<usize>) {
let size = match self {
SortedDocumentsIterator::Leaf { size, .. } => *size,
SortedDocumentsIterator::Branch {
next_children_size,
current_child: Some(current_child),
..
} => current_child.size_hint().0 + next_children_size,
SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => {
*next_children_size
}
};
(size, Some(size))
}
fn next(&mut self) -> Option<Self::Item> {
match self {
SortedDocumentsIterator::Leaf { values, size } => {
let result = values.next().map(Ok);
if result.is_some() {
*size -= 1;
}
result
}
SortedDocumentsIterator::Branch {
current_child,
next_children_size,
next_children,
} => {
let mut result = None;
while result.is_none() {
// Ensure we have selected an iterator to work with
if let Err(e) = SortedDocumentsIterator::update_current(
current_child,
next_children_size,
next_children,
) {
return Some(Err(e));
}
let Some(inner) = current_child else {
return None;
};
result = inner.next();
// If the current iterator is exhausted, we need to try the next one
if result.is_none() {
*current_child = None;
}
}
result
}
}
}
}
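Because nth jumps over whole children using their size_hint, skipping to a page is cheap. A hypothetical crate-internal helper showing the intended usage (the helper itself is not part of this change):
// Hypothetical helper: `skip` delegates to `nth`, which hops over entire child
// iterators instead of visiting every document id before the offset.
fn page_of_docids(
    iter: SortedDocumentsIterator<'_>,
    offset: usize,
    limit: usize,
) -> crate::Result<Vec<DocumentId>> {
    iter.skip(offset).take(limit).collect()
}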
/// Builder for a [`SortedDocumentsIterator`].
/// Most builders won't ever be built, because pagination will skip them.
pub struct SortedDocumentsIteratorBuilder<'ctx> {
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
fields: &'ctx [AscDescId],
candidates: RoaringBitmap,
geo_candidates: &'ctx RoaringBitmap,
}
impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
/// Performs the sort and builds a [`SortedDocumentsIterator`].
fn build(self) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let size = self.candidates.len() as usize;
match self.fields {
[] => Ok(SortedDocumentsIterator::Leaf {
size,
values: Box::new(self.candidates.into_iter()),
}),
[AscDescId::Facet { field_id, ascending }, next_fields @ ..] => {
SortedDocumentsIteratorBuilder::build_facet(
self.index,
self.rtxn,
self.number_db,
self.string_db,
next_fields,
self.candidates,
self.geo_candidates,
*field_id,
*ascending,
)
}
[AscDescId::Geo { field_ids, target_point, ascending }, next_fields @ ..] => {
SortedDocumentsIteratorBuilder::build_geo(
self.index,
self.rtxn,
self.number_db,
self.string_db,
next_fields,
self.candidates,
self.geo_candidates,
*field_ids,
*target_point,
*ascending,
)
}
}
}
/// Builds a [`SortedDocumentsIterator`] based on the results of a facet sort.
#[allow(clippy::too_many_arguments)]
fn build_facet(
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
next_fields: &'ctx [AscDescId],
candidates: RoaringBitmap,
geo_candidates: &'ctx RoaringBitmap,
field_id: u16,
ascending: bool,
) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let size = candidates.len() as usize;
// Perform the sort on the first field
let (number_iter, string_iter) = if ascending {
let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
} else {
let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
// Create builders for the next level of the tree
let number_iter = number_iter.map(|r| r.map(|(d, _)| d));
let string_iter = string_iter.map(|r| r.map(|(d, _)| d));
let next_children = number_iter.chain(string_iter).map(move |r| {
Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: r?,
geo_candidates,
})
});
Ok(SortedDocumentsIterator::Branch {
current_child: None,
next_children_size: size,
next_children: Box::new(next_children),
})
}
/// Builds a [`SortedDocumentsIterator`] based on the (lazy) results of a geo sort.
#[allow(clippy::too_many_arguments)]
fn build_geo(
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
next_fields: &'ctx [AscDescId],
candidates: RoaringBitmap,
geo_candidates: &'ctx RoaringBitmap,
field_ids: [u16; 2],
target_point: [f64; 2],
ascending: bool,
) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let mut cache = VecDeque::new();
let mut rtree = None;
let size = candidates.len() as usize;
let not_geo_candidates = candidates.clone() - geo_candidates;
let mut geo_remaining = size - not_geo_candidates.len() as usize;
let mut not_geo_candidates = Some(not_geo_candidates);
let next_children = std::iter::from_fn(move || {
// Find the next bucket of geo-sorted documents.
// next_bucket loops and will go back to the beginning so we use a variable to track how many are left.
if geo_remaining > 0 {
if let Ok(Some((docids, _point))) = next_bucket(
index,
rtxn,
&candidates,
ascending,
target_point,
&Some(field_ids),
&mut rtree,
&mut cache,
geo_candidates,
GeoSortParameter::default(),
) {
geo_remaining -= docids.len() as usize;
return Some(Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: docids,
geo_candidates,
}));
}
}
// Once all geo candidates have been processed, we can return the others
if let Some(not_geo_candidates) = not_geo_candidates.take() {
if !not_geo_candidates.is_empty() {
return Some(Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: not_geo_candidates,
geo_candidates,
}));
}
}
None
});
Ok(SortedDocumentsIterator::Branch {
current_child: None,
next_children_size: size,
next_children: Box::new(next_children),
})
}
}
/// A structure owning the data needed during the lifetime of a [`SortedDocumentsIterator`].
pub struct SortedDocuments<'ctx> {
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
fields: Vec<AscDescId>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
candidates: &'ctx RoaringBitmap,
geo_candidates: RoaringBitmap,
}
impl<'ctx> SortedDocuments<'ctx> {
pub fn iter(&'ctx self) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let builder = SortedDocumentsIteratorBuilder {
index: self.index,
rtxn: self.rtxn,
number_db: self.number_db,
string_db: self.string_db,
fields: &self.fields,
candidates: self.candidates.clone(),
geo_candidates: &self.geo_candidates,
};
builder.build()
}
}
pub fn recursive_sort<'ctx>(
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
sort: Vec<AscDesc>,
candidates: &'ctx RoaringBitmap,
) -> crate::Result<SortedDocuments<'ctx>> {
let sortable_fields: BTreeSet<_> = index.sortable_fields(rtxn)?.into_iter().collect();
let fields_ids_map = index.fields_ids_map(rtxn)?;
// Retrieve the field ids that are used for sorting
let mut fields = Vec::new();
let mut need_geo_candidates = false;
for asc_desc in sort {
let (field, geofield) = match asc_desc {
AscDesc::Asc(Member::Field(field)) => (Some((field, true)), None),
AscDesc::Desc(Member::Field(field)) => (Some((field, false)), None),
AscDesc::Asc(Member::Geo(target_point)) => (None, Some((target_point, true))),
AscDesc::Desc(Member::Geo(target_point)) => (None, Some((target_point, false))),
};
if let Some((field, ascending)) = field {
if is_faceted(&field, &sortable_fields) {
if let Some(field_id) = fields_ids_map.id(&field) {
fields.push(AscDescId::Facet { field_id, ascending });
continue;
}
}
return Err(UserError::InvalidDocumentSortableAttribute {
field: field.to_string(),
sortable_fields: sortable_fields.clone(),
}
.into());
}
if let Some((target_point, ascending)) = geofield {
if sortable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if let (Some(lat), Some(lng)) =
(fields_ids_map.id("_geo.lat"), fields_ids_map.id("_geo.lng"))
{
need_geo_candidates = true;
fields.push(AscDescId::Geo { field_ids: [lat, lng], target_point, ascending });
continue;
}
}
return Err(UserError::InvalidDocumentSortableAttribute {
field: RESERVED_GEO_FIELD_NAME.to_string(),
sortable_fields: sortable_fields.clone(),
}
.into());
}
}
let geo_candidates = if need_geo_candidates {
index.geo_faceted_documents_ids(rtxn)?
} else {
RoaringBitmap::new()
};
let number_db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let string_db =
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
Ok(SortedDocuments { index, rtxn, fields, number_db, string_db, candidates, geo_candidates })
}
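A hedged usage sketch of recursive_sort, assuming crate-internal code where index: &Index, rtxn: &RoTxn, and candidates: &RoaringBitmap are already in scope, and where price and _geo are declared as sortable attributes (all names illustrative):
// Sort by price ascending, then by distance to a point, and read one page.
// Only the buckets actually visited by skip/take get sorted.
let sort = vec![
    AscDesc::Asc(Member::Field("price".to_string())),
    AscDesc::Desc(Member::Geo([48.86, 2.35])),
];
let sorted = recursive_sort(index, rtxn, sort, candidates)?;
let page: Vec<DocumentId> = sorted.iter()?.skip(40).take(20).collect::<crate::Result<_>>()?;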

View File

@ -191,7 +191,21 @@ and can not be more than 511 bytes.", .document_id.to_string()
),
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
InvalidSearchSortableAttribute {
field: String,
valid_fields: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not sortable. {}",
.field,
match .sortable_fields.is_empty() {
true => "This index does not have configured sortable attributes.".to_string(),
false => format!("Available sortable attributes are: `{}`.",
sortable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidDocumentSortableAttribute { field: String, sortable_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field,
match (.valid_patterns.is_empty(), .matching_rule_index) {
@ -272,8 +286,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
PrimaryKeyCannotBeChanged(String),
#[error(transparent)]
SerdeJson(serde_json::Error),
#[error(transparent)]
SortError(#[from] SortError),
#[error("{error}")]
SortError { error: SortError, search: bool },
#[error("An unknown internal document id have been used: `{document_id}`.")]
UnknownInternalDocumentId { document_id: DocumentId },
#[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")]
@ -616,7 +630,7 @@ fn conditionally_lookup_for_error_message() {
];
for (list, suffix) in messages {
let err = UserError::InvalidSortableAttribute {
let err = UserError::InvalidSearchSortableAttribute {
field: "name".to_string(),
valid_fields: list,
hidden_fields: false,

View File

@ -111,7 +111,7 @@ impl FilterableAttributesFeatures {
self.filter.is_filterable_null()
}
/// Check if `IS EXISTS` is allowed
/// Check if `EXISTS` is allowed
pub fn is_filterable_exists(&self) -> bool {
self.filter.is_filterable_exists()
}

View File

@ -43,12 +43,13 @@ use std::fmt;
use std::hash::BuildHasherDefault;
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
pub use documents::GeoSortStrategy;
pub use filter_parser::{Condition, FilterCondition, Span, Token};
use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
pub use search::new::{
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext,
SearchLogger, VisualSearchLogger,
execute_search, filtered_universe, DefaultSearchLogger, SearchContext, SearchLogger,
VisualSearchLogger,
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};

View File

@ -10,7 +10,7 @@ use memchr::memmem::Finder;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
use super::facet_range_search;
use super::{facet_range_search, filter_vector::VectorFilter};
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
@ -228,6 +228,10 @@ impl<'a> Filter<'a> {
pub fn use_contains_operator(&self) -> Option<&Token> {
self.condition.use_contains_operator()
}
pub fn use_vector_filter(&self) -> Option<&Token> {
self.condition.use_vector_filter()
}
}
impl<'a> Filter<'a> {
@ -235,10 +239,12 @@ impl<'a> Filter<'a> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if matching_features(attribute, &filterable_attributes_rules)
.is_some_and(|(_, features)| features.is_filterable())
|| VectorFilter::matches(attribute)
{
continue;
}
@ -542,7 +548,18 @@ impl<'a> Filter<'a> {
.union()
}
FilterCondition::Condition { fid, op } => {
let Some(field_id) = field_ids_map.id(fid.value()) else {
let value = fid.value();
if VectorFilter::matches(value) {
if !matches!(op, Condition::Exists) {
return Err(Error::UserError(UserError::InvalidFilter(String::from(
"Vector filter can only be used with the `exists` operator",
))));
}
let vector_filter = VectorFilter::parse(fid)?;
return vector_filter.evaluate(rtxn, index, universe);
}
let Some(field_id) = field_ids_map.id(value) else {
return Ok(RoaringBitmap::new());
};
let Some((rule_index, features)) =

View File

@ -0,0 +1,276 @@
use filter_parser::Token;
use roaring::{MultiOps, RoaringBitmap};
use crate::error::{Error, UserError};
use crate::vector::db::IndexEmbeddingConfig;
use crate::vector::{ArroyStats, ArroyWrapper};
use crate::Index;
#[derive(Debug)]
enum VectorFilterInner<'a> {
Fragment { embedder_token: Token<'a>, fragment_token: Token<'a> },
DocumentTemplate { embedder_token: Token<'a> },
UserProvided { embedder_token: Token<'a> },
FullEmbedder { embedder_token: Token<'a> },
}
#[derive(Debug)]
pub(super) struct VectorFilter<'a> {
inner: Option<VectorFilterInner<'a>>,
regenerate: bool,
}
#[derive(Debug, thiserror::Error)]
pub enum VectorFilterError<'a> {
#[error("Vector filter cannot be empty.")]
EmptyFilter,
#[error("Vector filter must start with `_vectors` but found `{}`.", _0.value())]
InvalidPrefix(Token<'a>),
#[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
MissingFragmentName(Token<'a>),
#[error("Vector filter cannot have both `{}` and `{}`.", _0.0.value(), _0.1.value())]
ExclusiveOptions(Box<(Token<'a>, Token<'a>)>),
#[error("Vector filter has leftover token: `{}`.", _0.value())]
LeftoverToken(Token<'a>),
#[error("The embedder `{}` does not exist. {}", embedder.value(), {
if available.is_empty() {
String::from("This index does not have any configured embedders.")
} else {
let mut available = available.clone();
available.sort_unstable();
format!("Available embedders are: {}.", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", "))
}
})]
EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> },
#[error("The fragment `{}` does not exist on embedder `{}`. {}", fragment.value(), embedder.value(), {
if available.is_empty() {
String::from("This embedder does not have any configured fragments.")
} else {
let mut available = available.clone();
available.sort_unstable();
format!("Available fragments on this embedder are: {}.", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", "))
}
})]
FragmentDoesNotExist {
embedder: &'a Token<'a>,
fragment: &'a Token<'a>,
available: Vec<String>,
},
}
use VectorFilterError::*;
impl<'a> From<VectorFilterError<'a>> for Error {
fn from(err: VectorFilterError<'a>) -> Self {
match &err {
EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())),
InvalidPrefix(token) | MissingFragmentName(token) | LeftoverToken(token) => {
token.clone().as_external_error(err).into()
}
ExclusiveOptions(tokens) => tokens.1.clone().as_external_error(err).into(),
EmbedderDoesNotExist { embedder: token, .. }
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
}
}
}
impl<'a> VectorFilter<'a> {
pub(super) fn matches(value: &str) -> bool {
value.starts_with("_vectors.") || value == "_vectors"
}
/// Parses a vector filter string.
///
/// Valid formats:
/// - `_vectors`
/// - `_vectors.{embedder_name}`
/// - `_vectors.{embedder_name}.regenerate`
/// - `_vectors.{embedder_name}.userProvided`
/// - `_vectors.{embedder_name}.userProvided.regenerate`
/// - `_vectors.{embedder_name}.documentTemplate`
/// - `_vectors.{embedder_name}.documentTemplate.regenerate`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}.regenerate`
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
let mut split = s.split(".").peekable();
match split.next() {
Some(token) if token.value() == "_vectors" => (),
Some(token) => return Err(InvalidPrefix(token)),
None => return Err(EmptyFilter),
}
let embedder_name = split.next();
let mut fragment_name = None;
if split.peek().map(|t| t.value()) == Some("fragments") {
let token = split.next().expect("it was peeked before");
fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
}
let mut user_provided_token = None;
if split.peek().map(|t| t.value()) == Some("userProvided") {
user_provided_token = split.next();
}
let mut document_template_token = None;
if split.peek().map(|t| t.value()) == Some("documentTemplate") {
document_template_token = split.next();
}
let mut regenerate_token = None;
if split.peek().map(|t| t.value()) == Some("regenerate") {
regenerate_token = split.next();
}
let inner = match (fragment_name, user_provided_token, document_template_token) {
(Some(fragment_name), None, None) => Some(VectorFilterInner::Fragment {
embedder_token: embedder_name
.expect("embedder name comes before fragment so it's always Some"),
fragment_token: fragment_name,
}),
(None, Some(_), None) => Some(VectorFilterInner::UserProvided {
embedder_token: embedder_name
.expect("embedder name comes before userProvided so it's always Some"),
}),
(None, None, Some(_)) => Some(VectorFilterInner::DocumentTemplate {
embedder_token: embedder_name
.expect("embedder name comes before documentTemplate so it's always Some"),
}),
(Some(a), Some(b), _) | (_, Some(a), Some(b)) | (Some(a), None, Some(b)) => {
return Err(ExclusiveOptions(Box::new((a, b))));
}
(None, None, None) => embedder_name
.map(|embedder_token| VectorFilterInner::FullEmbedder { embedder_token }),
};
if let Some(next) = split.next() {
return Err(LeftoverToken(next))?;
}
Ok(Self { inner, regenerate: regenerate_token.is_some() })
}
pub(super) fn evaluate(
self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
universe: Option<&RoaringBitmap>,
) -> crate::Result<RoaringBitmap> {
let index_embedding_configs = index.embedding_configs();
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
let inners = match self.inner {
Some(inner) => vec![inner],
None => embedding_configs
.iter()
.map(|config| VectorFilterInner::FullEmbedder {
embedder_token: Token::from(config.name.as_str()),
})
.collect(),
};
let mut docids = inners
.iter()
.map(|i| i.evaluate_inner(rtxn, index, &embedding_configs, self.regenerate))
.union()?;
if let Some(universe) = universe {
docids &= universe;
}
Ok(docids)
}
}
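A hedged sketch of the accepted shapes from the doc comment on parse, written as an in-crate test; the embedder name default and the fragment name title are made up, and Token::from is used the same way as in evaluate above:
#[cfg(test)]
mod vector_filter_parse_sketch {
    use filter_parser::Token;

    use super::VectorFilter;

    #[test]
    fn parses_the_documented_shapes() {
        for input in [
            "_vectors",
            "_vectors.default",
            "_vectors.default.userProvided",
            "_vectors.default.fragments.title.regenerate",
        ] {
            let token = Token::from(input);
            assert!(VectorFilter::parse(&token).is_ok(), "failed to parse `{input}`");
        }

        // `userProvided` and `documentTemplate` are mutually exclusive.
        let token = Token::from("_vectors.default.userProvided.documentTemplate");
        assert!(VectorFilter::parse(&token).is_err());
    }
}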
impl VectorFilterInner<'_> {
fn evaluate_inner(
&self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
embedding_configs: &[IndexEmbeddingConfig],
regenerate: bool,
) -> crate::Result<RoaringBitmap> {
let embedder = match self {
VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
VectorFilterInner::UserProvided { embedder_token } => embedder_token,
VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
};
let embedder_name = embedder.value();
let available_embedders =
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
let embedding_config = embedding_configs
.iter()
.find(|config| config.name == embedder_name)
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
let embedder_info = index
.embedding_configs()
.embedder_info(rtxn, embedder_name)?
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
let arroy_wrapper = ArroyWrapper::new(
index.vector_arroy,
embedder_info.embedder_id,
embedding_config.config.quantized(),
);
let mut docids = match self {
VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
let fragment_name = fragment.value();
let fragment_config = embedding_config
.fragments
.as_slice()
.iter()
.find(|fragment| fragment.name == fragment_name)
.ok_or_else(|| FragmentDoesNotExist {
embedder,
fragment,
available: embedding_config
.fragments
.as_slice()
.iter()
.map(|f| f.name.clone())
.collect(),
})?;
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
}
VectorFilterInner::DocumentTemplate { .. } => {
if !embedding_config.fragments.as_slice().is_empty() {
return Ok(RoaringBitmap::new());
}
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
let mut stats = ArroyStats::default();
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
stats.documents - user_provided_docsids.clone()
}
VectorFilterInner::UserProvided { .. } => {
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
user_provided_docsids.clone()
}
VectorFilterInner::FullEmbedder { .. } => {
let mut stats = ArroyStats::default();
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
stats.documents
}
};
if regenerate {
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
docids -= skip_regenerate;
}
Ok(docids)
}
}

View File

@ -17,6 +17,7 @@ mod facet_range_search;
mod facet_sort_ascending;
mod facet_sort_descending;
mod filter;
mod filter_vector;
mod search;
fn facet_extreme_value<'t>(

View File

@ -210,6 +210,7 @@ impl Search<'_> {
scoring_strategy: ScoringStrategy::Detailed,
words_limit: self.words_limit,
exhaustive_number_hits: self.exhaustive_number_hits,
max_total_hits: self.max_total_hits,
rtxn: self.rtxn,
index: self.index,
semantic: self.semantic.clone(),

View File

@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::documents::GeoSortParameter;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::index::MatchingStrategy;
use crate::score_details::{ScoreDetails, ScoringStrategy};
@ -47,11 +48,12 @@ pub struct Search<'a> {
sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>,
geo_param: new::GeoSortParameter,
geo_param: GeoSortParameter,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
words_limit: usize,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
rtxn: &'a heed::RoTxn<'a>,
index: &'a Index,
semantic: Option<SemanticSearch>,
@ -70,10 +72,11 @@ impl<'a> Search<'a> {
sort_criteria: None,
distinct: None,
searchable_attributes: None,
geo_param: new::GeoSortParameter::default(),
geo_param: GeoSortParameter::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(),
exhaustive_number_hits: false,
max_total_hits: None,
words_limit: 10,
rtxn,
index,
@ -147,7 +150,7 @@ impl<'a> Search<'a> {
}
#[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
pub fn geo_sort_strategy(&mut self, strategy: crate::GeoSortStrategy) -> &mut Search<'a> {
self.geo_param.strategy = strategy;
self
}
@ -165,6 +168,11 @@ impl<'a> Search<'a> {
self
}
pub fn max_total_hits(&mut self, max_total_hits: Option<usize>) -> &mut Search<'a> {
self.max_total_hits = max_total_hits;
self
}
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
self.time_budget = time_budget;
self
@ -243,6 +251,8 @@ impl<'a> Search<'a> {
&mut ctx,
vector,
self.scoring_strategy,
self.exhaustive_number_hits,
self.max_total_hits,
universe,
&self.sort_criteria,
&self.distinct,
@ -261,6 +271,7 @@ impl<'a> Search<'a> {
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
self.max_total_hits,
universe,
&self.sort_criteria,
&self.distinct,
@ -314,6 +325,7 @@ impl fmt::Debug for Search<'_> {
scoring_strategy,
words_limit,
exhaustive_number_hits,
max_total_hits,
rtxn: _,
index: _,
semantic,
@ -333,6 +345,7 @@ impl fmt::Debug for Search<'_> {
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("max_total_hits", max_total_hits)
.field("words_limit", words_limit)
.field(
"semantic.embedder_name",

View File

@ -32,6 +32,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@ -159,7 +161,13 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
};
}
while valid_docids.len() < length {
let max_len_to_evaluate =
match (max_total_hits, exhaustive_number_hits && ranking_score_threshold.is_some()) {
(Some(max_total_hits), true) => max_total_hits,
_ => length,
};
while valid_docids.len() < max_len_to_evaluate {
if time_budget.exceeded() {
loop {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
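With these two new arguments, the loop no longer stops at the requested page length when an exact total is needed: if a ranking score threshold is set and counting must be exhaustive, it keeps evaluating documents up to max_total_hits. A small self-contained sketch of that decision, mirroring the match above (the parameter meanings are inferred from the surrounding code):
// Illustrative only: how many documents bucket_sort keeps evaluating.
fn max_len_to_evaluate(
    length: usize,                     // documents requested (offset + limit)
    max_total_hits: Option<usize>,     // assumed to come from the pagination settings
    exhaustive_number_hits: bool,      // an exact total count is required
    has_ranking_score_threshold: bool, // a rankingScoreThreshold was provided
) -> usize {
    match (max_total_hits, exhaustive_number_hits && has_ranking_score_threshold) {
        (Some(max_total_hits), true) => max_total_hits,
        _ => length,
    }
}
// A 20-hit page with a threshold and exhaustive counting still ranks up to 1000 documents:
// max_len_to_evaluate(20, Some(1000), true, true) == 1000
// Without a threshold, only the requested page is evaluated:
// max_len_to_evaluate(20, Some(1000), true, false) == 20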

Some files were not shown because too many files have changed in this diff.