Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-02 18:55:36 +00:00)

Compare commits: refactor-s...measure-so (282 commits)
Commits in this comparison, oldest to newest as listed (only the SHA1 column was captured in this mirror; author and date are missing):

4d92df1b95, ca332883cc, 6264dbf326, 2b7b18fb5f, b03ec3f603, 91ec0bdaf4, 56329633d5, 507bce791b,
7adc715783, f355cf6985, 2603d8d0d0, 9b3d303b08, 16b4545d23, 0e08906fcb, a3beaa90c5, 02fff51902,
54e2e2aa4a, 092a383419, 98d55e0d4d, 8319552e7d, 5d5769fd8a, eafc097a85, f17cb2ef5b, 48bc797dce,
c6b33fd407, 6e9d0de8b7, 1bfb16386c, ea73615abf, 02c61eabfa, 56b60ec7a0, 8f416e8f34, cf760cbfb1,
2af9481804, 7a292b572a, 8d6ac261ae, b4c8b01c88, 24240934f9, f4c94ac57f, 4087a88dbe, 5adacf2f45,
65d0c32aa7, 82647bcded, 1582c7e788, 20094eba06, c35904d6e8, 2cacc448b6, a61b852695, 3167411e98,
83d71662aa, 5c323cecc7, 77b9347fff, c85dd9f635, 7da95d62e2, 2cda1360ee, 5f9c05b944, d3a6d2a6fa,
2123d76089, edab4e75b0, b9982587d4, e83da00446, 7fb3e378ff, 12a7a45930, 677ed6bbf6, 29b44e5541,
6e80364c50, 603676cb3b, 23e102ca71, f36f34c2f7, 3bac22fd87, ce61cb7fe6, 1693d1a311, febea735ca,
93ba051094, cd7a20fa32, 41f51adbec, 0ca1a4e805, 50a7393c55, 837274f853, 487997f6ad, 94809090a3,
01144b2c74, e97600eead, 767553519d, aace587dd1, e706023969, bcd0c5f5a4, f35d6710f3, b7b8f564c3,
862d49e4af, 81ec0abad1, b67d385cf0, dfecb25814, 2eae2015d7, 33fa17bf12, 400e6b93ce, f32e6c32fc,
f4add93043, f07256971a, 2fae96ac14, 246f0e7130, 45af18ae9c, 2d97164d9f, efc156a4a4, ba85959642,
1702b5cf44, 2099b4f0dd, 0d5bc4578e, 8f60ad0a23, 9570139eeb, 9d6885793e, 98cd6a865c, 5f4530ce57,
0ecaf861fa, 4d5005b01a, 952e742321, ee9aa63044, 43db4f4242, 9feba5028d, 0a40a98bb6, aac15f6719,
ea21b948b1, 53a359286c, 47e526f5ea, 4aa7d386d8, 84fabb9314, cd46ebd6b5, ef8d9a20f8, 6afa578688,
300bdfc2a7, e7e74c0099, 05cc2d1fac, 22b9c277d0, 16bde973aa, 13d1d78a2d, b2b7a633a6, 7be109cafe,
6ebefd1067, d25ae36e22, b64b4ab6ca, 427861b323, d29cb75061, 128e6c7502, 3129f96603, c701d89fdc,
3d9befd64f, ee14d5196c, d96372b9c4, ea67816a21, c885fcebcc, b6e1a1f2f5, 277f4883f6, 015d90a962,
0df84bbba7, e53de15b8e, 8c4921b9dd, f6a00f4a90, ce08dc509b, 1daaed163a, 809e742253, decdfe03bc,
aae5c324d7, a108d8f6f3, 34cf576339, eb292a7a62, e28332a904, a1dcde6b9a, 544e98ca99, 1e4699b82c,
2c09c324f7, 3d6b61d8d2, 1374b661d1, 7e3c306c54, 2608a596a0, e16edb2c35, 5c758438fc, ab6cac2321,
6fb36ed30e, dcdc83946f, 3c4c46377b, 7da21bb601, 13161fd7d0, b81e2951a9, d75e0098c7, 27496354e2,
2e0ff56f3f, a74fb87d1e, 558b66e535, cade18bd47, 298c7b0c93, 606e108420, 7be17b7e4c, 1693332cab,
ddd564665b, 2a38f5c757, 133d33d72c, fb683fe88b, 4ae11bfd31, 9736e16a88, 6fa4da8ae7, 19d7cdc20d,
c229200820, bad28cc9e2, 534f696b29, a04041c8f2, b347b66619, e580d6b98f, 8ba65e333b, 43875e6758,
d7844a6e45, e9bf4c43a4, a8a0854421, 0a8f50695e, 09d9b63e1c, b9b938c902, 6bf07d969e, e35ef31738,
3f212a8202, bc547dad6f, 3bc8f81abc, a89eea233b, 34fabed214, fca9fe39b3, f5cf01e7d1, d1dd7e5d09,
d18c1f77d7, d0b05ae691, e9bf4eb100, b368105272, e0eff08095, 304a9df52d, 39f60abd7d, 1991bd03da,
ee39309aae, 0d31be1494, 3493093c4f, 7cef2299cf, 600e97d9dc, d1962b2b0f, 8b450b84f8, 0502b17501,
57d066595b, 734d1c53ad, 63dded3961, 2cdcb703d9, 6607875f49, ea61e5cbec, 31a793d226, d85ab23b82,
b7349910d9, 49fa41ce65, 400cf3eb92, 376b3a19a7, d92c173fdc, b867829ef1, 6b29676e7e, caad40964a,
cc5dca8321, 5d50850e12, a73ccc78a6, 9eb6f522ea, 04f6523f3c, 30d66abf8d, 84e498299b, 7a84697570,
4148fbbe85, 93f5defedc, 33241a6b12, ff87b4db26, ba9fadc8f1, d29d4f88da, 17c5ceeb9d, c32d746069,
b9a0ff0dd6, 75496af985, 0e9eb9eedb, 3a78e988da, d9e5074189, bc210bdc00, 4bf83f701c, db3887929f,
9af103a88e, 99211eb375
.github/workflows/bench-manual.yml (vendored): 4 lines changed

@@ -18,11 +18,9 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
         run: |
.github/workflows/bench-pr.yml (vendored): 4 lines changed

@@ -35,11 +35,9 @@ jobs:
           fetch-depth: 0 # fetch full history to be able to get main commit sha
           ref: ${{ steps.comment-branch.outputs.head_ref }}

-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       - name: Run benchmarks on PR ${{ github.event.issue.id }}
         run: |
.github/workflows/bench-push-indexing.yml (vendored): 4 lines changed

@@ -12,11 +12,9 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
.github/workflows/benchmarks-manual.yml (vendored): 4 lines changed

@@ -18,11 +18,9 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
      - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       # Set variables
       - name: Set current branch name
.github/workflows/benchmarks-pr.yml (vendored): 4 lines changed

@@ -13,11 +13,9 @@ jobs:
     runs-on: benchmarks
     timeout-minutes: 4320 # 72h
     steps:
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       - name: Check for Command
         id: command
Four more workflow files receive the same toolchain change (their file headers were not captured in this mirror):

@@ -16,11 +16,9 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       # Set variables
       - name: Set current branch name

The following hunk appears three times, once in each of the three remaining files:

@@ -15,11 +15,9 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       # Set variables
       - name: Set current branch name
.github/workflows/flaky-tests.yml (vendored): 10 lines changed

@@ -1,4 +1,5 @@
 name: Look for flaky tests
+
 on:
   workflow_dispatch:
   schedule:

@@ -8,18 +9,15 @@ jobs:
   flaky:
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Install cargo-flaky
         run: cargo install cargo-flaky
       - name: Run cargo flaky in the dumps
.github/workflows/fuzzer-indexing.yml (vendored): 4 lines changed

@@ -12,11 +12,9 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true

       # Run benchmarks
       - name: Run the fuzzer
.github/workflows/publish-apt-brew-pkg.yml (vendored): 9 lines changed

@@ -18,17 +18,14 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-version
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     steps:
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Install cargo-deb
         run: cargo install cargo-deb
       - uses: actions/checkout@v3
.github/workflows/publish-binaries.yml (vendored): 31 lines changed

@@ -37,18 +37,15 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-version
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)

@@ -78,10 +75,7 @@ jobs:
           asset_name: meilisearch-windows-amd64.exe
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)

@@ -107,12 +101,10 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v3
       - name: Installing Rust toolchain
-        uses: actions-rs/toolchain@v1
+        uses: helix-editor/rust-toolchain@v1
         with:
-          toolchain: stable
           profile: minimal
           target: ${{ matrix.target }}
-          override: true
       - name: Cargo build
         uses: actions-rs/cargo@v1
         with:

@@ -132,9 +124,11 @@ jobs:
     name: Publish binary for aarch64
     runs-on: ubuntu-latest
     needs: check-version
+    env:
+      DEBIAN_FRONTEND: noninteractive
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     strategy:
       matrix:
         include:

@@ -154,12 +148,10 @@ jobs:
           add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
           apt-get update -y && apt-get install -y docker-ce
       - name: Installing Rust toolchain
-        uses: actions-rs/toolchain@v1
+        uses: helix-editor/rust-toolchain@v1
         with:
-          toolchain: stable
           profile: minimal
           target: ${{ matrix.target }}
-          override: true
       - name: Configure target aarch64 GNU
         ## Environment variable is not passed using env:
         ## LD gold won't work with MUSL

@@ -170,6 +162,9 @@ jobs:
           echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
           echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
           echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
+      - name: Install a default toolchain that will be used to build cargo cross
+        run: |
+          rustup default stable
       - name: Cargo build
         uses: actions-rs/cargo@v1
         with:
.github/workflows/publish-docker-images.yml (vendored): 3 lines changed

@@ -80,10 +80,11 @@ jobs:
            type=ref,event=tag
            type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
            type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
+           type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
            type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}

       - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           push: true
           platforms: linux/amd64,linux/arm64
.github/workflows/test-suite.yml (vendored): 51 lines changed

@@ -19,11 +19,11 @@ env:

 jobs:
   test-linux:
-    name: Tests on ubuntu-18.04
+    name: Tests on ubuntu-20.04
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies

@@ -31,10 +31,7 @@ jobs:
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - name: Setup test with Rust stable
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+        uses: helix-editor/rust-toolchain@v1
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
       - name: Run cargo check without any default features

@@ -59,10 +56,7 @@ jobs:
       - uses: actions/checkout@v3
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:

@@ -78,8 +72,8 @@ jobs:
     name: Tests almost all features
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v3

@@ -87,10 +81,7 @@ jobs:
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Run cargo build with almost all features
         run: |
           cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"

@@ -102,7 +93,7 @@ jobs:
     name: Test disabled tokenization
     runs-on: ubuntu-latest
     container:
-      image: ubuntu:18.04
+      image: ubuntu:20.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v3

@@ -110,13 +101,10 @@ jobs:
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Run cargo tree without default features and check lindera is not present
         run: |
-          if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then
+          if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
             echo "lindera has been found in the sources and it shouldn't"
             exit 1
           fi

@@ -129,18 +117,15 @@ jobs:
     name: Run tests in debug
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
-      image: ubuntu:18.04
+      # Use ubuntu-20.04 to compile with glibc 2.28
+      image: ubuntu:20.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
       - name: Run tests in debug

@@ -154,11 +139,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: 1.75.0
-          override: true
           components: clippy
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1

@@ -173,10 +156,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: nightly
+          toolchain: nightly-2024-07-09
           override: true
           components: rustfmt
       - name: Cache dependencies
One more workflow file (name not captured in this mirror) gets the same toolchain change:

@@ -18,11 +18,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       - name: Install sd
         run: cargo install sd
       - name: Update Cargo.toml file
A documentation file describing benchmark workloads (name not captured in this mirror) documents the new "target" field:

@@ -109,6 +109,12 @@ They are JSON files with the following structure (comments are not actually supp
     "run_count": 3,
     // List of arguments to add to the Meilisearch command line.
     "extra_cli_args": ["--max-indexing-threads=1"],
+    // An expression that can be parsed as a comma-separated list of targets and levels
+    // as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
+    // The expression is used to filter the spans that are measured for profiling purposes.
+    // Optional, defaults to "indexing::=trace" (for indexing workloads), common other values is
+    // "search::=trace"
+    "target": "indexing::=trace",
     // List of named assets that can be used in the commands.
     "assets": {
         // name of the asset.
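For orientation, the sketch below assembles only the fields quoted in the excerpt above into one workload file. It is a hypothetical minimal example, not taken from the repository: the file path is made up, and real workloads also define commands and asset entries that are omitted here.

```sh
# Write a minimal, hypothetical workload file using only the fields documented above.
cat > workloads/example-indexing.json <<'EOF'
{
  "run_count": 3,
  "extra_cli_args": ["--max-indexing-threads=1"],
  "target": "indexing::=trace",
  "assets": {}
}
EOF
```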
A contributing guide (file name not captured in this mirror) gains a "Faster build" section and a small fence fix:

@@ -52,6 +52,20 @@ cargo test

 This command will be triggered to each PR as a requirement for merging it.

+#### Faster build
+
+You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
+It'll store some built artifacts in the directory of your choice.
+
+We recommend using the standard `$HOME/.cache/lindera` directory:
+```sh
+export LINDERA_CACHE=$HOME/.cache/lindera
+```
+
+Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable.
+Setting this variable will prevent the Meilisearch binary from being rebuilt each time the directory that hosts the Meilisearch repository changes.
+Do not enable this environment variable for production builds (as it will break the `version` route, among other things).
+
 #### Snapshot-based tests

 We are using [insta](https://insta.rs) to perform snapshot-based testing.

@@ -63,7 +77,7 @@ Furthermore, we provide some macros on top of insta, notably a way to use snapsh

 To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshot are fully created locally:

-```
+```sh
 export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
 ```
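As a usage note, the two build-speed settings introduced above can be combined in a local shell session. This is a minimal sketch based only on the hunk above and on the MEILI_NO_VERGEN handling shown later in the build-script hunk; it is not an official recommendation:

```sh
# Cache Lindera's built artifacts between successive builds (any writable directory works).
export LINDERA_CACHE=$HOME/.cache/lindera
# Skip vergen git metadata so the binary is not rebuilt whenever the repository directory changes.
# The build script treats any value other than "false" or "0" as "disable vergen".
# Never enable this for production builds.
export MEILI_NO_VERGEN=true
cargo build --release
```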
Cargo.lock (generated): 1700 lines changed. The file diff is suppressed because it is too large.
A Dockerfile (file name not captured in this mirror) is updated:

@@ -1,7 +1,7 @@
 # Compile
-FROM rust:1.75.0-alpine3.18 AS compiler
+FROM rust:1.79.0-alpine3.20 AS compiler

-RUN apk add -q --update-cache --no-cache build-base openssl-dev
+RUN apk add -q --no-cache build-base openssl-dev

 WORKDIR /

@@ -20,13 +20,12 @@ RUN set -eux; \
     cargo build --release -p meilisearch -p meilitool

 # Run
-FROM alpine:3.16
+FROM alpine:3.20

 ENV MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV MEILI_SERVER_PROVIDER docker

-RUN apk update --quiet \
-    && apk add -q --no-cache libgcc tini curl
+RUN apk add -q --no-cache libgcc tini curl

 # add meilisearch and meilitool to the `/bin` so you can run it from anywhere
 # and it's easy to find.
README.md: 30 lines changed

@@ -1,9 +1,6 @@
 <p align="center">
-  <a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
-    <img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
-  </a>
-  <a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
-    <img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
+  <a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo" target="_blank">
+    <img src="assets/meilisearch-logo-kawaii.png">
   </a>
 </p>

@@ -25,7 +22,7 @@

 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

-[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
+[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

 <p align="center" name="demo">
   <a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">

@@ -36,11 +33,18 @@
   </a>
 </p>

-🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
+## 🖥 Examples
+
+- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
+- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
+- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
+- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
+
+See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).

 ## ✨ Features
-- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
-- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
+- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
+- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
 - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
 - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need

@@ -59,7 +63,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww

 ## 🚀 Getting started

-For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
+For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.

 ## 🌍 Supercharge your Meilisearch experience

@@ -83,7 +87,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame

 ## 📊 Telemetry

-Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
+Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

 To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.

@@ -105,11 +109,11 @@ Thank you for your support!

 ## 👩‍💻 Contributing

-Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
+Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md).

 ## 📦 Versioning

-Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
+Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).

 The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).
assets/meilisearch-logo-kawaii.png: new binary file (98 KiB), not shown.
A Cargo.toml (file path not captured in this mirror) bumps several dependency versions:

@@ -11,24 +11,24 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.79"
+anyhow = "1.0.86"
 csv = "1.3.0"
 milli = { path = "../milli" }
-mimalloc = { version = "0.1.39", default-features = false }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+mimalloc = { version = "0.1.43", default-features = false }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }

 [dev-dependencies]
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.2"
+roaring = "0.10.6"

 [build-dependencies]
-anyhow = "1.0.79"
-bytes = "1.5.0"
+anyhow = "1.0.86"
+bytes = "1.6.0"
 convert_case = "0.6.0"
-flate2 = "1.0.28"
-reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
+flate2 = "1.0.30"
+reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }

 [features]
 default = ["milli/all-tokenizations"]
A CI status configuration file (name not captured in this mirror) is updated to match the renamed test job:

@@ -1,5 +1,5 @@
 status = [
-  'Tests on ubuntu-18.04',
+  'Tests on ubuntu-20.04',
   'Tests on macos-12',
   'Tests on windows-2022',
   'Run Clippy',
Another Cargo.toml (file path not captured in this mirror):

@@ -11,8 +11,8 @@ license.workspace = true
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-time = { version = "0.3.34", features = ["parsing"] }
+time = { version = "0.3.36", features = ["parsing"] }

 [build-dependencies]
-anyhow = "1.0.80"
-vergen-git2 = "1.0.0-beta.2"
+anyhow = "1.0.86"
+vergen-git2 = "1.0.0"
A build script (file name not captured in this mirror) learns to skip vergen when MEILI_NO_VERGEN is set:

@@ -5,6 +5,13 @@ fn main() {
 }

 fn emit_git_variables() -> anyhow::Result<()> {
+    println!("cargo::rerun-if-env-changed=MEILI_NO_VERGEN");
+
+    let has_vergen =
+        !matches!(std::env::var_os("MEILI_NO_VERGEN"), Some(x) if x != "false" && x != "0");
+
+    anyhow::ensure!(has_vergen, "disabled via `MEILI_NO_VERGEN`");
+
     // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
     // in the corresponding GitHub workflow (publish_docker.yml).
     // This is due to the Dockerfile building the binary outside of the git directory.
Another Cargo.toml (file path not captured in this mirror) bumps its dependencies and drops meilisearch-auth:

@@ -11,22 +11,21 @@ readme.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.79"
-flate2 = "1.0.28"
-http = "0.2.11"
-meilisearch-auth = { path = "../meilisearch-auth" }
+anyhow = "1.0.86"
+flate2 = "1.0.30"
+http = "1.1.0"
 meilisearch-types = { path = "../meilisearch-types" }
 once_cell = "1.19.0"
-regex = "1.10.2"
-roaring = { version = "0.10.2", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
-tar = "0.4.40"
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+regex = "1.10.5"
+roaring = { version = "0.10.6", features = ["serde"] }
+serde = { version = "1.0.204", features = ["derive"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
+tar = "0.4.41"
+tempfile = "3.10.1"
+thiserror = "1.0.61"
+time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tracing = "0.1.40"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+uuid = { version = "1.10.0", features = ["serde", "v4"] }

 [dev-dependencies]
 big_s = "1.0.2"
A Rust source file (name not captured in this mirror) adds a DocumentEdition task kind to the dump format:

@@ -104,6 +104,11 @@ pub enum KindDump {
     DocumentDeletionByFilter {
         filter: serde_json::Value,
     },
+    DocumentEdition {
+        filter: Option<serde_json::Value>,
+        context: Option<serde_json::Map<String, serde_json::Value>>,
+        function: String,
+    },
     Settings {
         settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
         is_deletion: bool,

@@ -172,6 +177,9 @@ impl From<KindWithContent> for KindDump {
             KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
                 KindDump::DocumentDeletionByFilter { filter: filter_expr }
             }
+            KindWithContent::DocumentEdition { filter_expr, context, function, .. } => {
+                KindDump::DocumentEdition { filter: filter_expr, context, function }
+            }
             KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
             KindWithContent::SettingsUpdate {
                 new_settings,
A series of hunks in the dump-reader tests (file names not captured in this mirror) replace insta::assert_display_snapshot! with insta::assert_snapshot!; the surrounding test code is unchanged:

@@ -425,7 +425,7 @@ pub(crate) mod test {
     let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");

@@ -358,7 +358,7 @@ pub(crate) mod test {
     let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");

@@ -394,8 +394,8 @@ pub(crate) mod test {
     let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-    insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+    insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

@@ -442,8 +442,8 @@ pub(crate) mod test {
     let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-    insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+    insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

@@ -216,7 +216,7 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
     insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");

@@ -337,7 +337,7 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
     insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");

@@ -383,8 +383,8 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-    insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+    insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

@@ -463,8 +463,8 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-    insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+    insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

@@ -540,7 +540,7 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
     assert_eq!(dump.instance_uid().unwrap(), None);

@@ -633,7 +633,7 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
     assert_eq!(dump.instance_uid().unwrap(), None);

@@ -726,7 +726,7 @@ pub(crate) mod test {
     let mut dump = DumpReader::open(dump).unwrap();
-    insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
+    insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
     assert_eq!(dump.instance_uid().unwrap(), None);
@@ -780,7 +780,7 @@ expression: document
|
|||||||
1.3484878540039063
|
1.3484878540039063
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"userProvided": false
|
"regenerate": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -779,7 +779,7 @@ expression: document
|
|||||||
1.04031240940094
|
1.04031240940094
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"userProvided": false
|
"regenerate": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
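The two snapshot fragments above are the tail of a document's per-embedder vector entry in the dump tests; the only change is that the boolean flag is now named `regenerate` instead of `userProvided`. A minimal sketch of the surrounding JSON shape, assuming a hypothetical embedder name and a truncated embedding value (field names outside the excerpt are not shown in this diff and are assumptions):

    // Hypothetical reconstruction of the snapshotted fragment; only the
    // float and the renamed flag are taken from the diff above.
    use serde_json::json;

    fn main() {
        let vectors_entry = json!({
            "my-embedder": {                          // embedder name: assumption
                "embeddings": [[1.3484878540039063]], // truncated
                "regenerate": true                    // previously "userProvided": false
            }
        });
        println!("{vectors_entry}");
    }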
@@ -252,7 +252,7 @@ pub(crate) mod test {
let mut dump = V2Reader::open(dir).unwrap();

// top level infos
-insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");

// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -349,7 +349,7 @@ pub(crate) mod test {
let mut dump = V2Reader::open(dir).unwrap();

// top level infos
-insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
+insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");

// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -267,7 +267,7 @@ pub(crate) mod test {
let mut dump = V3Reader::open(dir).unwrap();

// top level infos
-insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");

// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -152,6 +152,7 @@ impl Settings<Unchecked> {
}

#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
@@ -262,8 +262,8 @@ pub(crate) mod test {
let mut dump = V4Reader::open(dir).unwrap();

// top level infos
-insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -182,6 +182,7 @@ impl Settings<Unchecked> {
}
}

+#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
@@ -299,8 +299,8 @@ pub(crate) mod test {
let mut dump = V5Reader::open(dir).unwrap();

// top level infos
-insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -200,6 +200,7 @@ impl std::ops::Deref for IndexUid {
}
}

+#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
#[cfg_attr(test, serde(rename_all = "camelCase"))]
@@ -281,7 +281,7 @@ pub(crate) mod test {
let dump_path = dump.path();

// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
-insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###"
+insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
.
├---- indexes/
│ └---- doggos/

Binary file not shown.
@@ -11,10 +11,7 @@ edition.workspace = true
license.workspace = true

[dependencies]
-tempfile = "3.9.0"
+tempfile = "3.10.1"
-thiserror = "1.0.56"
+thiserror = "1.0.61"
tracing = "0.1.40"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+uuid = { version = "1.10.0", features = ["serde", "v4"] }
-
-[dev-dependencies]
-faux = "0.1.10"
@@ -14,7 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
-unescaper = "0.1.3"
+unescaper = "0.1.5"

[dev-dependencies]
-insta = "1.34.0"
+insta = "1.39.0"
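The `insta` bump to 1.39.0 is presumably what drives the mechanical macro rename visible throughout the test hunks in this compare: newer insta releases deprecate `assert_display_snapshot!` in favour of `assert_snapshot!`, so each call site swaps the macro name while keeping its inline snapshot. A minimal sketch of the before/after shape, with an illustrative value only:

    // Sketch of the rename applied across the test modules; the inline
    // snapshot syntax and the Display-based formatting stay the same.
    #[test]
    fn snapshot_macro_rename() {
        let subscribers = 1000;
        // before: insta::assert_display_snapshot!(subscribers, @"1000");
        insta::assert_snapshot!(subscribers, @"1000");
    }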
@@ -26,6 +26,7 @@ pub enum Condition<'a> {
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
Between { from: Token<'a>, to: Token<'a> },
+Contains { keyword: Token<'a>, word: Token<'a> },
}

/// condition = value ("==" | ">" ...) value
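The new `Contains` variant keeps the `CONTAINS` keyword as a `Token` alongside the searched word, which is what lets later code point back at the keyword's position (see `use_contains_operator` further down). A minimal sketch of parsing and re-displaying such a filter, using the crate's public `FilterCondition::parse` entry point as exercised by the tests below:

    // Sketch only: parse a CONTAINS filter and print it back through the
    // Display impl added later in this diff.
    use filter_parser::FilterCondition;

    fn main() {
        let filter = FilterCondition::parse("title CONTAINS 'dragon'").unwrap().unwrap();
        // Renders as `{title} CONTAINS {dragon}`, matching the test snapshots.
        println!("{filter}");
    }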
@@ -92,6 +93,34 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
}

+/// contains = value "CONTAINS" value
+pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
+let (input, (fid, contains, value)) =
+tuple((parse_value, tag("CONTAINS"), cut(parse_value)))(input)?;
+Ok((
+input,
+FilterCondition::Condition {
+fid,
+op: Contains { keyword: Token { span: contains, value: None }, word: value },
+},
+))
+}
+
+/// contains = value "NOT" WS+ "CONTAINS" value
+pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
+let keyword = tuple((tag("NOT"), multispace1, tag("CONTAINS")));
+let (input, (fid, (_not, _spaces, contains), value)) =
+tuple((parse_value, keyword, cut(parse_value)))(input)?;
+
+Ok((
+input,
+FilterCondition::Not(Box::new(FilterCondition::Condition {
+fid,
+op: Contains { keyword: Token { span: contains, value: None }, word: value },
+})),
+))
+}
+
/// to = value value "TO" WS+ value
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
let (input, (key, from, _, _, to)) =
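Note that there is no dedicated `NotContains` condition: `parse_not_contains` builds the same `Contains` condition and wraps it in `FilterCondition::Not`, mirroring how `NOT EXISTS` is handled. A small sketch of the consequence, matching the snapshots in the tests further down:

    // Sketch: both spellings normalise to the same Not-wrapped condition,
    // so they render identically (`NOT ({subscribers} CONTAINS {hello})`).
    use filter_parser::FilterCondition;

    fn main() {
        let a = FilterCondition::parse("subscribers NOT CONTAINS 'hello'").unwrap().unwrap();
        let b = FilterCondition::parse("NOT subscribers CONTAINS 'hello'").unwrap().unwrap();
        assert_eq!(a.to_string(), b.to_string());
    }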
@@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
-writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
+writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
@@ -48,8 +48,8 @@ use std::fmt::Debug;

pub use condition::{parse_condition, parse_to, Condition};
use condition::{
-parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
+parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
-parse_not_exists,
+parse_is_null, parse_not_contains, parse_not_exists,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
@@ -147,7 +147,37 @@ pub enum FilterCondition<'a> {
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
}

+pub enum TraversedElement<'a> {
+FilterCondition(&'a FilterCondition<'a>),
+Condition(&'a Condition<'a>),
+}
+
impl<'a> FilterCondition<'a> {
+pub fn use_contains_operator(&self) -> Option<&Token> {
+match self {
+FilterCondition::Condition { fid: _, op } => match op {
+Condition::GreaterThan(_)
+| Condition::GreaterThanOrEqual(_)
+| Condition::Equal(_)
+| Condition::NotEqual(_)
+| Condition::Null
+| Condition::Empty
+| Condition::Exists
+| Condition::LowerThan(_)
+| Condition::LowerThanOrEqual(_)
+| Condition::Between { .. } => None,
+Condition::Contains { keyword, word: _ } => Some(keyword),
+},
+FilterCondition::Not(this) => this.use_contains_operator(),
+FilterCondition::Or(seq) | FilterCondition::And(seq) => {
+seq.iter().find_map(|filter| filter.use_contains_operator())
+}
+FilterCondition::GeoLowerThan { .. }
+| FilterCondition::GeoBoundingBox { .. }
+| FilterCondition::In { .. } => None,
+}
+}
+
/// Returns the first token found at the specified depth, `None` if no token at this depth.
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
match self {
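`use_contains_operator` walks the whole tree (through `Not`, `Or` and `And`) and surfaces the `CONTAINS` keyword token if the operator appears anywhere, which is the kind of hook a caller needs to reject the operator while it is gated behind a feature flag. A sketch of such a caller; the feature check and error type here are placeholders, not the engine's actual API:

    // Hypothetical gate on the new operator; only use_contains_operator()
    // comes from the diff above, the rest is illustrative.
    use filter_parser::FilterCondition;

    fn check_filter(filter: &FilterCondition, contains_enabled: bool) -> Result<(), String> {
        if filter.use_contains_operator().is_some() && !contains_enabled {
            return Err("using the CONTAINS filter operator requires enabling the feature first".to_string());
        }
        Ok(())
    }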
@@ -452,6 +482,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_exists,
parse_not_exists,
parse_to,
+parse_contains,
+parse_not_contains,
// the next lines are only for error handling and are written at the end to have the less possible performance impact
parse_geo,
parse_geo_distance,
@@ -534,6 +566,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::LowerThan(token) => write!(f, "< {token}"),
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
+Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
}
}
}
@@ -558,127 +591,135 @@ pub mod tests {
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
}

+#[track_caller]
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}

#[test]
fn parse_escaped() {
-insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
+insta::assert_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
-insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
+insta::assert_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
-insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
+insta::assert_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
-insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
+insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
-insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
+insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}

#[test]
fn parse() {
// Test equal
-insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
+insta::assert_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
-insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
+insta::assert_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
-insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
+insta::assert_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
-insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
+insta::assert_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
-insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
+insta::assert_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
-insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
+insta::assert_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
-insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
+insta::assert_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
-insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
+insta::assert_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
-insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
+insta::assert_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");

// Test IN
-insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]");
+insta::assert_snapshot!(p("colour IN[]"), @"{colour} IN[]");
-insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
+insta::assert_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
-insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
+insta::assert_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
-insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
+insta::assert_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
-insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
+insta::assert_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");

// Test IN + OR/AND/()
-insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
+insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
-insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
+insta::assert_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
-insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
+insta::assert_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");

// Test whitespace start/end
-insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}");
+insta::assert_snapshot!(p(" colour = green "), @"{colour} = {green}");
-insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
+insta::assert_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
-insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
+insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
-insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
+insta::assert_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
-insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
+insta::assert_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");

// Test conditions
-insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
+insta::assert_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
-insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
+insta::assert_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
-insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
+insta::assert_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
-insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
+insta::assert_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
-insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
+insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
-insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
+insta::assert_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
-insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
+insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
-insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
+insta::assert_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");

// Test NOT
-insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
+insta::assert_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
-insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
+insta::assert_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");

// Test NULL + NOT NULL
-insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
+insta::assert_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
-insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
+insta::assert_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
-insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
+insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
-insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
+insta::assert_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
-insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
+insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");

// Test EMPTY + NOT EMPTY
-insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
+insta::assert_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
-insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
+insta::assert_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
-insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
+insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
-insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
+insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
-insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
+insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");

// Test EXISTS + NOT EXITS
-insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
+insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
-insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
+insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
-insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
+insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
-insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
+insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
-insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
+insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");

+// Test CONTAINS + NOT CONTAINS
+insta::assert_snapshot!(p("subscribers CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
+insta::assert_snapshot!(p("NOT subscribers CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
+insta::assert_snapshot!(p("subscribers NOT CONTAINS hello"), @"NOT ({subscribers} CONTAINS {hello})");
+insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
+insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
+
// Test nested NOT
-insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
+insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
-insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
+insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");

// Test geo radius
-insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
+insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
-insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
+insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
-insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
+insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");

// Test geo bounding box
-insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
+insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
-insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
+insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
-insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
+insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");

// Test OR + AND
-insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
+insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
-insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
+insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
-insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
+insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
-insta::assert_display_snapshot!(
+insta::assert_snapshot!(
p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"),
@"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]"
);

// Test parentheses
-insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
+insta::assert_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
-insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
+insta::assert_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");

// Test recursion
// This is the most that is allowed
-insta::assert_display_snapshot!(
+insta::assert_snapshot!(
p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"),
@"{x} = {1}"
);
-insta::assert_display_snapshot!(
+insta::assert_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@"NOT ({x} = {1})"
);

// Confusing keywords
-insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
+insta::assert_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
}

#[test]
@@ -689,182 +730,182 @@ pub mod tests {
Fc::parse(s).unwrap_err().to_string()
}

-insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###"
+insta::assert_snapshot!(p("channel = Ponce = 12"), @r###"
Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule.
17:21 channel = Ponce = 12
"###);

-insta::assert_display_snapshot!(p("channel = "), @r###"
+insta::assert_snapshot!(p("channel = "), @r###"
Was expecting a value but instead got nothing.
14:14 channel =
"###);

-insta::assert_display_snapshot!(p("channel = 🐻"), @r###"
+insta::assert_snapshot!(p("channel = 🐻"), @r###"
Was expecting a value but instead got `🐻`.
11:12 channel = 🐻
"###);

-insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
+insta::assert_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
Was expecting a value but instead got `🐻`.
11:12 channel = 🐻 AND followers < 100
"###);

-insta::assert_display_snapshot!(p("'OR'"), @r###"
+insta::assert_snapshot!(p("'OR'"), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);

-insta::assert_display_snapshot!(p("OR"), @r###"
+insta::assert_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR
"###);

-insta::assert_display_snapshot!(p("channel Ponce"), @r###"
+insta::assert_snapshot!(p("channel Ponce"), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);

-insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
+insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);

-insta::assert_display_snapshot!(p("_geoRadius"), @r###"
+insta::assert_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius
"###);

-insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###"
+insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12
"###);

-insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###"
+insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox
"###);

-insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###"
+insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12
"###);

-insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
+insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0)
"###);

-insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:22 _geoPoint(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:34 position <= _geoPoint(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:25 _geoDistance(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:37 position <= _geoDistance(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("_geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:17 _geo(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:29 position <= _geo(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
+insta::assert_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
The `_geoRadius` filter is an operation and can't be used as a value.
13:35 position <= _geoRadius(12, 13, 14)
"###);

-insta::assert_display_snapshot!(p("channel = 'ponce"), @r###"
+insta::assert_snapshot!(p("channel = 'ponce"), @r###"
Expression `\'ponce` is missing the following closing delimiter: `'`.
11:17 channel = 'ponce
"###);

-insta::assert_display_snapshot!(p("channel = \"ponce"), @r###"
+insta::assert_snapshot!(p("channel = \"ponce"), @r###"
Expression `\"ponce` is missing the following closing delimiter: `"`.
11:17 channel = "ponce
"###);

-insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
+insta::assert_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
Expression `(followers >= 1000` is missing the following closing delimiter: `)`.
17:35 channel = mv OR (followers >= 1000
"###);

-insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
+insta::assert_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule.
34:35 channel = mv OR followers >= 1000)
"###);

-insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
+insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);

-insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
+insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);

-insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
+insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
17:44 channel = ponce ORdog != 'bernese mountain'
"###);

-insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###"
+insta::assert_snapshot!(p("colour IN blue, green]"), @r###"
Expected `[` after `IN` keyword.
11:23 colour IN blue, green]
"###);

-insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
+insta::assert_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`.
32:36 colour IN [blue, green, 'blue' > 2]
"###);

-insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###"
+insta::assert_snapshot!(p("colour IN [blue, green, AND]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `AND]`.
25:29 colour IN [blue, green, AND]
"###);

-insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###"
+insta::assert_snapshot!(p("colour IN [blue, green"), @r###"
Expected matching `]` after the list of field names given to `IN[`
23:23 colour IN [blue, green
"###);

-insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###"
+insta::assert_snapshot!(p("colour IN ['blue, green"), @r###"
Expression `\'blue, green` is missing the following closing delimiter: `'`.
12:24 colour IN ['blue, green
"###);

-insta::assert_display_snapshot!(p("x = EXISTS"), @r###"
+insta::assert_snapshot!(p("x = EXISTS"), @r###"
Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes.
5:11 x = EXISTS
"###);

-insta::assert_display_snapshot!(p("AND = 8"), @r###"
+insta::assert_snapshot!(p("AND = 8"), @r###"
Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes.
1:4 AND = 8
"###);

-insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
+insta::assert_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))
"###);

-insta::assert_display_snapshot!(
+insta::assert_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
@@ -872,41 +913,41 @@ pub mod tests {
"###
);

-insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
+insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);

-insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
+insta::assert_snapshot!(p(r#"value NULL"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
-insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
+insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
-insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
+insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
-insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
+insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
-insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
+insta::assert_snapshot!(p(r#"value IS"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
-insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
+insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
-insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
+insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
-insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
+insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
-Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
+Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
}
@@ -211,6 +211,7 @@ fn is_keyword(s: &str) -> bool {
         | "IS"
         | "NULL"
         | "EMPTY"
+        | "CONTAINS"
         | "_geoRadius"
         | "_geoBoundingBox"
     )
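The hunk above reserves `CONTAINS` in the filter grammar, which is why the error snapshots earlier now list `CONTAINS` and `NOT CONTAINS` among the expected operations. Below is a minimal, self-contained sketch of such a keyword check; only the arms visible in the hunk are confirmed, the remaining keywords are assumptions for illustration and not the parser's real list.

fn is_keyword(s: &str) -> bool {
    // Arms beyond IS/NULL/EMPTY/CONTAINS/_geoRadius/_geoBoundingBox are assumed.
    matches!(
        s,
        "AND" | "OR" | "NOT" | "TO" | "EXISTS" | "IN"
            | "IS" | "NULL" | "EMPTY" | "CONTAINS"
            | "_geoRadius" | "_geoBoundingBox"
    )
}

fn main() {
    // A bare CONTAINS is now a reserved word; quoting it turns it back into a plain value.
    assert!(is_keyword("CONTAINS"));
    assert!(!is_keyword("\"CONTAINS\""));
}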
@@ -12,9 +12,9 @@ license.workspace = true

 [dependencies]
 arbitrary = { version = "1.3.2", features = ["derive"] }
-clap = { version = "4.4.17", features = ["derive"] }
+clap = { version = "4.5.9", features = ["derive"] }
-fastrand = "2.0.1"
+fastrand = "2.1.0"
 milli = { path = "../milli" }
-serde = { version = "1.0.195", features = ["derive"] }
+serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
-tempfile = "3.9.0"
+tempfile = "3.10.1"
@@ -11,36 +11,38 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.79"
+anyhow = "1.0.86"
 bincode = "1.3.3"
 csv = "1.3.0"
-derive_builder = "0.12.0"
+derive_builder = "0.20.0"
 dump = { path = "../dump" }
-enum-iterator = "1.5.0"
+enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.28"
+flate2 = "1.0.30"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-page_size = "0.5.0"
+page_size = "0.6.0"
-rayon = "1.8.1"
+rayon = "1.10.0"
-roaring = { version = "0.10.2", features = ["serde"] }
+roaring = { version = "0.10.6", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
+serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
 synchronoise = "1.0.1"
-tempfile = "3.9.0"
+tempfile = "3.10.1"
-thiserror = "1.0.56"
+thiserror = "1.0.61"
-time = { version = "0.3.31", features = [
+time = { version = "0.3.36", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
 tracing = "0.1.40"
-ureq = "2.9.7"
+ureq = "2.10.0"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+uuid = { version = "1.10.0", features = ["serde", "v4"] }

 [dev-dependencies]
+arroy = "0.4.0"
 big_s = "1.0.2"
 crossbeam = "0.8.4"
-insta = { version = "1.34.0", features = ["json", "redactions"] }
+insta = { version = "1.39.0", features = ["json", "redactions"] }
+maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
@@ -24,6 +24,7 @@ enum AutobatchKind {
         allow_index_creation: bool,
         primary_key: Option<String>,
     },
+    DocumentEdition,
     DocumentDeletion,
     DocumentDeletionByFilter,
     DocumentClear,
@@ -63,6 +64,7 @@ impl From<KindWithContent> for AutobatchKind {
                 primary_key,
                 ..
             } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
+            KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
             KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
             KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
             KindWithContent::DocumentDeletionByFilter { .. } => {
@@ -98,6 +100,9 @@ pub enum BatchKind {
         primary_key: Option<String>,
         operation_ids: Vec<TaskId>,
     },
+    DocumentEdition {
+        id: TaskId,
+    },
     DocumentDeletion {
         deletion_ids: Vec<TaskId>,
     },
@@ -199,6 +204,7 @@ impl BatchKind {
                 }),
                 allow_index_creation,
             ),
+            K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
             K::DocumentDeletion => {
                 (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
             }
@@ -222,7 +228,7 @@ impl BatchKind {

         match (self, kind) {
             // We don't batch any of these operations
-            (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
+            (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this),
             // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
             (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
                 Break(this)
@@ -519,6 +525,7 @@ impl BatchKind {
                 | BatchKind::IndexDeletion { .. }
                 | BatchKind::IndexUpdate { .. }
                 | BatchKind::IndexSwap { .. }
+                | BatchKind::DocumentEdition { .. }
                 | BatchKind::DocumentDeletionByFilter { .. },
                 _,
             ) => {
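The hunks above make a document edition task a batch of its own: when it opens a batch it immediately returns `Break`, and any other running batch that meets it is closed as well, whereas deletions keep accumulating task ids. The following is a compressed, self-contained sketch of that control flow; the enum and the string keys are stand-ins kept close to the diff, not the scheduler's real types.

use std::ops::ControlFlow::{self, Break, Continue};

#[derive(Debug)]
enum BatchKind {
    DocumentEdition { id: u32 },
    DocumentDeletion { deletion_ids: Vec<u32> },
}

// First task of a batch: an edition closes the batch right away (Break),
// while a deletion leaves it open (Continue) so later deletions can be merged in.
fn start_batch(kind: &str, task_id: u32) -> ControlFlow<BatchKind, BatchKind> {
    match kind {
        "documentEdition" => Break(BatchKind::DocumentEdition { id: task_id }),
        _ => Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }),
    }
}

fn main() {
    assert!(matches!(start_batch("documentEdition", 0), Break(BatchKind::DocumentEdition { id: 0 })));
    assert!(matches!(start_batch("documentDeletion", 1), Continue(_)));
}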
|
|||||||
@@ -34,7 +34,7 @@ use meilisearch_types::milli::update::{
|
|||||||
use meilisearch_types::milli::vector::parsed_vectors::{
|
use meilisearch_types::milli::vector::parsed_vectors::{
|
||||||
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
||||||
};
|
};
|
||||||
use meilisearch_types::milli::{self, Filter};
|
use meilisearch_types::milli::{self, Filter, Object};
|
||||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||||
@@ -106,6 +106,10 @@ pub(crate) enum IndexOperation {
|
|||||||
operations: Vec<DocumentOperation>,
|
operations: Vec<DocumentOperation>,
|
||||||
tasks: Vec<Task>,
|
tasks: Vec<Task>,
|
||||||
},
|
},
|
||||||
|
DocumentEdition {
|
||||||
|
index_uid: String,
|
||||||
|
task: Task,
|
||||||
|
},
|
||||||
IndexDocumentDeletionByFilter {
|
IndexDocumentDeletionByFilter {
|
||||||
index_uid: String,
|
index_uid: String,
|
||||||
task: Task,
|
task: Task,
|
||||||
@@ -164,7 +168,8 @@ impl Batch {
|
|||||||
| IndexOperation::DocumentClear { tasks, .. } => {
|
| IndexOperation::DocumentClear { tasks, .. } => {
|
||||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||||
}
|
}
|
||||||
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
|
IndexOperation::DocumentEdition { task, .. }
|
||||||
|
| IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
|
||||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||||
}
|
}
|
||||||
IndexOperation::SettingsAndDocumentOperation {
|
IndexOperation::SettingsAndDocumentOperation {
|
||||||
@@ -228,6 +233,7 @@ impl IndexOperation {
|
|||||||
pub fn index_uid(&self) -> &str {
|
pub fn index_uid(&self) -> &str {
|
||||||
match self {
|
match self {
|
||||||
IndexOperation::DocumentOperation { index_uid, .. }
|
IndexOperation::DocumentOperation { index_uid, .. }
|
||||||
|
| IndexOperation::DocumentEdition { index_uid, .. }
|
||||||
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
|
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
|
||||||
| IndexOperation::DocumentClear { index_uid, .. }
|
| IndexOperation::DocumentClear { index_uid, .. }
|
||||||
| IndexOperation::Settings { index_uid, .. }
|
| IndexOperation::Settings { index_uid, .. }
|
||||||
@@ -243,6 +249,9 @@ impl fmt::Display for IndexOperation {
|
|||||||
IndexOperation::DocumentOperation { .. } => {
|
IndexOperation::DocumentOperation { .. } => {
|
||||||
f.write_str("IndexOperation::DocumentOperation")
|
f.write_str("IndexOperation::DocumentOperation")
|
||||||
}
|
}
|
||||||
|
IndexOperation::DocumentEdition { .. } => {
|
||||||
|
f.write_str("IndexOperation::DocumentEdition")
|
||||||
|
}
|
||||||
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
|
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
|
||||||
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
|
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
|
||||||
}
|
}
|
||||||
@@ -295,6 +304,21 @@ impl IndexScheduler {
|
|||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
BatchKind::DocumentEdition { id } => {
|
||||||
|
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||||
|
match &task.kind {
|
||||||
|
KindWithContent::DocumentEdition { index_uid, .. } => {
|
||||||
|
Ok(Some(Batch::IndexOperation {
|
||||||
|
op: IndexOperation::DocumentEdition {
|
||||||
|
index_uid: index_uid.clone(),
|
||||||
|
task,
|
||||||
|
},
|
||||||
|
must_create_index: false,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
BatchKind::DocumentOperation { method, operation_ids, .. } => {
|
BatchKind::DocumentOperation { method, operation_ids, .. } => {
|
||||||
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
|
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
|
||||||
let primary_key = tasks
|
let primary_key = tasks
|
||||||
@@ -909,6 +933,7 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
|
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||||
|
|
||||||
// 3.1. Dump the documents
|
// 3.1. Dump the documents
|
||||||
for ret in index.all_documents(&rtxn)? {
|
for ret in index.all_documents(&rtxn)? {
|
||||||
@@ -951,16 +976,21 @@ impl IndexScheduler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (embedder_name, embeddings) in embeddings {
|
for (embedder_name, embeddings) in embeddings {
|
||||||
// don't change the entry if it already exists, because it was user-provided
|
let user_provided = embedding_configs
|
||||||
vectors.entry(embedder_name).or_insert_with(|| {
|
.iter()
|
||||||
let embeddings = ExplicitVectors {
|
.find(|conf| conf.name == embedder_name)
|
||||||
embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
|
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||||
embeddings,
|
|
||||||
),
|
let embeddings = ExplicitVectors {
|
||||||
user_provided: false,
|
embeddings: Some(
|
||||||
};
|
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||||
serde_json::to_value(embeddings).unwrap()
|
),
|
||||||
});
|
regenerate: !user_provided,
|
||||||
|
};
|
||||||
|
vectors.insert(
|
||||||
|
embedder_name,
|
||||||
|
serde_json::to_value(embeddings).unwrap(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1251,6 +1281,7 @@ impl IndexScheduler {
|
|||||||
operations,
|
operations,
|
||||||
mut tasks,
|
mut tasks,
|
||||||
} => {
|
} => {
|
||||||
|
let started_processing_at = std::time::Instant::now();
|
||||||
let mut primary_key_has_been_set = false;
|
let mut primary_key_has_been_set = false;
|
||||||
let must_stop_processing = self.must_stop_processing.clone();
|
let must_stop_processing = self.must_stop_processing.clone();
|
||||||
let indexer_config = self.index_mapper.indexer_config();
|
let indexer_config = self.index_mapper.indexer_config();
|
||||||
@@ -1365,7 +1396,7 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||||
let addition = builder.execute()?;
|
let addition = builder.execute()?;
|
||||||
tracing::info!(indexing_result = ?addition, "document indexing done");
|
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||||
} else if primary_key_has_been_set {
|
} else if primary_key_has_been_set {
|
||||||
// Everything failed but we've set a primary key.
|
// Everything failed but we've set a primary key.
|
||||||
// We need to remove it.
|
// We need to remove it.
|
||||||
@@ -1380,6 +1411,64 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
|
IndexOperation::DocumentEdition { mut task, .. } => {
|
||||||
|
let (filter, context, function) =
|
||||||
|
if let KindWithContent::DocumentEdition {
|
||||||
|
filter_expr, context, function, ..
|
||||||
|
} = &task.kind
|
||||||
|
{
|
||||||
|
(filter_expr, context, function)
|
||||||
|
} else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
let result_count = edit_documents_by_function(
|
||||||
|
index_wtxn,
|
||||||
|
filter,
|
||||||
|
context.clone(),
|
||||||
|
function,
|
||||||
|
self.index_mapper.indexer_config(),
|
||||||
|
self.must_stop_processing.clone(),
|
||||||
|
index,
|
||||||
|
);
|
||||||
|
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
..
|
||||||
|
}) = task.details
|
||||||
|
{
|
||||||
|
(original_filter, context, function)
|
||||||
|
} else {
|
||||||
|
// In the case of a `documentDeleteByFilter` the details MUST be set
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
|
||||||
|
match result_count {
|
||||||
|
Ok((deleted_documents, edited_documents)) => {
|
||||||
|
task.status = Status::Succeeded;
|
||||||
|
task.details = Some(Details::DocumentEdition {
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
deleted_documents: Some(deleted_documents),
|
||||||
|
edited_documents: Some(edited_documents),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
task.status = Status::Failed;
|
||||||
|
task.details = Some(Details::DocumentEdition {
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
deleted_documents: Some(0),
|
||||||
|
edited_documents: Some(0),
|
||||||
|
});
|
||||||
|
task.error = Some(e.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(vec![task])
|
||||||
|
}
|
||||||
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
|
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
|
||||||
let filter =
|
let filter =
|
||||||
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
|
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
|
||||||
@@ -1668,3 +1757,44 @@ fn delete_document_by_filter<'a>(
|
|||||||
0
|
0
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn edit_documents_by_function<'a>(
|
||||||
|
wtxn: &mut RwTxn<'a>,
|
||||||
|
filter: &Option<serde_json::Value>,
|
||||||
|
context: Option<Object>,
|
||||||
|
code: &str,
|
||||||
|
indexer_config: &IndexerConfig,
|
||||||
|
must_stop_processing: MustStopProcessing,
|
||||||
|
index: &'a Index,
|
||||||
|
) -> Result<(u64, u64)> {
|
||||||
|
let candidates = match filter.as_ref().map(Filter::from_json) {
|
||||||
|
Some(Ok(Some(filter))) => filter.evaluate(wtxn, index).map_err(|err| match err {
|
||||||
|
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||||
|
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
|
||||||
|
}
|
||||||
|
e => e.into(),
|
||||||
|
})?,
|
||||||
|
None | Some(Ok(None)) => index.documents_ids(wtxn)?,
|
||||||
|
Some(Err(e)) => return Err(e.into()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let config = IndexDocumentsConfig {
|
||||||
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut builder = milli::update::IndexDocuments::new(
|
||||||
|
wtxn,
|
||||||
|
index,
|
||||||
|
indexer_config,
|
||||||
|
config,
|
||||||
|
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||||
|
|| must_stop_processing.get(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let (new_builder, count) = builder.edit_documents(&candidates, context, code)?;
|
||||||
|
builder = new_builder;
|
||||||
|
|
||||||
|
let _ = builder.execute()?;
|
||||||
|
Ok(count.unwrap())
|
||||||
|
}
|
||||||
|
|||||||
@@ -68,6 +68,32 @@ impl RoFeatures {
             .into())
         }
     }
+
+    pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
+        if self.runtime.edit_documents_by_function {
+            Ok(())
+        } else {
+            Err(FeatureNotEnabledError {
+                disabled_action,
+                feature: "edit documents by function",
+                issue_link: "https://github.com/orgs/meilisearch/discussions/762",
+            }
+            .into())
+        }
+    }
+
+    pub fn check_contains_filter(&self) -> Result<()> {
+        if self.runtime.contains_filter {
+            Ok(())
+        } else {
+            Err(FeatureNotEnabledError {
+                disabled_action: "Using `CONTAINS` in a filter",
+                feature: "contains filter",
+                issue_link: "https://github.com/orgs/meilisearch/discussions/763",
+            }
+            .into())
+        }
+    }
 }

 impl FeatureData {
@@ -79,9 +105,11 @@ impl FeatureData {
         let txn = env.read_txn()?;
         let persisted_features: RuntimeTogglableFeatures =
             runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
+        let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
         let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
-            metrics: instance_features.metrics || persisted_features.metrics,
-            logs_route: instance_features.logs_route || persisted_features.logs_route,
+            metrics: metrics || persisted_features.metrics,
+            logs_route: logs_route || persisted_features.logs_route,
+            contains_filter: contains_filter || persisted_features.contains_filter,
             ..persisted_features
         }));

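Both helpers follow the same experimental-feature gate pattern: a caller checks the runtime toggle before accepting the request and gets a descriptive error otherwise. A self-contained sketch of that pattern is shown below; the structs are minimal stand-ins mirroring the shapes in the hunk, not the real `RoFeatures`/`RuntimeTogglableFeatures`, and the error is a plain string instead of `FeatureNotEnabledError`.

#[derive(Default)]
struct RuntimeTogglableFeatures {
    contains_filter: bool,
}

struct RoFeatures {
    runtime: RuntimeTogglableFeatures,
}

impl RoFeatures {
    // Same shape as the gated check added above: Ok when the toggle is on,
    // an error naming the feature and its discussion link otherwise.
    fn check_contains_filter(&self) -> Result<(), String> {
        if self.runtime.contains_filter {
            Ok(())
        } else {
            Err("Using `CONTAINS` in a filter requires enabling the experimental `contains filter` feature; see https://github.com/orgs/meilisearch/discussions/763".to_string())
        }
    }
}

fn main() {
    let features = RoFeatures { runtime: RuntimeTogglableFeatures::default() };
    assert!(features.check_contains_filter().is_err());
}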
@@ -177,6 +177,17 @@ fn snapshot_details(d: &Details) -> String {
         } => {
             format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}")
         }
+        Details::DocumentEdition {
+            deleted_documents,
+            edited_documents,
+            original_filter,
+            context,
+            function,
+        } => {
+            format!(
+                "{{ deleted_documents: {deleted_documents:?}, edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}"
+            )
+        }
         Details::SettingsUpdate { settings } => {
             format!("{{ settings: {settings:?} }}")
         }
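For readers of the task snapshots, the new arm renders one line per document edition task listing the counts, context, function and original filter. A tiny standalone sketch of what such a line looks like is given below; the field values are purely illustrative and not taken from a real snapshot.

fn main() {
    let (deleted_documents, edited_documents) = (Some(0u64), Some(2u64));
    let original_filter = Some("\"doggo = kefir\"".to_string());
    let context: Option<String> = None;
    let function = "doc.doggo = \"bork\"".to_string();
    // Mirrors the format! call added in the hunk above.
    println!(
        "{{ deleted_documents: {deleted_documents:?}, edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}"
    );
}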
@@ -53,6 +53,7 @@ use meilisearch_types::heed::byteorder::BE;
 use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
 use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
+use meilisearch_types::milli::index::IndexEmbeddingConfig;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
@@ -661,7 +662,11 @@ impl IndexScheduler {
         let rtxn = self.env.read_txn()?;
         self.index_mapper.index(&rtxn, name)
     }
+    /// Return the boolean referring if index exists.
+    pub fn index_exists(&self, name: &str) -> Result<bool> {
+        let rtxn = self.env.read_txn()?;
+        self.index_mapper.index_exists(&rtxn, name)
+    }
     /// Return the name of all indexes without opening them.
     pub fn index_names(&self) -> Result<Vec<String>> {
         let rtxn = self.env.read_txn()?;
@@ -1459,33 +1464,39 @@ impl IndexScheduler {
|
|||||||
// TODO: consider using a type alias or a struct embedder/template
|
// TODO: consider using a type alias or a struct embedder/template
|
||||||
pub fn embedders(
|
pub fn embedders(
|
||||||
&self,
|
&self,
|
||||||
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
|
embedding_configs: Vec<IndexEmbeddingConfig>,
|
||||||
) -> Result<EmbeddingConfigs> {
|
) -> Result<EmbeddingConfigs> {
|
||||||
let res: Result<_> = embedding_configs
|
let res: Result<_> = embedding_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
|
.map(
|
||||||
let prompt =
|
|IndexEmbeddingConfig {
|
||||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
name,
|
||||||
// optimistically return existing embedder
|
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
|
||||||
{
|
..
|
||||||
let embedders = self.embedders.read().unwrap();
|
}| {
|
||||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
let prompt =
|
||||||
return Ok((name, (embedder.clone(), prompt)));
|
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||||
|
// optimistically return existing embedder
|
||||||
|
{
|
||||||
|
let embedders = self.embedders.read().unwrap();
|
||||||
|
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||||
|
return Ok((name, (embedder.clone(), prompt)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// add missing embedder
|
// add missing embedder
|
||||||
let embedder = Arc::new(
|
let embedder = Arc::new(
|
||||||
Embedder::new(embedder_options.clone())
|
Embedder::new(embedder_options.clone())
|
||||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||||
.map_err(meilisearch_types::milli::Error::from)?,
|
.map_err(meilisearch_types::milli::Error::from)?,
|
||||||
);
|
);
|
||||||
{
|
{
|
||||||
let mut embedders = self.embedders.write().unwrap();
|
let mut embedders = self.embedders.write().unwrap();
|
||||||
embedders.insert(embedder_options, embedder.clone());
|
embedders.insert(embedder_options, embedder.clone());
|
||||||
}
|
}
|
||||||
Ok((name, (embedder, prompt)))
|
Ok((name, (embedder, prompt)))
|
||||||
})
|
},
|
||||||
|
)
|
||||||
.collect();
|
.collect();
|
||||||
res.map(EmbeddingConfigs::new)
|
res.map(EmbeddingConfigs::new)
|
||||||
}
|
}
|
||||||
@@ -1596,6 +1607,14 @@ impl<'a> Dump<'a> {
|
|||||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
KindDump::DocumentEdition { filter, context, function } => {
|
||||||
|
KindWithContent::DocumentEdition {
|
||||||
|
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||||
|
filter_expr: filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
}
|
||||||
|
}
|
||||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||||
},
|
},
|
||||||
@@ -1748,6 +1767,9 @@ mod tests {
|
|||||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||||
ReplaceDocuments, UpdateDocuments,
|
ReplaceDocuments, UpdateDocuments,
|
||||||
};
|
};
|
||||||
|
use meilisearch_types::milli::update::Setting;
|
||||||
|
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
|
||||||
|
use meilisearch_types::settings::Unchecked;
|
||||||
use meilisearch_types::tasks::IndexSwap;
|
use meilisearch_types::tasks::IndexSwap;
|
||||||
use meilisearch_types::VERSION_FILE_NAME;
|
use meilisearch_types::VERSION_FILE_NAME;
|
||||||
use tempfile::{NamedTempFile, TempDir};
|
use tempfile::{NamedTempFile, TempDir};
|
||||||
@@ -1801,7 +1823,7 @@ mod tests {
|
|||||||
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||||
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||||
enable_mdb_writemap: false,
|
enable_mdb_writemap: false,
|
||||||
index_growth_amount: 1000 * 1000, // 1 MB
|
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
|
||||||
index_count: 5,
|
index_count: 5,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
autobatching_enabled: true,
|
autobatching_enabled: true,
|
||||||
@@ -1826,6 +1848,7 @@ mod tests {
|
|||||||
assert_eq!(breakpoint, (Init, false));
|
assert_eq!(breakpoint, (Init, false));
|
||||||
let index_scheduler_handle = IndexSchedulerHandle {
|
let index_scheduler_handle = IndexSchedulerHandle {
|
||||||
_tempdir: tempdir,
|
_tempdir: tempdir,
|
||||||
|
index_scheduler: index_scheduler.private_clone(),
|
||||||
test_breakpoint_rcv: receiver,
|
test_breakpoint_rcv: receiver,
|
||||||
last_breakpoint: breakpoint.0,
|
last_breakpoint: breakpoint.0,
|
||||||
};
|
};
|
||||||
@@ -1914,6 +1937,7 @@ mod tests {
|
|||||||
|
|
||||||
pub struct IndexSchedulerHandle {
|
pub struct IndexSchedulerHandle {
|
||||||
_tempdir: TempDir,
|
_tempdir: TempDir,
|
||||||
|
index_scheduler: IndexScheduler,
|
||||||
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
|
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
|
||||||
last_breakpoint: Breakpoint,
|
last_breakpoint: Breakpoint,
|
||||||
}
|
}
|
||||||
@@ -1931,9 +1955,13 @@ mod tests {
|
|||||||
{
|
{
|
||||||
Ok(b) => b,
|
Ok(b) => b,
|
||||||
Err(RecvTimeoutError::Timeout) => {
|
Err(RecvTimeoutError::Timeout) => {
|
||||||
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
|
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||||
|
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}")
|
||||||
|
}
|
||||||
|
Err(RecvTimeoutError::Disconnected) => {
|
||||||
|
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||||
|
panic!("The scheduler crashed.\n{state}")
|
||||||
}
|
}
|
||||||
Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."),
|
|
||||||
};
|
};
|
||||||
// if we've already encountered a breakpoint we're supposed to be stuck on the false
|
// if we've already encountered a breakpoint we're supposed to be stuck on the false
|
||||||
// and we expect the same variant with the true to come now.
|
// and we expect the same variant with the true to come now.
|
||||||
@@ -1952,9 +1980,13 @@ mod tests {
|
|||||||
{
|
{
|
||||||
Ok(b) => b,
|
Ok(b) => b,
|
||||||
Err(RecvTimeoutError::Timeout) => {
|
Err(RecvTimeoutError::Timeout) => {
|
||||||
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
|
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||||
|
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}")
|
||||||
|
}
|
||||||
|
Err(RecvTimeoutError::Disconnected) => {
|
||||||
|
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||||
|
panic!("The scheduler crashed.\n{state}")
|
||||||
}
|
}
|
||||||
Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."),
|
|
||||||
};
|
};
|
||||||
assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite");
|
assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite");
|
||||||
|
|
||||||
@@ -1968,9 +2000,10 @@ mod tests {
|
|||||||
fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) {
|
fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) {
|
||||||
for breakpoint in breakpoints {
|
for breakpoint in breakpoints {
|
||||||
let b = self.advance();
|
let b = self.advance();
|
||||||
|
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
b, breakpoint,
|
b, breakpoint,
|
||||||
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.",
|
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{state}",
|
||||||
breakpoint, b
|
breakpoint, b
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -1995,6 +2028,7 @@ mod tests {
|
|||||||
// Wait for one successful batch.
|
// Wait for one successful batch.
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
fn advance_one_successful_batch(&mut self) {
|
fn advance_one_successful_batch(&mut self) {
|
||||||
|
self.index_scheduler.assert_internally_consistent();
|
||||||
self.advance_till([Start, BatchCreated]);
|
self.advance_till([Start, BatchCreated]);
|
||||||
loop {
|
loop {
|
||||||
match self.advance() {
|
match self.advance() {
|
||||||
@@ -2003,13 +2037,17 @@ mod tests {
|
|||||||
InsideProcessBatch => (),
|
InsideProcessBatch => (),
|
||||||
// the batch went successfully, we can stop the loop and go on with the next states.
|
// the batch went successfully, we can stop the loop and go on with the next states.
|
||||||
ProcessBatchSucceeded => break,
|
ProcessBatchSucceeded => break,
|
||||||
AbortedIndexation => panic!("The batch was aborted."),
|
AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)),
|
||||||
ProcessBatchFailed => panic!("The batch failed."),
|
ProcessBatchFailed => {
|
||||||
|
while self.advance() != Start {}
|
||||||
|
panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler))
|
||||||
|
},
|
||||||
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
|
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.advance_till([AfterProcessing]);
|
self.advance_till([AfterProcessing]);
|
||||||
|
self.index_scheduler.assert_internally_consistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for one failed batch.
|
// Wait for one failed batch.
|
||||||
@@ -2023,8 +2061,8 @@ mod tests {
|
|||||||
InsideProcessBatch => (),
|
InsideProcessBatch => (),
|
||||||
// the batch went failed, we can stop the loop and go on with the next states.
|
// the batch went failed, we can stop the loop and go on with the next states.
|
||||||
ProcessBatchFailed => break,
|
ProcessBatchFailed => break,
|
||||||
ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)"),
|
ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)),
|
||||||
AbortedIndexation => panic!("The batch was aborted."),
|
AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)),
|
||||||
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
|
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3052,8 +3090,10 @@ mod tests {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
let (_, embedding_config) = configs.first().unwrap();
|
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
|
||||||
insta::assert_json_snapshot!(embedding_config.embedder_options);
|
insta::assert_snapshot!(name, @"default");
|
||||||
|
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||||
|
insta::assert_json_snapshot!(config.embedder_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -3759,15 +3799,15 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks");
|
||||||
|
|
||||||
// The index should not exists.
|
// The index should not exist.
|
||||||
snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found.");
|
snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_document_addition_cant_create_index_without_index_without_autobatching() {
|
fn test_document_addition_cant_create_index_without_index_without_autobatching() {
|
||||||
// We're going to execute multiple document addition that don't have
|
// We're going to execute multiple document addition that don't have
|
||||||
// the right to create an index while there is no index currently.
|
// the right to create an index while there is no index currently.
|
||||||
// Since the autobatching is disabled, every tasks should be processed
|
// Since the auto-batching is disabled, every task should be processed
|
||||||
// sequentially and throw an IndexDoesNotExists.
|
// sequentially and throw an IndexDoesNotExists.
|
||||||
let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
|
let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
|
||||||
|
|
||||||
@@ -3809,8 +3849,8 @@ mod tests {
|
|||||||
handle.advance_n_failed_batches(5);
|
handle.advance_n_failed_batches(5);
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
|
||||||
|
|
||||||
// The index should not exists.
|
// The index should not exist.
|
||||||
snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found.");
|
snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -4716,6 +4756,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@@ -4747,6 +4788,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@@ -4785,6 +4827,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@@ -4824,6 +4867,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@@ -4989,7 +5033,6 @@ mod tests {
|
|||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
index_scheduler.assert_internally_consistent();
|
|
||||||
|
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");
|
||||||
|
|
||||||
@@ -5000,7 +5043,7 @@ mod tests {
|
|||||||
insta::assert_json_snapshot!(task.details);
|
insta::assert_json_snapshot!(task.details);
|
||||||
}
|
}
|
||||||
|
|
||||||
handle.advance_n_successful_batches(1);
|
handle.advance_one_successful_batch();
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -5017,13 +5060,17 @@ mod tests {
|
|||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
// for consistency with the below
|
// for consistency with the below
|
||||||
#[allow(clippy::get_first)]
|
#[allow(clippy::get_first)]
|
||||||
let (name, fakerest_config) = configs.get(0).unwrap();
|
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
|
||||||
insta::assert_json_snapshot!(name, @r###""A_fakerest""###);
|
configs.get(0).unwrap();
|
||||||
|
insta::assert_snapshot!(name, @"A_fakerest");
|
||||||
|
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||||
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
||||||
let fakerest_name = name.clone();
|
let fakerest_name = name.clone();
|
||||||
|
|
||||||
let (name, simple_hf_config) = configs.get(1).unwrap();
|
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
|
||||||
insta::assert_json_snapshot!(name, @r###""B_small_hf""###);
|
configs.get(1).unwrap();
|
||||||
|
insta::assert_snapshot!(name, @"B_small_hf");
|
||||||
|
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||||
let simple_hf_name = name.clone();
|
let simple_hf_name = name.clone();
|
||||||
|
|
||||||
@@ -5038,25 +5085,25 @@ mod tests {
|
|||||||
// add one doc, specifying vectors
|
// add one doc, specifying vectors
|
||||||
|
|
||||||
let doc = serde_json::json!(
|
let doc = serde_json::json!(
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 0,
|
||||||
"doggo": "Intel",
|
"doggo": "Intel",
|
||||||
"breed": "beagle",
|
"breed": "beagle",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
&fakerest_name: {
|
&fakerest_name: {
|
||||||
// this will never trigger regeneration, which is good because we can't actually generate with
|
// this will never trigger regeneration, which is good because we can't actually generate with
|
||||||
// this embedder
|
// this embedder
|
||||||
"userProvided": true,
|
"regenerate": false,
|
||||||
"embeddings": beagle_embed,
|
"embeddings": beagle_embed,
|
||||||
},
|
},
|
||||||
&simple_hf_name: {
|
&simple_hf_name: {
|
||||||
// this will be regenerated on updates
|
// this will be regenerated on updates
|
||||||
"userProvided": false,
|
"regenerate": true,
|
||||||
"embeddings": lab_embed,
|
"embeddings": lab_embed,
|
||||||
},
|
},
|
||||||
"noise": [0.1, 0.2, 0.3]
|
"noise": [0.1, 0.2, 0.3]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap();
|
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap();
|
||||||
@@ -5078,7 +5125,6 @@ mod tests {
|
|||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
index_scheduler.assert_internally_consistent();
|
|
||||||
|
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");
|
||||||
|
|
||||||
@@ -5091,6 +5137,19 @@ mod tests {
|
|||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
// Ensure the document have been inserted into the relevant bitamp
|
||||||
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
|
// for consistency with the below
|
||||||
|
#[allow(clippy::get_first)]
|
||||||
|
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||||
|
configs.get(0).unwrap();
|
||||||
|
insta::assert_snapshot!(name, @"A_fakerest");
|
||||||
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||||
|
|
||||||
|
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||||
|
insta::assert_snapshot!(name, @"B_small_hf");
|
||||||
|
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||||
|
|
||||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||||
|
|
||||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
||||||
@@ -5140,7 +5199,6 @@ mod tests {
|
|||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
index_scheduler.assert_internally_consistent();
|
|
||||||
|
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
||||||
|
|
||||||
@@ -5153,11 +5211,25 @@ mod tests {
|
|||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
// Ensure the document have been inserted into the relevant bitamp
|
||||||
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
|
// for consistency with the below
|
||||||
|
#[allow(clippy::get_first)]
|
||||||
|
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||||
|
configs.get(0).unwrap();
|
||||||
|
insta::assert_snapshot!(name, @"A_fakerest");
|
||||||
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||||
|
|
||||||
|
let IndexEmbeddingConfig { name, config: _, user_provided } =
|
||||||
|
configs.get(1).unwrap();
|
||||||
|
insta::assert_snapshot!(name, @"B_small_hf");
|
||||||
|
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||||
|
|
||||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||||
|
|
||||||
// automatically changed to patou
|
// automatically changed to patou because set to regenerate
|
||||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
|
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
|
||||||
// remained beagle because set to userProvided
|
// remained beagle
|
||||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||||
|
|
||||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||||
@@ -5176,4 +5248,578 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn import_vectors_first_and_embedder_later() {
|
||||||
|
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||||
|
|
||||||
|
let content = serde_json::json!(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"doggo": "kefir",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"doggo": "intel",
|
||||||
|
"_vectors": {
|
||||||
|
"my_doggo_embedder": vec![1; 384],
|
||||||
|
"unknown embedder": vec![1, 2, 3],
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"doggo": "max",
|
||||||
|
"_vectors": {
|
||||||
|
"my_doggo_embedder": {
|
||||||
|
"regenerate": false,
|
||||||
|
"embeddings": vec![2; 384],
|
||||||
|
},
|
||||||
|
"unknown embedder": vec![4, 5],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"doggo": "marcel",
|
||||||
|
"_vectors": {
|
||||||
|
"my_doggo_embedder": {
|
||||||
|
"regenerate": true,
|
||||||
|
"embeddings": vec![3; 384],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"doggo": "sora",
|
||||||
|
"_vectors": {
|
||||||
|
"my_doggo_embedder": {
|
||||||
|
"regenerate": true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
|
||||||
|
let documents_count =
|
||||||
|
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
|
||||||
|
.unwrap();
|
||||||
|
snapshot!(documents_count, @"5");
|
||||||
|
file.persist().unwrap();
|
||||||
|
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::DocumentAdditionOrUpdate {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
primary_key: None,
|
||||||
|
method: ReplaceDocuments,
|
||||||
|
content_file: uuid,
|
||||||
|
documents_count,
|
||||||
|
allow_index_creation: true,
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
handle.advance_one_successful_batch();
|
||||||
|
|
||||||
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
|
let documents = index
|
||||||
|
.all_documents(&rtxn)
|
||||||
|
.unwrap()
|
||||||
|
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
|
||||||
|
|
||||||
|
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||||
|
embedders: Setting::Set(maplit::btreemap! {
|
||||||
|
S("my_doggo_embedder") => Setting::Set(EmbeddingSettings {
|
||||||
|
                    source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
                    model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
                    revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
                    document_template: Setting::Set(S("{{doc.doggo}}")),
                    ..Default::default()
                })
            }),
            ..Default::default()
        };
        index_scheduler
            .register(
                KindWithContent::SettingsUpdate {
                    index_uid: S("doggos"),
                    new_settings: Box::new(setting),
                    is_deletion: false,
                    allow_index_creation: false,
                },
                None,
                false,
            )
            .unwrap();
        index_scheduler.assert_internally_consistent();
        handle.advance_one_successful_batch();
        index_scheduler.assert_internally_consistent();

        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
            .all_documents(&rtxn)
            .unwrap()
            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
            .collect::<Vec<_>>();

        // all the vectors linked to the newly specified embedder have been removed;
        // only the unknown embedders stay in the document DB
        snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);

        let conf = index.embedding_configs(&rtxn).unwrap();
        // even though we specified the vector for the ID 3, it shouldn't be marked
        // as user provided since we explicitly marked it as NOT user provided.
        snapshot!(format!("{conf:#?}"), @r###"
        [
            IndexEmbeddingConfig {
                name: "my_doggo_embedder",
                config: EmbeddingConfig {
                    embedder_options: HuggingFace(
                        EmbedderOptions {
                            model: "sentence-transformers/all-MiniLM-L6-v2",
                            revision: Some(
                                "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
                            ),
                            distribution: None,
                        },
                    ),
                    prompt: PromptData {
                        template: "{{doc.doggo}}",
                    },
                },
                user_provided: RoaringBitmap<[1, 2]>,
            },
        ]
        "###);
        let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
        let embeddings = index.embeddings(&rtxn, docid).unwrap();
        let embedding = &embeddings["my_doggo_embedder"];
        assert!(!embedding.is_empty(), "{embedding:?}");

        // the document with the id 3 should keep its original embedding
        let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
        let mut embeddings = Vec::new();

        'vectors: for i in 0..=u8::MAX {
            let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
                .map(Some)
                .or_else(|e| match e {
                    arroy::Error::MissingMetadata(_) => Ok(None),
                    e => Err(e),
                })
                .transpose();

            let Some(reader) = reader else {
                break 'vectors;
            };

            let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
            if let Some(embedding) = embedding {
                embeddings.push(embedding)
            } else {
                break 'vectors;
            }
        }

        snapshot!(embeddings.len(), @"1");
        assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);

        // If we update marcel it should regenerate its embedding automatically

        let content = serde_json::json!(
            [
                {
                    "id": 3,
                    "doggo": "marvel",
                },
                {
                    "id": 4,
                    "doggo": "sorry",
                },
            ]
        );

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1_u128).unwrap();
        let documents_count =
            read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
                .unwrap();
        snapshot!(documents_count, @"2");
        file.persist().unwrap();

        index_scheduler
            .register(
                KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: S("doggos"),
                    primary_key: None,
                    method: UpdateDocuments,
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
                },
                None,
                false,
            )
            .unwrap();
        handle.advance_one_successful_batch();

        // the document with the id 3 should have its original embedding updated
        let rtxn = index.read_txn().unwrap();
        let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
        let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
        let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
        snapshot!(json_string!(doc), @r###"
        {
          "id": 3,
          "doggo": "marvel"
        }
        "###);

        let embeddings = index.embeddings(&rtxn, docid).unwrap();
        let embedding = &embeddings["my_doggo_embedder"];

        assert!(!embedding.is_empty());
        assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);

        // the document with the id 4 should generate an embedding
        let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
        let embeddings = index.embeddings(&rtxn, docid).unwrap();
        let embedding = &embeddings["my_doggo_embedder"];

        assert!(!embedding.is_empty());
    }

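    // A minimal sketch of the embedding lookup these tests rely on, assuming the same `index`,
    // `rtxn`, and `docid` bindings as above: `index.embeddings(&rtxn, docid)` returns a map keyed
    // by embedder name, and indexing it with a name that was never configured would panic.
    //
    //     let embeddings = index.embeddings(&rtxn, docid).unwrap();
    //     let embedding = &embeddings["my_doggo_embedder"];
    //     assert!(!embedding.is_empty(), "{embedding:?}");
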
    #[test]
    fn delete_document_containing_vector() {
        // 1. Add an embedder
        // 2. Push two documents containing a simple vector
        // 3. Delete the first document
        // 4. The user defined roaring bitmap shouldn't contain the id of the first document anymore
        // 5. Clear the index
        // 6. The user defined roaring bitmap shouldn't contain the id of the second document
        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

        let setting = meilisearch_types::settings::Settings::<Unchecked> {
            embedders: Setting::Set(maplit::btreemap! {
                S("manual") => Setting::Set(EmbeddingSettings {
                    source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
                    dimensions: Setting::Set(3),
                    ..Default::default()
                })
            }),
            ..Default::default()
        };
        index_scheduler
            .register(
                KindWithContent::SettingsUpdate {
                    index_uid: S("doggos"),
                    new_settings: Box::new(setting),
                    is_deletion: false,
                    allow_index_creation: true,
                },
                None,
                false,
            )
            .unwrap();
        handle.advance_one_successful_batch();

        let content = serde_json::json!(
            [
                {
                    "id": 0,
                    "doggo": "kefir",
                    "_vectors": {
                        "manual": vec![0, 0, 0],
                    }
                },
                {
                    "id": 1,
                    "doggo": "intel",
                    "_vectors": {
                        "manual": vec![1, 1, 1],
                    }
                },
            ]
        );

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
        let documents_count =
            read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
                .unwrap();
        snapshot!(documents_count, @"2");
        file.persist().unwrap();

        index_scheduler
            .register(
                KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: S("doggos"),
                    primary_key: None,
                    method: ReplaceDocuments,
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: false,
                },
                None,
                false,
            )
            .unwrap();
        handle.advance_one_successful_batch();

        index_scheduler
            .register(
                KindWithContent::DocumentDeletion {
                    index_uid: S("doggos"),
                    documents_ids: vec![S("1")],
                },
                None,
                false,
            )
            .unwrap();
        handle.advance_one_successful_batch();

        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
            .all_documents(&rtxn)
            .unwrap()
            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
        let conf = index.embedding_configs(&rtxn).unwrap();
        snapshot!(format!("{conf:#?}"), @r###"
        [
            IndexEmbeddingConfig {
                name: "manual",
                config: EmbeddingConfig {
                    embedder_options: UserProvided(
                        EmbedderOptions {
                            dimensions: 3,
                            distribution: None,
                        },
                    ),
                    prompt: PromptData {
                        template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
                    },
                },
                user_provided: RoaringBitmap<[0]>,
            },
        ]
        "###);
        let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
        let embeddings = index.embeddings(&rtxn, docid).unwrap();
        let embedding = &embeddings["manual"];
        assert!(!embedding.is_empty(), "{embedding:?}");

        index_scheduler
            .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
            .unwrap();
        handle.advance_one_successful_batch();

        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
            .all_documents(&rtxn)
            .unwrap()
            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
        let conf = index.embedding_configs(&rtxn).unwrap();
        snapshot!(format!("{conf:#?}"), @r###"
        [
            IndexEmbeddingConfig {
                name: "manual",
                config: EmbeddingConfig {
                    embedder_options: UserProvided(
                        EmbedderOptions {
                            dimensions: 3,
                            distribution: None,
                        },
                    ),
                    prompt: PromptData {
                        template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
                    },
                },
                user_provided: RoaringBitmap<[]>,
            },
        ]
        "###);
    }

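    // A small sketch of what the snapshots above assert about the `user_provided` bitmap, assuming
    // the same `conf` binding returned by `index.embedding_configs(&rtxn)`: after deleting document
    // 1 only id 0 remains, and after `DocumentClear` the bitmap is expected to be empty.
    //
    //     let IndexEmbeddingConfig { user_provided, .. } = &conf[0];
    //     assert!(user_provided.contains(0));
    //     assert!(!user_provided.contains(1));
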
    #[test]
    fn delete_embedder_with_user_provided_vectors() {
        // 1. Add two embedders
        // 2. Push two documents containing a simple vector
        // 3. The documents must not contain the vectors after the update as they are in the vectors db
        // 4. Delete the embedders
        // 5. The documents contain the vectors again
        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

        let setting = meilisearch_types::settings::Settings::<Unchecked> {
            embedders: Setting::Set(maplit::btreemap! {
                S("manual") => Setting::Set(EmbeddingSettings {
                    source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
                    dimensions: Setting::Set(3),
                    ..Default::default()
                }),
                S("my_doggo_embedder") => Setting::Set(EmbeddingSettings {
                    source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
                    model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
                    revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
                    document_template: Setting::Set(S("{{doc.doggo}}")),
                    ..Default::default()
                }),
            }),
            ..Default::default()
        };
        index_scheduler
            .register(
                KindWithContent::SettingsUpdate {
                    index_uid: S("doggos"),
                    new_settings: Box::new(setting),
                    is_deletion: false,
                    allow_index_creation: true,
                },
                None,
                false,
            )
            .unwrap();
        handle.advance_one_successful_batch();

        let content = serde_json::json!(
            [
                {
                    "id": 0,
                    "doggo": "kefir",
                    "_vectors": {
                        "manual": vec![0, 0, 0],
                        "my_doggo_embedder": vec![1; 384],
                    }
                },
                {
                    "id": 1,
                    "doggo": "intel",
                    "_vectors": {
                        "manual": vec![1, 1, 1],
                    }
                },
            ]
        );

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
        let documents_count =
            read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
                .unwrap();
        snapshot!(documents_count, @"2");
        file.persist().unwrap();

        index_scheduler
            .register(
                KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: S("doggos"),
                    primary_key: None,
                    method: ReplaceDocuments,
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: false,
                },
                None,
                false,
            )
            .unwrap();
        handle.advance_one_successful_batch();

        {
            let index = index_scheduler.index("doggos").unwrap();
            let rtxn = index.read_txn().unwrap();
            let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
            let field_ids = field_ids_map.ids().collect::<Vec<_>>();
            let documents = index
                .all_documents(&rtxn)
                .unwrap()
                .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
                .collect::<Vec<_>>();
            snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
        }

        {
            let setting = meilisearch_types::settings::Settings::<Unchecked> {
                embedders: Setting::Set(maplit::btreemap! {
                    S("manual") => Setting::Reset,
                }),
                ..Default::default()
            };
            index_scheduler
                .register(
                    KindWithContent::SettingsUpdate {
                        index_uid: S("doggos"),
                        new_settings: Box::new(setting),
                        is_deletion: false,
                        allow_index_creation: true,
                    },
                    None,
                    false,
                )
                .unwrap();
            handle.advance_one_successful_batch();
        }

        {
            let index = index_scheduler.index("doggos").unwrap();
            let rtxn = index.read_txn().unwrap();
            let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
            let field_ids = field_ids_map.ids().collect::<Vec<_>>();
            let documents = index
                .all_documents(&rtxn)
                .unwrap()
                .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
                .collect::<Vec<_>>();
            snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
        }

        {
            let setting = meilisearch_types::settings::Settings::<Unchecked> {
                embedders: Setting::Reset,
                ..Default::default()
            };
            index_scheduler
                .register(
                    KindWithContent::SettingsUpdate {
                        index_uid: S("doggos"),
                        new_settings: Box::new(setting),
                        is_deletion: false,
                        allow_index_creation: true,
                    },
                    None,
                    false,
                )
                .unwrap();
            handle.advance_one_successful_batch();
        }

        {
            let index = index_scheduler.index("doggos").unwrap();
            let rtxn = index.read_txn().unwrap();
            let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
            let field_ids = field_ids_map.ids().collect::<Vec<_>>();
            let documents = index
                .all_documents(&rtxn)
                .unwrap()
                .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
                .collect::<Vec<_>>();

            // FIXME: redaction
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
        }
    }
}
@@ -6,10 +6,6 @@ expression: doc
   "doggo": "Intel",
   "breed": "beagle",
   "_vectors": {
-    "A_fakerest": {
-      "embeddings": "[vector]",
-      "userProvided": true
-    },
     "noise": [
       0.1,
       0.2,
@@ -6,10 +6,6 @@ expression: doc
   "doggo": "kefir",
   "breed": "patou",
   "_vectors": {
-    "A_fakerest": {
-      "embeddings": "[vector]",
-      "userProvided": true
-    },
     "noise": [
       0.1,
       0.2,

File diff suppressed because one or more lines are too long
@@ -238,6 +238,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
     let mut index_uids = vec![];
     match &mut task.kind {
         K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
+        K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
         K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
         K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
         K::DocumentClear { index_uid } => index_uids.push(index_uid),
@@ -408,7 +409,26 @@ impl IndexScheduler {
                     match status {
                         Status::Succeeded => assert!(indexed_documents <= received_documents),
                         Status::Failed | Status::Canceled => assert_eq!(indexed_documents, 0),
-                        status => panic!("DocumentAddition can't have an indexed_document set if it's {}", status),
+                        status => panic!("DocumentAddition can't have an indexed_documents set if it's {}", status),
+                    }
+                }
+                None => {
+                    assert!(matches!(status, Status::Enqueued | Status::Processing))
+                }
+            }
+        }
+        Details::DocumentEdition { edited_documents, .. } => {
+            assert_eq!(kind.as_kind(), Kind::DocumentEdition);
+            match edited_documents {
+                Some(edited_documents) => {
+                    assert!(matches!(
+                        status,
+                        Status::Succeeded | Status::Failed | Status::Canceled
+                    ));
+                    match status {
+                        Status::Succeeded => (),
+                        Status::Failed | Status::Canceled => assert_eq!(edited_documents, 0),
+                        status => panic!("DocumentEdition can't have an edited_documents set if it's {}", status),
                     }
                 }
             }
         }
         None => {
@@ -11,6 +11,6 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-insta = { version = "^1.34.0", features = ["json", "redactions"] }
+insta = { version = "^1.39.0", features = ["json", "redactions"] }
 md5 = "0.7.0"
 once_cell = "1.19"

@@ -11,16 +11,16 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-base64 = "0.21.7"
+base64 = "0.22.1"
-enum-iterator = "1.5.0"
+enum-iterator = "2.1.0"
 hmac = "0.12.1"
 maplit = "1.0.2"
 meilisearch-types = { path = "../meilisearch-types" }
 rand = "0.8.5"
-roaring = { version = "0.10.2", features = ["serde"] }
+roaring = { version = "0.10.6", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
+serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
 sha2 = "0.10.8"
-thiserror = "1.0.56"
+thiserror = "1.0.61"
-time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+uuid = { version = "1.10.0", features = ["serde", "v4"] }
@@ -188,6 +188,12 @@ impl AuthFilter {
         self.allow_index_creation && self.is_index_authorized(index)
     }
 
+    #[inline]
+    /// Return true if a tenant token was used to generate the search rules.
+    pub fn is_tenant_token(&self) -> bool {
+        self.search_rules.is_some()
+    }
+
     pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
         Self {
             search_rules: None,
@@ -205,6 +211,7 @@ impl AuthFilter {
             .unwrap_or(true)
     }
 
+    /// Check if the index is authorized by the API key and the tenant token.
     pub fn is_index_authorized(&self, index: &str) -> bool {
         self.key_authorized_indexes.is_index_authorized(index)
             && self
@@ -214,6 +221,44 @@ impl AuthFilter {
             .unwrap_or(true)
     }
 
+    /// Only check if the index is authorized by the API key
+    pub fn api_key_is_index_authorized(&self, index: &str) -> bool {
+        self.key_authorized_indexes.is_index_authorized(index)
+    }
+
+    /// Only check if the index is authorized by the tenant token
+    pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool {
+        self.search_rules
+            .as_ref()
+            .map(|search_rules| search_rules.is_index_authorized(index))
+            .unwrap_or(true)
+    }
+
+    /// Return the list of authorized indexes by the tenant token if any
+    pub fn tenant_token_list_index_authorized(&self) -> Vec<String> {
+        match self.search_rules {
+            Some(ref search_rules) => {
+                let mut indexes: Vec<_> = match search_rules {
+                    SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(),
+                    SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(),
+                };
+                indexes.sort_unstable();
+                indexes
+            }
+            None => Vec::new(),
+        }
+    }
+
+    /// Return the list of authorized indexes by the api key if any
+    pub fn api_key_list_index_authorized(&self) -> Vec<String> {
+        let mut indexes: Vec<_> = match self.key_authorized_indexes {
+            SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(),
+            SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(),
+        };
+        indexes.sort_unstable();
+        indexes
+    }
+
     pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
         if !self.is_index_authorized(index) {
             return None;
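
A hedged sketch of how the AuthFilter helpers added above might be combined, assuming a `filter: &AuthFilter` built from an API key or tenant token (the binding name is illustrative):

    // Check both credentials for each index listed by the tenant token,
    // mirroring what `is_index_authorized` does in one call.
    if filter.is_tenant_token() {
        for index in filter.tenant_token_list_index_authorized() {
            let fully_authorized = filter.api_key_is_index_authorized(&index)
                && filter.tenant_token_is_index_authorized(&index);
            println!("{index}: {fully_authorized}");
        }
    }
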
@@ -11,36 +11,36 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-actix-web = { version = "4.6.0", default-features = false }
+actix-web = { version = "4.8.0", default-features = false }
-anyhow = "1.0.79"
+anyhow = "1.0.86"
 convert_case = "0.6.0"
 csv = "1.3.0"
-deserr = { version = "0.6.1", features = ["actix-web"] }
+deserr = { version = "0.6.2", features = ["actix-web"] }
-either = { version = "1.9.0", features = ["serde"] }
+either = { version = "1.13.0", features = ["serde"] }
-enum-iterator = "1.5.0"
+enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.28"
+flate2 = "1.0.30"
 fst = "0.4.7"
-memmap2 = "0.7.1"
+memmap2 = "0.9.4"
 milli = { path = "../milli" }
-roaring = { version = "0.10.2", features = ["serde"] }
+roaring = { version = "0.10.6", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
+serde = { version = "1.0.204", features = ["derive"] }
 serde-cs = "0.2.4"
-serde_json = "1.0.111"
+serde_json = "1.0.120"
-tar = "0.4.40"
+tar = "0.4.41"
-tempfile = "3.9.0"
+tempfile = "3.10.1"
-thiserror = "1.0.56"
+thiserror = "1.0.61"
-time = { version = "0.3.31", features = [
+time = { version = "0.3.36", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
-tokio = "1.35"
+tokio = "1.38"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+uuid = { version = "1.10.0", features = ["serde", "v4"] }
 
 [dev-dependencies]
-insta = "1.34.0"
+insta = "1.39.0"
 meili-snap = { path = "../meili-snap" }
 
 [features]
@@ -54,6 +54,8 @@ chinese-pinyin = ["milli/chinese-pinyin"]
 hebrew = ["milli/hebrew"]
 # japanese specialized tokenization
 japanese = ["milli/japanese"]
+# korean specialized tokenization
+korean = ["milli/korean"]
 # thai specialized tokenization
 thai = ["milli/thai"]
 # allow greek specialized tokenization
@@ -155,6 +155,10 @@ make_missing_field_convenience_builder!(
     MissingFacetSearchFacetName,
     missing_facet_search_facet_name
 );
+make_missing_field_convenience_builder!(
+    MissingDocumentEditionFunction,
+    missing_document_edition_function
+);
 
 // Integrate a sub-error into a [`DeserrError`] by taking its error message but using
 // the default error code (C) from `Self`
@@ -188,6 +192,7 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
 merge_with_error_impl_take_error_message!(ParseTaskKindError);
 merge_with_error_impl_take_error_message!(ParseTaskStatusError);
 merge_with_error_impl_take_error_message!(IndexUidFormatError);
+merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
 merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
 merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
 merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
@@ -222,7 +222,9 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
 InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
 InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
 MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
+MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
 InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
@@ -236,13 +238,20 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
 InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
 InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
 InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
+InvalidSimilarRetrieveVectors , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
 InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
+InvalidSearchRetrieveVectors , InvalidRequest , BAD_REQUEST ;
 InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
 InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
@@ -270,13 +279,14 @@ InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
 InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
 InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
+InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
@@ -332,7 +342,10 @@ UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA
 
 // Experimental features
 VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
-NotFoundSimilarId , InvalidRequest , BAD_REQUEST
+NotFoundSimilarId , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
+EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST
 }
 
 impl ErrorCode for JoinError {
@@ -381,6 +394,7 @@ impl ErrorCode for milli::Error {
                     Code::IndexPrimaryKeyMultipleCandidatesFound
                 }
                 UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists,
+                UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
                 UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
                 UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
                 UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
@@ -393,14 +407,23 @@ impl ErrorCode for milli::Error {
                 UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                 UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
                 UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
-                UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
+                UserError::InvalidVectorsMapType { .. }
+                | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
                 UserError::TooManyVectors(_, _) => Code::TooManyVectors,
                 UserError::SortError(_) => Code::InvalidSearchSort,
                 UserError::InvalidMinTypoWordLenSetting(_, _) => {
                     Code::InvalidSettingsTypoTolerance
                 }
                 UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
-                UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
+                UserError::VectorEmbeddingError(_) | UserError::DocumentEmbeddingError(_) => {
+                    Code::VectorEmbeddingError
+                }
+                UserError::DocumentEditionCannotModifyPrimaryKey
+                | UserError::DocumentEditionDocumentMustBeObject
+                | UserError::DocumentEditionRuntimeError(_)
+                | UserError::DocumentEditionCompilationError(_) => {
+                    Code::EditDocumentsByFunctionError
+                }
             }
         }
     }
@@ -496,6 +519,12 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
     }
 }
 
+impl fmt::Display for deserr_codes::InvalidMultiSearchWeight {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "the value of `weight` is invalid, expected a positive float (>= 0.0).")
+    }
+}
+
 impl fmt::Display for deserr_codes::InvalidSimilarId {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(
@@ -6,10 +6,13 @@ pub struct RuntimeTogglableFeatures {
     pub vector_store: bool,
     pub metrics: bool,
     pub logs_route: bool,
+    pub edit_documents_by_function: bool,
+    pub contains_filter: bool,
 }
 
 #[derive(Default, Debug, Clone, Copy)]
 pub struct InstanceTogglableFeatures {
     pub metrics: bool,
     pub logs_route: bool,
+    pub contains_filter: bool,
 }
@@ -8,6 +8,7 @@ use std::str::FromStr;
 
 use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
 use fst::IntoStreamer;
+use milli::index::IndexEmbeddingConfig;
 use milli::proximity::ProximityPrecision;
 use milli::update::Setting;
 use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
@@ -672,7 +673,7 @@ pub fn settings(
     let embedders: BTreeMap<_, _> = index
         .embedding_configs(rtxn)?
         .into_iter()
-        .map(|(name, config)| (name, Setting::Set(config.into())))
+        .map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into())))
         .collect();
     let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
 
@@ -1,3 +1,4 @@
+use milli::Object;
 use serde::Serialize;
 use time::{Duration, OffsetDateTime};
 
@@ -54,6 +55,8 @@ pub struct DetailsView {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub indexed_documents: Option<Option<u64>>,
     #[serde(skip_serializing_if = "Option::is_none")]
+    pub edited_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
     pub primary_key: Option<Option<String>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub provided_ids: Option<usize>,
@@ -70,6 +73,10 @@ pub struct DetailsView {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub dump_uid: Option<Option<String>>,
     #[serde(skip_serializing_if = "Option::is_none")]
+    pub context: Option<Option<Object>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub function: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
     #[serde(flatten)]
     pub settings: Option<Box<Settings<Unchecked>>>,
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -86,6 +93,20 @@ impl From<Details> for DetailsView {
                 ..DetailsView::default()
                 }
             }
+            Details::DocumentEdition {
+                deleted_documents,
+                edited_documents,
+                original_filter,
+                context,
+                function,
+            } => DetailsView {
+                deleted_documents: Some(deleted_documents),
+                edited_documents: Some(edited_documents),
+                original_filter: Some(original_filter),
+                context: Some(context),
+                function: Some(function),
+                ..DetailsView::default()
+            },
             Details::SettingsUpdate { mut settings } => {
                 settings.hide_secrets();
                 DetailsView { settings: Some(settings), ..DetailsView::default() }
@@ -5,6 +5,7 @@ use std::str::FromStr;
 
 use enum_iterator::Sequence;
 use milli::update::IndexDocumentsMethod;
+use milli::Object;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize, Serializer};
 use time::{Duration, OffsetDateTime};
@@ -48,6 +49,7 @@ impl Task {
             | TaskDeletion { .. }
             | IndexSwap { .. } => None,
             DocumentAdditionOrUpdate { index_uid, .. }
+            | DocumentEdition { index_uid, .. }
             | DocumentDeletion { index_uid, .. }
             | DocumentDeletionByFilter { index_uid, .. }
             | DocumentClear { index_uid }
@@ -67,7 +69,8 @@ impl Task {
     pub fn content_uuid(&self) -> Option<Uuid> {
         match self.kind {
             KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
-            KindWithContent::DocumentDeletion { .. }
+            KindWithContent::DocumentEdition { .. }
+            | KindWithContent::DocumentDeletion { .. }
             | KindWithContent::DocumentDeletionByFilter { .. }
             | KindWithContent::DocumentClear { .. }
             | KindWithContent::SettingsUpdate { .. }
@@ -102,6 +105,12 @@ pub enum KindWithContent {
         index_uid: String,
         filter_expr: serde_json::Value,
     },
+    DocumentEdition {
+        index_uid: String,
+        filter_expr: Option<serde_json::Value>,
+        context: Option<milli::Object>,
+        function: String,
+    },
     DocumentClear {
         index_uid: String,
     },
@@ -150,6 +159,7 @@ impl KindWithContent {
     pub fn as_kind(&self) -> Kind {
         match self {
             KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
+            KindWithContent::DocumentEdition { .. } => Kind::DocumentEdition,
             KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
             KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
             KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
@@ -174,6 +184,7 @@ impl KindWithContent {
             | TaskCancelation { .. }
             | TaskDeletion { .. } => vec![],
             DocumentAdditionOrUpdate { index_uid, .. }
+            | DocumentEdition { index_uid, .. }
             | DocumentDeletion { index_uid, .. }
             | DocumentDeletionByFilter { index_uid, .. }
             | DocumentClear { index_uid }
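
A hedged sketch of constructing the new `KindWithContent::DocumentEdition` variant introduced above; the filter expression and edition function below are illustrative values only:

    let kind = KindWithContent::DocumentEdition {
        index_uid: "doggos".to_string(),
        filter_expr: Some(serde_json::json!("doggo EXISTS")),
        context: None,
        function: "doc.doggo = `${doc.doggo} the great`".to_string(),
    };
    // The mapping added in the hunk above ties this variant to the new task kind.
    assert_eq!(kind.as_kind(), Kind::DocumentEdition);
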
@@ -202,6 +213,15 @@ impl KindWithContent {
                     indexed_documents: None,
                 })
             }
+            KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
+                Some(Details::DocumentEdition {
+                    deleted_documents: None,
+                    edited_documents: None,
+                    original_filter: filter_expr.as_ref().map(|v| v.to_string()),
+                    context: context.clone(),
+                    function: function.clone(),
+                })
+            }
             KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
                 Some(Details::DocumentDeletion {
                     provided_ids: documents_ids.len(),
@@ -250,6 +270,15 @@ impl KindWithContent {
                     indexed_documents: Some(0),
                 })
             }
+            KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
+                Some(Details::DocumentEdition {
+                    deleted_documents: Some(0),
+                    edited_documents: Some(0),
+                    original_filter: filter_expr.as_ref().map(|v| v.to_string()),
+                    context: context.clone(),
+                    function: function.clone(),
+                })
+            }
             KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
                 Some(Details::DocumentDeletion {
                     provided_ids: documents_ids.len(),
@@ -301,6 +330,7 @@ impl From<&KindWithContent> for Option<Details> {
                 indexed_documents: None,
                 })
             }
+            KindWithContent::DocumentEdition { .. } => None,
             KindWithContent::DocumentDeletion { .. } => None,
             KindWithContent::DocumentDeletionByFilter { .. } => None,
             KindWithContent::DocumentClear { .. } => None,
@@ -394,6 +424,7 @@ impl std::error::Error for ParseTaskStatusError {}
 #[serde(rename_all = "camelCase")]
 pub enum Kind {
     DocumentAdditionOrUpdate,
+    DocumentEdition,
     DocumentDeletion,
     SettingsUpdate,
     IndexCreation,
@@ -410,6 +441,7 @@ impl Kind {
     pub fn related_to_one_index(&self) -> bool {
         match self {
             Kind::DocumentAdditionOrUpdate
+            | Kind::DocumentEdition
             | Kind::DocumentDeletion
             | Kind::SettingsUpdate
             | Kind::IndexCreation
@@ -427,6 +459,7 @@ impl Display for Kind {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
+            Kind::DocumentEdition => write!(f, "documentEdition"),
             Kind::DocumentDeletion => write!(f, "documentDeletion"),
             Kind::SettingsUpdate => write!(f, "settingsUpdate"),
             Kind::IndexCreation => write!(f, "indexCreation"),
@@ -454,6 +487,8 @@ impl FromStr for Kind {
             Ok(Kind::IndexDeletion)
         } else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") {
             Ok(Kind::DocumentAdditionOrUpdate)
+        } else if kind.eq_ignore_ascii_case("documentEdition") {
+            Ok(Kind::DocumentEdition)
         } else if kind.eq_ignore_ascii_case("documentDeletion") {
             Ok(Kind::DocumentDeletion)
         } else if kind.eq_ignore_ascii_case("settingsUpdate") {
@@ -495,16 +530,50 @@ impl std::error::Error for ParseTaskKindError {}
 
 #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
 pub enum Details {
-    DocumentAdditionOrUpdate { received_documents: u64, indexed_documents: Option<u64> },
-    SettingsUpdate { settings: Box<Settings<Unchecked>> },
-    IndexInfo { primary_key: Option<String> },
-    DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
-    DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
-    ClearAll { deleted_documents: Option<u64> },
-    TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
-    TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
-    Dump { dump_uid: Option<String> },
-    IndexSwap { swaps: Vec<IndexSwap> },
+    DocumentAdditionOrUpdate {
+        received_documents: u64,
+        indexed_documents: Option<u64>,
+    },
+    SettingsUpdate {
+        settings: Box<Settings<Unchecked>>,
+    },
+    IndexInfo {
+        primary_key: Option<String>,
+    },
+    DocumentDeletion {
+        provided_ids: usize,
+        deleted_documents: Option<u64>,
+    },
+    DocumentDeletionByFilter {
+        original_filter: String,
+        deleted_documents: Option<u64>,
+    },
+    DocumentEdition {
+        deleted_documents: Option<u64>,
+        edited_documents: Option<u64>,
+        original_filter: Option<String>,
+        context: Option<Object>,
+        function: String,
+    },
+    ClearAll {
+        deleted_documents: Option<u64>,
+    },
+    TaskCancelation {
+        matched_tasks: u64,
+        canceled_tasks: Option<u64>,
+        original_filter: String,
+    },
+    TaskDeletion {
+        matched_tasks: u64,
+        deleted_tasks: Option<u64>,
+        original_filter: String,
+    },
+    Dump {
+        dump_uid: Option<String>,
+    },
+    IndexSwap {
+        swaps: Vec<IndexSwap>,
+    },
 }
 
 impl Details {
@@ -514,6 +583,7 @@ impl Details {
             Self::DocumentAdditionOrUpdate { indexed_documents, .. } => {
                 *indexed_documents = Some(0)
             }
+            Self::DocumentEdition { edited_documents, .. } => *edited_documents = Some(0),
             Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
             Self::DocumentDeletionByFilter { deleted_documents, .. } => {
                 *deleted_documents = Some(0)
@@ -14,130 +14,125 @@ default-run = "meilisearch"

 [dependencies]
 actix-cors = "0.7.0"
-actix-http = { version = "3.7.0", default-features = false, features = [
+actix-http = { version = "3.8.0", default-features = false, features = [
   "compress-brotli",
   "compress-gzip",
-  "rustls-0_21",
+  "rustls-0_23",
 ] }
 actix-utils = "3.0.1"
-actix-web = { version = "4.6.0", default-features = false, features = [
+actix-web = { version = "4.8.0", default-features = false, features = [
   "macros",
   "compress-brotli",
   "compress-gzip",
   "cookies",
-  "rustls-0_21",
+  "rustls-0_23",
 ] }
-actix-web-static-files = { version = "4.0.1", optional = true }
-anyhow = { version = "1.0.79", features = ["backtrace"] }
-async-stream = "0.3.5"
-async-trait = "0.1.77"
-bstr = "1.9.0"
-byte-unit = { version = "4.0.19", default-features = false, features = [
+anyhow = { version = "1.0.86", features = ["backtrace"] }
+async-trait = "0.1.81"
+bstr = "1.9.1"
+byte-unit = { version = "5.1.4", default-features = false, features = [
   "std",
+  "byte",
   "serde",
 ] }
-bytes = "1.5.0"
+bytes = "1.6.0"
-clap = { version = "4.4.17", features = ["derive", "env"] }
+clap = { version = "4.5.9", features = ["derive", "env"] }
-crossbeam-channel = "0.5.11"
+crossbeam-channel = "0.5.13"
-deserr = { version = "0.6.1", features = ["actix-web"] }
+deserr = { version = "0.6.2", features = ["actix-web"] }
 dump = { path = "../dump" }
-either = "1.9.0"
+either = "1.13.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.28"
+flate2 = "1.0.30"
 fst = "0.4.7"
 futures = "0.3.30"
 futures-util = "0.3.30"
-http = "0.2.11"
 index-scheduler = { path = "../index-scheduler" }
-indexmap = { version = "2.1.0", features = ["serde"] }
+indexmap = { version = "2.2.6", features = ["serde"] }
-is-terminal = "0.4.10"
+is-terminal = "0.4.12"
-itertools = "0.11.0"
+itertools = "0.13.0"
-jsonwebtoken = "9.2.0"
+jsonwebtoken = "9.3.0"
-lazy_static = "1.4.0"
+lazy_static = "1.5.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.43", default-features = false }
 mime = "0.3.17"
 num_cpus = "1.16.0"
-obkv = "0.2.1"
+obkv = "0.2.2"
 once_cell = "1.19.0"
-ordered-float = "4.2.0"
+ordered-float = "4.2.1"
-parking_lot = "0.12.1"
+parking_lot = "0.12.3"
 permissive-json-pointer = { path = "../permissive-json-pointer" }
-pin-project-lite = "0.2.13"
+pin-project-lite = "0.2.14"
 platform-dirs = "0.3.0"
-prometheus = { version = "0.13.3", features = ["process"] }
+prometheus = { version = "0.13.4", features = ["process"] }
 rand = "0.8.5"
-rayon = "1.8.0"
+rayon = "1.10.0"
-regex = "1.10.2"
+regex = "1.10.5"
-reqwest = { version = "0.11.23", features = [
+reqwest = { version = "0.12.5", features = [
   "rustls-tls",
   "json",
 ], default-features = false }
-rustls = "0.21.12"
+rustls = { version = "0.23.11", features = ["ring"], default-features = false }
-rustls-pemfile = "1.0.2"
-segment = { version = "0.2.3", optional = true }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+rustls-pki-types = { version = "1.7.0", features = ["alloc"] }
+rustls-pemfile = "2.1.2"
+segment = { version = "0.2.4", optional = true }
+serde = { version = "1.0.204", features = ["derive"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
 sha2 = "0.10.8"
-siphasher = "1.0.0"
+siphasher = "1.0.1"
 slice-group-by = "0.3.1"
-static-files = { version = "0.2.3", optional = true }
+static-files = { version = "0.2.4", optional = true }
-sysinfo = "0.30.5"
+sysinfo = "0.30.13"
-tar = "0.4.40"
+tar = "0.4.41"
-tempfile = "3.9.0"
+tempfile = "3.10.1"
-thiserror = "1.0.56"
+thiserror = "1.0.61"
-time = { version = "0.3.31", features = [
+time = { version = "0.3.36", features = [
   "serde-well-known",
   "formatting",
   "parsing",
   "macros",
 ] }
-tokio = { version = "1.35.1", features = ["full"] }
+tokio = { version = "1.38.0", features = ["full"] }
-tokio-stream = "0.1.14"
-toml = "0.8.8"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
-walkdir = "2.4.0"
-yaup = "0.2.1"
+toml = "0.8.14"
+uuid = { version = "1.10.0", features = ["serde", "v4"] }
 serde_urlencoded = "0.7.1"
 termcolor = "1.4.1"
-url = { version = "2.5.0", features = ["serde"] }
+url = { version = "2.5.2", features = ["serde"] }
 tracing = "0.1.40"
 tracing-subscriber = { version = "0.3.18", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
-tracing-actix-web = "0.7.10"
+tracing-actix-web = "0.7.11"
 build-info = { version = "1.7.0", path = "../build-info" }
+roaring = "0.10.2"

 [dev-dependencies]
-actix-rt = "2.9.0"
+actix-rt = "2.10.0"
-assert-json-diff = "2.0.2"
 brotli = "6.0.0"
-insta = "1.34.0"
+insta = "1.39.0"
 manifest-dir-macros = "0.1.18"
 maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
 temp-env = "0.3.6"
 urlencoding = "2.1.3"
-yaup = "0.2.1"
+yaup = "0.3.1"

 [build-dependencies]
-anyhow = { version = "1.0.79", optional = true }
+anyhow = { version = "1.0.86", optional = true }
-cargo_toml = { version = "0.18.0", optional = true }
+cargo_toml = { version = "0.20.3", optional = true }
 hex = { version = "0.4.3", optional = true }
-reqwest = { version = "0.11.23", features = [
+reqwest = { version = "0.12.5", features = [
   "blocking",
   "rustls-tls",
 ], default-features = false, optional = true }
 sha-1 = { version = "0.10.1", optional = true }
-static-files = { version = "0.2.3", optional = true }
+static-files = { version = "0.2.4", optional = true }
-tempfile = { version = "3.9.0", optional = true }
+tempfile = { version = "3.10.1", optional = true }
-zip = { version = "0.6.6", optional = true }
+zip = { version = "2.1.3", optional = true }

 [features]
 default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
 analytics = ["segment"]
 mini-dashboard = [
-  "actix-web-static-files",
   "static-files",
   "anyhow",
   "cargo_toml",

@@ -151,6 +146,7 @@ chinese = ["meilisearch-types/chinese"]
 chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
 hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
+korean = ["meilisearch-types/korean"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]

@@ -158,5 +154,5 @@ vietnamese = ["meilisearch-types/vietnamese"]
 swedish-recomposition = ["meilisearch-types/swedish-recomposition"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
-sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
+sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"
@@ -6,7 +6,7 @@ use meilisearch_types::InstanceUid;
 use serde_json::Value;

 use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
-use crate::routes::indexes::documents::UpdateDocumentsQuery;
+use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
 use crate::Opt;

 pub struct MockAnalytics {

@@ -42,7 +42,7 @@ pub struct MultiSearchAggregator;

 #[allow(dead_code)]
 impl MultiSearchAggregator {
-    pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
+    pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
         Self
     }

@@ -97,6 +97,13 @@ impl Analytics for MockAnalytics {
         _request: &HttpRequest,
     ) {
     }
+    fn update_documents_by_function(
+        &self,
+        _documents_query: &DocumentEditionByFunction,
+        _index_creation: bool,
+        _request: &HttpRequest,
+    ) {
+    }
     fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
     fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
 }
@@ -13,7 +13,7 @@ use once_cell::sync::Lazy;
 use platform_dirs::AppDirs;
 use serde_json::Value;

-use crate::routes::indexes::documents::UpdateDocumentsQuery;
+use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};

 // if the analytics feature is disabled
 // the `SegmentAnalytics` point to the mock instead of the real analytics

@@ -74,8 +74,8 @@ pub enum DocumentDeletionKind {

 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum DocumentFetchKind {
-    PerDocumentId,
-    Normal { with_filter: bool, limit: usize, offset: usize },
+    PerDocumentId { retrieve_vectors: bool },
+    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
 }

 pub trait Analytics: Sync + Send {

@@ -102,7 +102,7 @@ pub trait Analytics: Sync + Send {
     /// This method should be called to aggregate post facet values searches
     fn post_facet_search(&self, aggregate: FacetSearchAggregator);

-    // this method should be called to aggregate a add documents request
+    // this method should be called to aggregate an add documents request
     fn add_documents(
         &self,
         documents_query: &UpdateDocumentsQuery,

@@ -119,11 +119,19 @@ pub trait Analytics: Sync + Send {
     // this method should be called to aggregate a add documents request
     fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);

-    // this method should be called to batch a update documents request
+    // this method should be called to batch an update documents request
     fn update_documents(
         &self,
         documents_query: &UpdateDocumentsQuery,
         index_creation: bool,
         request: &HttpRequest,
     );

+    // this method should be called to batch an update documents by function request
+    fn update_documents_by_function(
+        &self,
+        documents_query: &DocumentEditionByFunction,
+        index_creation: bool,
+        request: &HttpRequest,
+    );
 }
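The `DocumentFetchKind` change above threads a `retrieve_vectors` flag through both variants, so every caller now has to surface it next to the pagination values. A minimal, self-contained sketch of that destructuring, mirroring the `DocumentsFetchAggregator::from_query` hunk further down; the standalone enum copy and the `fetch_parameters` helper are illustrative, not code from the repository:

    // Simplified stand-in for the enum after this change.
    enum DocumentFetchKind {
        PerDocumentId { retrieve_vectors: bool },
        Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
    }

    // Every call site now surfaces the flag alongside limit/offset.
    fn fetch_parameters(query: &DocumentFetchKind) -> (usize, usize, bool) {
        match query {
            DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
            DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
                (*limit, *offset, *retrieve_vectors)
            }
        }
    }

    fn main() {
        let by_id = DocumentFetchKind::PerDocumentId { retrieve_vectors: false };
        let listed = DocumentFetchKind::Normal {
            with_filter: true,
            limit: 20,
            offset: 0,
            retrieve_vectors: true,
        };
        assert_eq!(fetch_parameters(&by_id), (1, 0, false));
        assert_eq!(fetch_parameters(&listed), (20, 0, true));
    }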
@@ -5,10 +5,9 @@ use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::time::{Duration, Instant};

-use actix_web::http::header::USER_AGENT;
+use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
 use actix_web::HttpRequest;
 use byte_unit::Byte;
-use http::header::CONTENT_TYPE;
 use index_scheduler::IndexScheduler;
 use meilisearch_auth::{AuthController, AuthFilter};
 use meilisearch_types::InstanceUid;

@@ -31,12 +30,12 @@ use crate::analytics::Analytics;
 use crate::option::{
     default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
 };
-use crate::routes::indexes::documents::UpdateDocumentsQuery;
+use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
 use crate::routes::indexes::facet_search::FacetSearchQuery;
 use crate::routes::{create_all_stats, Stats};
 use crate::search::{
-    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
-    SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+    FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex,
+    SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
     DEFAULT_SEMANTIC_RATIO,
 };

@@ -81,6 +80,7 @@ pub enum AnalyticsMsg {
     AggregateAddDocuments(DocumentsAggregator),
     AggregateDeleteDocuments(DocumentsDeletionAggregator),
     AggregateUpdateDocuments(DocumentsAggregator),
+    AggregateEditDocumentsByFunction(EditDocumentsByFunctionAggregator),
     AggregateGetFetchDocuments(DocumentsFetchAggregator),
     AggregatePostFetchDocuments(DocumentsFetchAggregator),
 }

@@ -150,6 +150,7 @@ impl SegmentAnalytics {
             add_documents_aggregator: DocumentsAggregator::default(),
             delete_documents_aggregator: DocumentsDeletionAggregator::default(),
             update_documents_aggregator: DocumentsAggregator::default(),
+            edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator::default(),
             get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
             post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
             get_similar_aggregator: SimilarAggregator::default(),

@@ -230,6 +231,17 @@ impl super::Analytics for SegmentAnalytics {
         let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
     }

+    fn update_documents_by_function(
+        &self,
+        documents_query: &DocumentEditionByFunction,
+        index_creation: bool,
+        request: &HttpRequest,
+    ) {
+        let aggregate =
+            EditDocumentsByFunctionAggregator::from_query(documents_query, index_creation, request);
+        let _ = self.sender.try_send(AnalyticsMsg::AggregateEditDocumentsByFunction(aggregate));
+    }
+
     fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
         let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
         let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
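The new `AggregateEditDocumentsByFunction` message slots into the same pipeline as the existing analytics events: each handler builds one aggregate per request, fire-and-forgets it over a channel, and a background loop folds it into a per-event accumulator before it becomes a Segment track. A stripped-down illustration of that pattern, using toy types and std `mpsc` rather than the crate's actual channel and aggregator fields:

    use std::sync::mpsc;

    // Toy stand-in for the real aggregator; only the merge logic is shown.
    #[derive(Default)]
    struct EditByFunctionAggregate {
        total_received: usize,
        filtered: bool,
        with_context: bool,
    }

    impl EditByFunctionAggregate {
        // Mirrors the `aggregate` methods in the diff: counters add up, booleans OR together.
        fn aggregate(&mut self, other: Self) {
            self.total_received += other.total_received;
            self.filtered |= other.filtered;
            self.with_context |= other.with_context;
        }
    }

    enum AnalyticsMsg {
        AggregateEditDocumentsByFunction(EditByFunctionAggregate),
    }

    fn main() {
        let (sender, receiver) = mpsc::channel();

        // Route handlers fire-and-forget one aggregate per request.
        for filtered in [true, false] {
            let _ = sender.send(AnalyticsMsg::AggregateEditDocumentsByFunction(
                EditByFunctionAggregate { total_received: 1, filtered, with_context: false },
            ));
        }
        drop(sender);

        // The background loop folds every message into a single accumulator.
        let mut acc = EditByFunctionAggregate::default();
        for msg in receiver {
            match msg {
                AnalyticsMsg::AggregateEditDocumentsByFunction(a) => acc.aggregate(a),
            }
        }
        assert_eq!(acc.total_received, 2);
        assert!(acc.filtered && !acc.with_context);
    }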
@@ -249,6 +261,7 @@ impl super::Analytics for SegmentAnalytics {
 #[derive(Debug, Clone, Serialize)]
 struct Infos {
     env: String,
+    experimental_contains_filter: bool,
     experimental_enable_metrics: bool,
     experimental_search_queue_size: usize,
     experimental_logs_mode: LogMode,

@@ -291,6 +304,7 @@ impl From<Opt> for Infos {
         // Thus we must not insert `..` at the end.
         let Opt {
             db_path,
+            experimental_contains_filter,
             experimental_enable_metrics,
             experimental_search_queue_size,
             experimental_logs_mode,

@@ -341,6 +355,7 @@ impl From<Opt> for Infos {
         // We consider information sensible if it contains a path, an address, or a key.
         Self {
             env,
+            experimental_contains_filter,
             experimental_enable_metrics,
             experimental_search_queue_size,
             experimental_logs_mode,

@@ -390,6 +405,7 @@ pub struct Segment {
     add_documents_aggregator: DocumentsAggregator,
     delete_documents_aggregator: DocumentsDeletionAggregator,
     update_documents_aggregator: DocumentsAggregator,
+    edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator,
     get_fetch_documents_aggregator: DocumentsFetchAggregator,
     post_fetch_documents_aggregator: DocumentsFetchAggregator,
     get_similar_aggregator: SimilarAggregator,

@@ -454,6 +470,7 @@ impl Segment {
                 Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
+                Some(AnalyticsMsg::AggregateEditDocumentsByFunction(agreg)) => self.edit_documents_by_function_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),

@@ -509,6 +526,7 @@ impl Segment {
             add_documents_aggregator,
             delete_documents_aggregator,
             update_documents_aggregator,
+            edit_documents_by_function_aggregator,
             get_fetch_documents_aggregator,
             post_fetch_documents_aggregator,
             get_similar_aggregator,

@@ -550,6 +568,11 @@ impl Segment {
         {
             let _ = self.batcher.push(update_documents).await;
         }
+        if let Some(edit_documents_by_function) = take(edit_documents_by_function_aggregator)
+            .into_event(user, "Documents Edited By Function")
+        {
+            let _ = self.batcher.push(edit_documents_by_function).await;
+        }
         if let Some(get_fetch_documents) =
             take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET")
         {
@@ -597,6 +620,9 @@ pub struct SearchAggregator {
     // every time a request has a filter, this field must be incremented by one
     sort_total_number_of_criteria: usize,

+    // distinct
+    distinct: bool,
+
     // filter
     filter_with_geo_radius: bool,
     filter_with_geo_bounding_box: bool,

@@ -622,6 +648,7 @@ pub struct SearchAggregator {
     // Whether a non-default embedder was specified
     embedder: bool,
     hybrid: bool,
+    retrieve_vectors: bool,

     // every time a search is done, we increment the counter linked to the used settings
     matching_strategy: HashMap<String, usize>,

@@ -662,6 +689,7 @@ impl SearchAggregator {
             page,
             hits_per_page,
             attributes_to_retrieve: _,
+            retrieve_vectors,
             attributes_to_crop: _,
             crop_length,
             attributes_to_highlight: _,

@@ -670,6 +698,7 @@ impl SearchAggregator {
             show_ranking_score_details,
             filter,
             sort,
+            distinct,
             facets: _,
             highlight_pre_tag,
             highlight_post_tag,

@@ -692,6 +721,8 @@ impl SearchAggregator {
             ret.sort_sum_of_criteria_terms = sort.len();
         }

+        ret.distinct = distinct.is_some();
+
         if let Some(ref filter) = filter {
             static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
             ret.filter_total_number_of_criteria = 1;

@@ -728,6 +759,7 @@ impl SearchAggregator {
         if let Some(ref vector) = vector {
             ret.max_vector_size = vector.len();
         }
+        ret.retrieve_vectors |= retrieve_vectors;

         if query.is_finite_pagination() {
             let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);

@@ -795,6 +827,7 @@ impl SearchAggregator {
             sort_with_geo_point,
             sort_sum_of_criteria_terms,
             sort_total_number_of_criteria,
+            distinct,
             filter_with_geo_radius,
             filter_with_geo_bounding_box,
             filter_sum_of_criteria_terms,

@@ -803,6 +836,7 @@ impl SearchAggregator {
             attributes_to_search_on_total_number_of_uses,
             max_terms_number,
             max_vector_size,
+            retrieve_vectors,
             matching_strategy,
             max_limit,
             max_offset,

@@ -851,6 +885,9 @@ impl SearchAggregator {
         self.sort_total_number_of_criteria =
             self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);

+        // distinct
+        self.distinct |= distinct;
+
         // filter
         self.filter_with_geo_radius |= filter_with_geo_radius;
         self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;

@@ -873,6 +910,7 @@ impl SearchAggregator {

         // vector
         self.max_vector_size = self.max_vector_size.max(max_vector_size);
+        self.retrieve_vectors |= retrieve_vectors;
         self.semantic_ratio |= semantic_ratio;
         self.hybrid |= hybrid;
         self.embedder |= embedder;

@@ -921,6 +959,7 @@ impl SearchAggregator {
             sort_with_geo_point,
             sort_sum_of_criteria_terms,
             sort_total_number_of_criteria,
+            distinct,
             filter_with_geo_radius,
             filter_with_geo_bounding_box,
             filter_sum_of_criteria_terms,

@@ -929,6 +968,7 @@ impl SearchAggregator {
             attributes_to_search_on_total_number_of_uses,
             max_terms_number,
             max_vector_size,
+            retrieve_vectors,
             matching_strategy,
             max_limit,
             max_offset,

@@ -977,6 +1017,7 @@ impl SearchAggregator {
                 "with_geoPoint": sort_with_geo_point,
                 "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
             },
+            "distinct": distinct,
             "filter": {
                 "with_geoRadius": filter_with_geo_radius,
                 "with_geoBoundingBox": filter_with_geo_bounding_box,

@@ -991,6 +1032,7 @@ impl SearchAggregator {
             },
             "vector": {
                 "max_vector_size": max_vector_size,
+                "retrieve_vectors": retrieve_vectors,
             },
             "hybrid": {
                 "enabled": hybrid,
@@ -1056,22 +1098,33 @@ pub struct MultiSearchAggregator {
     show_ranking_score: bool,
     show_ranking_score_details: bool,

+    // federation
+    use_federation: bool,
+
     // context
     user_agents: HashSet<String>,
 }

 impl MultiSearchAggregator {
-    pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
+    pub fn from_federated_search(
+        federated_search: &FederatedSearch,
+        request: &HttpRequest,
+    ) -> Self {
         let timestamp = Some(OffsetDateTime::now_utc());

         let user_agents = extract_user_agents(request).into_iter().collect();

-        let distinct_indexes: HashSet<_> = query
+        let use_federation = federated_search.federation.is_some();
+
+        let distinct_indexes: HashSet<_> = federated_search
+            .queries
             .iter()
             .map(|query| {
+                let query = &query;
                 // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
                 let SearchQueryWithIndex {
                     index_uid,
+                    federation_options: _,
                     q: _,
                     vector: _,
                     offset: _,

@@ -1079,6 +1132,7 @@ impl MultiSearchAggregator {
                     page: _,
                     hits_per_page: _,
                     attributes_to_retrieve: _,
+                    retrieve_vectors: _,
                     attributes_to_crop: _,
                     crop_length: _,
                     attributes_to_highlight: _,

@@ -1087,6 +1141,7 @@ impl MultiSearchAggregator {
                     show_matches_position: _,
                     filter: _,
                     sort: _,
+                    distinct: _,
                     facets: _,
                     highlight_pre_tag: _,
                     highlight_post_tag: _,

@@ -1101,8 +1156,10 @@ impl MultiSearchAggregator {
             })
             .collect();

-        let show_ranking_score = query.iter().any(|query| query.show_ranking_score);
-        let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details);
+        let show_ranking_score =
+            federated_search.queries.iter().any(|query| query.show_ranking_score);
+        let show_ranking_score_details =
+            federated_search.queries.iter().any(|query| query.show_ranking_score_details);

         Self {
             timestamp,

@@ -1110,10 +1167,11 @@ impl MultiSearchAggregator {
             total_succeeded: 0,
             total_distinct_index_count: distinct_indexes.len(),
             total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
-            total_search_count: query.len(),
+            total_search_count: federated_search.queries.len(),
             show_ranking_score,
             show_ranking_score_details,
             user_agents,
+            use_federation,
         }
     }

@@ -1139,6 +1197,7 @@ impl MultiSearchAggregator {
         let show_ranking_score_details =
             this.show_ranking_score_details || other.show_ranking_score_details;
         let mut user_agents = this.user_agents;
+        let use_federation = this.use_federation || other.use_federation;

         for user_agent in other.user_agents.into_iter() {
             user_agents.insert(user_agent);

@@ -1155,6 +1214,7 @@ impl MultiSearchAggregator {
             user_agents,
             show_ranking_score,
             show_ranking_score_details,
+            use_federation,
             // do not add _ or ..Default::default() here
         };

@@ -1173,6 +1233,7 @@ impl MultiSearchAggregator {
             user_agents,
             show_ranking_score,
             show_ranking_score_details,
+            use_federation,
         } = self;

         if total_received == 0 {

@@ -1197,6 +1258,9 @@ impl MultiSearchAggregator {
             "scoring": {
                 "show_ranking_score": show_ranking_score,
                 "show_ranking_score_details": show_ranking_score_details,
+            },
+            "federation": {
+                "use_federation": use_federation,
             }
         });
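The `from_federated_search` rewrite above boils the federated/non-federated distinction down to one boolean, `federation.is_some()` on the incoming request, and counts queries from the request's `queries` list. A self-contained sketch of that derivation; the struct shapes here are heavily simplified stand-ins for the real `FederatedSearch` and `SearchQueryWithIndex` types, which carry many more fields:

    // Simplified shapes; the real types live in meilisearch's search module.
    struct Federation; // merge/limit options in the real type
    struct SearchQueryWithIndex {
        show_ranking_score: bool,
    }
    struct FederatedSearch {
        queries: Vec<SearchQueryWithIndex>,
        federation: Option<Federation>,
    }

    struct MultiSearchStats {
        total_search_count: usize,
        show_ranking_score: bool,
        use_federation: bool,
    }

    fn from_federated_search(req: &FederatedSearch) -> MultiSearchStats {
        MultiSearchStats {
            total_search_count: req.queries.len(),
            // `any` over the queries, as the aggregator does for the scoring flags.
            show_ranking_score: req.queries.iter().any(|q| q.show_ranking_score),
            // Federated and non-federated requests share one route; this flag tells them apart.
            use_federation: req.federation.is_some(),
        }
    }

    fn main() {
        let req = FederatedSearch {
            queries: vec![SearchQueryWithIndex { show_ranking_score: true }],
            federation: Some(Federation),
        };
        let stats = from_federated_search(&req);
        assert!(stats.use_federation && stats.show_ranking_score);
        assert_eq!(stats.total_search_count, 1);
    }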
@@ -1445,6 +1509,75 @@ impl DocumentsAggregator {
         }
     }

+#[derive(Default)]
+pub struct EditDocumentsByFunctionAggregator {
+    timestamp: Option<OffsetDateTime>,
+
+    // Set to true if at least one request was filtered
+    filtered: bool,
+    // Set to true if at least one request contained a context
+    with_context: bool,
+
+    // context
+    user_agents: HashSet<String>,
+
+    index_creation: bool,
+}
+
+impl EditDocumentsByFunctionAggregator {
+    pub fn from_query(
+        documents_query: &DocumentEditionByFunction,
+        index_creation: bool,
+        request: &HttpRequest,
+    ) -> Self {
+        let DocumentEditionByFunction { filter, context, function: _ } = documents_query;
+
+        Self {
+            timestamp: Some(OffsetDateTime::now_utc()),
+            user_agents: extract_user_agents(request).into_iter().collect(),
+            filtered: filter.is_some(),
+            with_context: context.is_some(),
+            index_creation,
+        }
+    }
+
+    /// Aggregate one [DocumentsAggregator] into another.
+    pub fn aggregate(&mut self, other: Self) {
+        let Self { timestamp, user_agents, index_creation, filtered, with_context } = other;
+
+        if self.timestamp.is_none() {
+            self.timestamp = timestamp;
+        }
+
+        // we can't create a union because there is no `into_union` method
+        for user_agent in user_agents {
+            self.user_agents.insert(user_agent);
+        }
+        self.index_creation |= index_creation;
+        self.filtered |= filtered;
+        self.with_context |= with_context;
+    }
+
+    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
+        let Self { timestamp, user_agents, index_creation, filtered, with_context } = self;
+
+        let properties = json!({
+            "user-agent": user_agents,
+            "filtered": filtered,
+            "with_context": with_context,
+            "index_creation": index_creation,
+        });
+
+        Some(Track {
+            timestamp,
+            user: user.clone(),
+            event: event_name.to_string(),
+            properties,
+            ..Default::default()
+        })
+    }
+}
+
 #[derive(Default, Serialize)]
 pub struct DocumentsDeletionAggregator {
     #[serde(skip)]
@@ -1534,6 +1667,9 @@ pub struct DocumentsFetchAggregator {
     // if a filter was used
     per_filter: bool,

+    #[serde(rename = "vector.retrieve_vectors")]
+    retrieve_vectors: bool,
+
     // pagination
     #[serde(rename = "pagination.max_limit")]
     max_limit: usize,

@@ -1543,18 +1679,21 @@ pub struct DocumentsFetchAggregator {

 impl DocumentsFetchAggregator {
     pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
-        let (limit, offset) = match query {
-            DocumentFetchKind::PerDocumentId => (1, 0),
-            DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
+        let (limit, offset, retrieve_vectors) = match query {
+            DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
+            DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
+                (*limit, *offset, *retrieve_vectors)
+            }
         };
         Self {
             timestamp: Some(OffsetDateTime::now_utc()),
             user_agents: extract_user_agents(request).into_iter().collect(),
             total_received: 1,
-            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
+            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
             per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
             max_limit: limit,
             max_offset: offset,
+            retrieve_vectors,
         }
     }

@@ -1568,6 +1707,7 @@ impl DocumentsFetchAggregator {
             per_filter,
             max_limit,
             max_offset,
+            retrieve_vectors,
         } = other;

         if self.timestamp.is_none() {

@@ -1583,6 +1723,8 @@ impl DocumentsFetchAggregator {

         self.max_limit = self.max_limit.max(max_limit);
         self.max_offset = self.max_offset.max(max_offset);
+
+        self.retrieve_vectors |= retrieve_vectors;
     }

     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {

@@ -1623,6 +1765,7 @@ pub struct SimilarAggregator {

     // Whether a non-default embedder was specified
     embedder: bool,
+    retrieve_vectors: bool,

     // pagination
     max_limit: usize,

@@ -1646,6 +1789,7 @@ impl SimilarAggregator {
             offset,
             limit,
             attributes_to_retrieve: _,
+            retrieve_vectors,
             show_ranking_score,
             show_ranking_score_details,
             filter,

@@ -1690,6 +1834,7 @@ impl SimilarAggregator {
         ret.ranking_score_threshold = ranking_score_threshold.is_some();

         ret.embedder = embedder.is_some();
+        ret.retrieve_vectors = *retrieve_vectors;

         ret
     }

@@ -1722,6 +1867,7 @@ impl SimilarAggregator {
             show_ranking_score_details,
             embedder,
             ranking_score_threshold,
+            retrieve_vectors,
         } = other;

         if self.timestamp.is_none() {

@@ -1751,6 +1897,7 @@ impl SimilarAggregator {
         }

         self.embedder |= embedder;
+        self.retrieve_vectors |= retrieve_vectors;

         // pagination
         self.max_limit = self.max_limit.max(max_limit);

@@ -1785,6 +1932,7 @@ impl SimilarAggregator {
             show_ranking_score_details,
             embedder,
             ranking_score_threshold,
+            retrieve_vectors,
         } = self;

         if total_received == 0 {

@@ -1811,6 +1959,9 @@ impl SimilarAggregator {
                 "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
                 "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
             },
+            "vector": {
+                "retrieve_vectors": retrieve_vectors,
+            },
             "hybrid": {
                 "embedder": embedder,
             },
@@ -1,6 +1,6 @@
 use actix_web as aweb;
 use aweb::error::{JsonPayloadError, QueryPayloadError};
-use byte_unit::Byte;
+use byte_unit::{Byte, UnitType};
 use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
 use meilisearch_types::error::{Code, ErrorCode, ResponseError};
 use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};

@@ -27,13 +27,17 @@ pub enum MeilisearchHttpError {
     EmptyFilter,
     #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
     InvalidExpression(&'static [&'static str], Value),
+    #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
+    FederationOptionsInNonFederatedRequest(usize),
+    #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
+    PaginationInFederatedQuery(usize, &'static str),
     #[error("A {0} payload is missing.")]
     MissingPayload(PayloadType),
     #[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
     TooManySearchRequests(usize),
     #[error("Internal error: Search limiter is down.")]
     SearchLimiterIsDown,
-    #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
+    #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
     PayloadTooLarge(usize),
     #[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
         .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()

@@ -86,6 +90,12 @@ impl ErrorCode for MeilisearchHttpError {
             MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
             MeilisearchHttpError::Join(_) => Code::Internal,
             MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
+            MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
+                Code::InvalidMultiSearchFederationOptions
+            }
+            MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
+                Code::InvalidMultiSearchQueryPagination
+            }
         }
     }
 }

@@ -98,14 +108,29 @@ impl From<MeilisearchHttpError> for aweb::Error {

 impl From<aweb::error::PayloadError> for MeilisearchHttpError {
     fn from(error: aweb::error::PayloadError) -> Self {
-        MeilisearchHttpError::Payload(PayloadError::Payload(error))
+        match error {
+            aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
+                PayloadError::Payload(ActixPayloadError::IncompleteError),
+            ),
+            _ => MeilisearchHttpError::Payload(PayloadError::Payload(
+                ActixPayloadError::OtherError(error),
+            )),
+        }
     }
 }

+#[derive(Debug, thiserror::Error)]
+pub enum ActixPayloadError {
+    #[error("The provided payload is incomplete and cannot be parsed")]
+    IncompleteError,
+    #[error(transparent)]
+    OtherError(aweb::error::PayloadError),
+}
+
 #[derive(Debug, thiserror::Error)]
 pub enum PayloadError {
     #[error(transparent)]
-    Payload(aweb::error::PayloadError),
+    Payload(ActixPayloadError),
     #[error(transparent)]
     Json(JsonPayloadError),
     #[error(transparent)]

@@ -122,13 +147,15 @@ impl ErrorCode for PayloadError {
     fn error_code(&self) -> Code {
         match self {
             PayloadError::Payload(e) => match e {
-                aweb::error::PayloadError::Incomplete(_) => Code::Internal,
-                aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
-                aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
-                aweb::error::PayloadError::UnknownLength => Code::Internal,
-                aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
-                aweb::error::PayloadError::Io(_) => Code::Internal,
-                _ => todo!(),
+                ActixPayloadError::IncompleteError => Code::BadRequest,
+                ActixPayloadError::OtherError(error) => match error {
+                    aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
+                    aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
+                    aweb::error::PayloadError::UnknownLength => Code::Internal,
+                    aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
+                    aweb::error::PayloadError::Io(_) => Code::Internal,
+                    _ => todo!(),
+                },
             },
             PayloadError::Json(err) => match err {
                 JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
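The `PayloadTooLarge` message earlier in this file's hunks is where the byte-unit 4.x to 5.x bump from the Cargo.toml changes becomes visible: `Byte::from_bytes(n).get_appropriate_unit(true)` becomes `Byte::from_u64(n).get_appropriate_unit(UnitType::Binary)`. A minimal sketch of the new call, assuming byte-unit 5.x with its `byte` and `std` features enabled as in the updated manifest:

    use byte_unit::{Byte, UnitType};

    fn human_readable_limit(limit_in_bytes: u64) -> String {
        // UnitType::Binary selects 1024-based units (KiB, MiB, ...), replacing the
        // `true` flag that byte-unit 4.x's get_appropriate_unit() used to take.
        Byte::from_u64(limit_in_bytes)
            .get_appropriate_unit(UnitType::Binary)
            .to_string()
    }

    fn main() {
        // 104_857_600 bytes is exactly 100 MiB, so this prints a MiB-scaled value.
        println!("{}", human_readable_limit(104_857_600));
    }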
@@ -12,6 +12,8 @@ use futures::Future;
|
|||||||
use meilisearch_auth::{AuthController, AuthFilter};
|
use meilisearch_auth::{AuthController, AuthFilter};
|
||||||
use meilisearch_types::error::{Code, ResponseError};
|
use meilisearch_types::error::{Code, ResponseError};
|
||||||
|
|
||||||
|
use self::policies::AuthError;
|
||||||
|
|
||||||
pub struct GuardedData<P, D> {
|
pub struct GuardedData<P, D> {
|
||||||
data: D,
|
data: D,
|
||||||
filters: AuthFilter,
|
filters: AuthFilter,
|
||||||
@@ -35,12 +37,12 @@ impl<P, D> GuardedData<P, D> {
|
|||||||
let missing_master_key = auth.get_master_key().is_none();
|
let missing_master_key = auth.get_master_key().is_none();
|
||||||
|
|
||||||
match Self::authenticate(auth, token, index).await? {
|
match Self::authenticate(auth, token, index).await? {
|
||||||
Some(filters) => match data {
|
Ok(filters) => match data {
|
||||||
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
||||||
None => Err(AuthenticationError::IrretrievableState.into()),
|
None => Err(AuthenticationError::IrretrievableState.into()),
|
||||||
},
|
},
|
||||||
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||||
None => Err(AuthenticationError::InvalidToken.into()),
|
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -51,12 +53,12 @@ impl<P, D> GuardedData<P, D> {
|
|||||||
let missing_master_key = auth.get_master_key().is_none();
|
let missing_master_key = auth.get_master_key().is_none();
|
||||||
|
|
||||||
match Self::authenticate(auth, String::new(), None).await? {
|
match Self::authenticate(auth, String::new(), None).await? {
|
||||||
Some(filters) => match data {
|
Ok(filters) => match data {
|
||||||
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
||||||
None => Err(AuthenticationError::IrretrievableState.into()),
|
None => Err(AuthenticationError::IrretrievableState.into()),
|
||||||
},
|
},
|
||||||
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||||
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
|
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,7 +66,7 @@ impl<P, D> GuardedData<P, D> {
|
|||||||
auth: Data<AuthController>,
|
auth: Data<AuthController>,
|
||||||
token: String,
|
token: String,
|
||||||
index: Option<String>,
|
index: Option<String>,
|
||||||
) -> Result<Option<AuthFilter>, ResponseError>
|
) -> Result<Result<AuthFilter, AuthError>, ResponseError>
|
||||||
where
|
where
|
||||||
P: Policy + 'static,
|
P: Policy + 'static,
|
||||||
{
|
{
|
||||||
@@ -127,13 +129,14 @@ pub trait Policy {
|
|||||||
auth: Data<AuthController>,
|
auth: Data<AuthController>,
|
||||||
token: &str,
|
token: &str,
|
||||||
index: Option<&str>,
|
index: Option<&str>,
|
||||||
) -> Option<AuthFilter>;
|
) -> Result<AuthFilter, policies::AuthError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod policies {
|
pub mod policies {
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
|
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
|
||||||
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
|
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
|
||||||
|
use meilisearch_types::error::{Code, ErrorCode};
|
||||||
// reexport actions in policies in order to be used in routes configuration.
|
// reexport actions in policies in order to be used in routes configuration.
|
||||||
pub use meilisearch_types::keys::{actions, Action};
|
pub use meilisearch_types::keys::{actions, Action};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@@ -144,11 +147,53 @@ pub mod policies {
|
|||||||
|
|
||||||
enum TenantTokenOutcome {
|
enum TenantTokenOutcome {
|
||||||
NotATenantToken,
|
NotATenantToken,
|
||||||
Invalid,
|
|
||||||
Expired,
|
|
||||||
Valid(Uuid, SearchRules),
|
Valid(Uuid, SearchRules),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
pub enum AuthError {
|
||||||
|
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
|
||||||
|
ExpiredTenantToken { exp: i64, now: i64 },
|
||||||
|
#[error("The provided API key is invalid.")]
|
||||||
|
InvalidApiKey,
|
||||||
|
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
|
||||||
|
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
|
||||||
|
#[error(
|
||||||
|
"The API key used to generate this tenant token cannot acces the index `{index}`."
|
||||||
|
)]
|
||||||
|
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
|
||||||
|
#[error(
|
||||||
|
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
|
||||||
|
)]
|
||||||
|
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
|
||||||
|
#[error("The provided tenant token is invalid.")]
|
||||||
|
InvalidTenantToken,
|
||||||
|
#[error("Could not decode tenant token, {0}.")]
|
||||||
|
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
|
||||||
|
#[error("Invalid action `{0}`.")]
|
||||||
|
InternalInvalidAction(u8),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<jsonwebtoken::errors::Error> for AuthError {
|
||||||
|
fn from(error: jsonwebtoken::errors::Error) -> Self {
|
||||||
|
use jsonwebtoken::errors::ErrorKind;
|
||||||
|
|
||||||
|
match error.kind() {
|
||||||
|
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
|
||||||
|
_ => AuthError::CouldNotDecodeTenantToken(error),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ErrorCode for AuthError {
|
||||||
|
fn error_code(&self) -> Code {
|
||||||
|
match self {
|
||||||
|
AuthError::InternalInvalidAction(_) => Code::Internal,
|
||||||
|
_ => Code::InvalidApiKey,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
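For context, a small self-contained illustration (not taken from the diff) of the pattern the new `AuthError` enum follows: a `thiserror` enum with a `From` impl so library errors can be bubbled up with `?`. The names below are illustrative; note the module above writes its `From<jsonwebtoken::errors::Error>` impl by hand so it can special-case `ErrorKind::InvalidToken`, whereas this sketch uses the `#[from]` derive shortcut.

```rust
use thiserror::Error;

#[derive(Error, Debug)]
enum TokenError {
    #[error("the token is invalid")]
    Invalid,
    #[error("could not decode token: {0}")]
    Decode(#[from] jsonwebtoken::errors::Error),
}

fn header_summary(token: &str) -> Result<String, TokenError> {
    if token.is_empty() {
        return Err(TokenError::Invalid);
    }
    // Any jsonwebtoken error converts automatically thanks to #[from].
    let header = jsonwebtoken::decode_header(token)?;
    Ok(format!("{header:?}"))
}
```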
fn tenant_token_validation() -> Validation {
|
fn tenant_token_validation() -> Validation {
|
||||||
let mut validation = Validation::default();
|
let mut validation = Validation::default();
|
||||||
validation.validate_exp = false;
|
validation.validate_exp = false;
|
||||||
@@ -158,15 +203,15 @@ pub mod policies {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts the key id used to sign the payload, without performing any validation.
|
/// Extracts the key id used to sign the payload, without performing any validation.
|
||||||
fn extract_key_id(token: &str) -> Option<Uuid> {
|
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
|
||||||
let mut validation = tenant_token_validation();
|
let mut validation = tenant_token_validation();
|
||||||
validation.insecure_disable_signature_validation();
|
validation.insecure_disable_signature_validation();
|
||||||
let dummy_key = DecodingKey::from_secret(b"secret");
|
let dummy_key = DecodingKey::from_secret(b"secret");
|
||||||
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
|
let token_data = decode::<Claims>(token, &dummy_key, &validation)?;
|
||||||
|
|
||||||
// get token fields without validating it.
|
// get token fields without validating it.
|
||||||
let Claims { api_key_uid, .. } = token_data.claims;
|
let Claims { api_key_uid, .. } = token_data.claims;
|
||||||
Some(api_key_uid)
|
Ok(api_key_uid)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_keys_action(action: u8) -> bool {
|
fn is_keys_action(action: u8) -> bool {
|
||||||
@@ -187,76 +232,102 @@ pub mod policies {
|
|||||||
auth: Data<AuthController>,
|
auth: Data<AuthController>,
|
||||||
token: &str,
|
token: &str,
|
||||||
index: Option<&str>,
|
index: Option<&str>,
|
||||||
) -> Option<AuthFilter> {
|
) -> Result<AuthFilter, AuthError> {
|
||||||
// authenticate if token is the master key.
|
// authenticate if token is the master key.
|
||||||
// Without a master key, all routes are accessible except the key-related routes.
|
// Without a master key, all routes are accessible except the key-related routes.
|
||||||
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
|
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
|
||||||
return Some(AuthFilter::default());
|
return Ok(AuthFilter::default());
|
||||||
}
|
}
|
||||||
|
|
||||||
let (key_uuid, search_rules) =
|
let (key_uuid, search_rules) =
|
||||||
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
|
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
|
||||||
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
|
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
|
||||||
(key_uuid, Some(search_rules))
|
(key_uuid, Some(search_rules))
|
||||||
}
|
}
|
||||||
TenantTokenOutcome::Expired => return None,
|
Ok(TenantTokenOutcome::NotATenantToken)
|
||||||
TenantTokenOutcome::Invalid => return None,
|
| Err(AuthError::InvalidTenantToken) => (
|
||||||
TenantTokenOutcome::NotATenantToken => {
|
auth.get_optional_uid_from_encoded_key(token.as_bytes())
|
||||||
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
|
.map_err(|_e| AuthError::InvalidApiKey)?
|
||||||
}
|
.ok_or(AuthError::InvalidApiKey)?,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
Err(e) => return Err(e),
|
||||||
};
|
};
|
||||||
|
|
||||||
// check that the indexes are allowed
|
// check that the indexes are allowed
|
||||||
let action = Action::from_repr(A)?;
|
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
|
||||||
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
|
let auth_filter = auth
|
||||||
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
|
.get_key_filters(key_uuid, search_rules)
|
||||||
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
|
.map_err(|_e| AuthError::InvalidApiKey)?;
|
||||||
{
|
|
||||||
return Some(auth_filter);
|
// First check if the index is authorized in the tenant token; this is public
|
||||||
|
// information, so we can return a nice error message.
|
||||||
|
if let Some(index) = index {
|
||||||
|
if !auth_filter.tenant_token_is_index_authorized(index) {
|
||||||
|
return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
|
||||||
|
index: index.to_string(),
|
||||||
|
allowed: auth_filter.tenant_token_list_index_authorized(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if !auth_filter.api_key_is_index_authorized(index) {
|
||||||
|
if auth_filter.is_tenant_token() {
|
||||||
|
// If the error comes from a tenant token we cannot share the list
|
||||||
|
// of authorized indexes in the API key. This is not public information.
|
||||||
|
return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
|
||||||
|
index: index.to_string(),
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Otherwise we can share the list
|
||||||
|
// of authorized indexes in the API key.
|
||||||
|
return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
|
||||||
|
index: index.to_string(),
|
||||||
|
allowed: auth_filter.api_key_list_index_authorized(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
|
||||||
|
return Ok(auth_filter);
|
||||||
}
|
}
|
||||||
|
|
||||||
None
|
Err(AuthError::InvalidApiKey)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<const A: u8> ActionPolicy<A> {
|
impl<const A: u8> ActionPolicy<A> {
|
||||||
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
|
fn authenticate_tenant_token(
|
||||||
|
auth: &AuthController,
|
||||||
|
token: &str,
|
||||||
|
) -> Result<TenantTokenOutcome, AuthError> {
|
||||||
// Only search action can be accessed by a tenant token.
|
// Only search action can be accessed by a tenant token.
|
||||||
if A != actions::SEARCH {
|
if A != actions::SEARCH {
|
||||||
return TenantTokenOutcome::NotATenantToken;
|
return Ok(TenantTokenOutcome::NotATenantToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
let uid = if let Some(uid) = extract_key_id(token) {
|
let uid = extract_key_id(token)?;
|
||||||
uid
|
|
||||||
} else {
|
|
||||||
return TenantTokenOutcome::NotATenantToken;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Check if tenant token is valid.
|
// Check if tenant token is valid.
|
||||||
let key = if let Some(key) = auth.generate_key(uid) {
|
let key = if let Some(key) = auth.generate_key(uid) {
|
||||||
key
|
key
|
||||||
} else {
|
} else {
|
||||||
return TenantTokenOutcome::Invalid;
|
return Err(AuthError::InvalidTenantToken);
|
||||||
};
|
};
|
||||||
|
|
||||||
let data = if let Ok(data) = decode::<Claims>(
|
let data = decode::<Claims>(
|
||||||
token,
|
token,
|
||||||
&DecodingKey::from_secret(key.as_bytes()),
|
&DecodingKey::from_secret(key.as_bytes()),
|
||||||
&tenant_token_validation(),
|
&tenant_token_validation(),
|
||||||
) {
|
)?;
|
||||||
data
|
|
||||||
} else {
|
|
||||||
return TenantTokenOutcome::Invalid;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Check if token is expired.
|
// Check if token is expired.
|
||||||
if let Some(exp) = data.claims.exp {
|
if let Some(exp) = data.claims.exp {
|
||||||
if OffsetDateTime::now_utc().unix_timestamp() > exp {
|
let now = OffsetDateTime::now_utc().unix_timestamp();
|
||||||
return TenantTokenOutcome::Expired;
|
if now > exp {
|
||||||
|
return Err(AuthError::ExpiredTenantToken { exp, now });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
|
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
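A self-contained sketch of the decode-then-check-expiry pattern `authenticate_tenant_token` uses above: `exp` validation is switched off in the `Validation` so the code can compare against the current timestamp itself and report both values in the error. The `Claims` shape and crate versions below are assumptions, not the module's own types.

```rust
use jsonwebtoken::{decode, DecodingKey, Validation};
use serde::Deserialize;
use time::OffsetDateTime;

#[derive(Deserialize)]
struct Claims {
    exp: Option<i64>, // illustrative: only the expiry claim is modelled here
}

fn check_token(token: &str, secret: &[u8]) -> Result<(), String> {
    let mut validation = Validation::default();
    validation.validate_exp = false; // expiry is checked by hand below
    validation.required_spec_claims.clear(); // don't require `exp` to be present

    let data = decode::<Claims>(token, &DecodingKey::from_secret(secret), &validation)
        .map_err(|e| format!("could not decode token: {e}"))?;

    if let Some(exp) = data.claims.exp {
        let now = OffsetDateTime::now_utc().unix_timestamp();
        if now > exp {
            return Err(format!("token expired: was valid up to {exp}, now {now}"));
        }
    }
    Ok(())
}
```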
|
|||||||
@@ -15,6 +15,7 @@ use std::fs::File;
|
|||||||
use std::io::{BufReader, BufWriter};
|
use std::io::{BufReader, BufWriter};
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread::{self, available_parallelism};
|
use std::thread::{self, available_parallelism};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@@ -23,13 +24,13 @@ use actix_cors::Cors;
|
|||||||
use actix_http::body::MessageBody;
|
use actix_http::body::MessageBody;
|
||||||
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||||
use actix_web::error::JsonPayloadError;
|
use actix_web::error::JsonPayloadError;
|
||||||
|
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use actix_web::{web, HttpRequest};
|
use actix_web::{web, HttpRequest};
|
||||||
use analytics::Analytics;
|
use analytics::Analytics;
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use error::PayloadError;
|
use error::PayloadError;
|
||||||
use extractors::payload::PayloadConfig;
|
use extractors::payload::PayloadConfig;
|
||||||
use http::header::CONTENT_TYPE;
|
|
||||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||||
use meilisearch_auth::AuthController;
|
use meilisearch_auth::AuthController;
|
||||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
@@ -167,7 +168,7 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
|
|||||||
let conn_info = request.connection_info();
|
let conn_info = request.connection_info();
|
||||||
let headers = request.headers();
|
let headers = request.headers();
|
||||||
let user_agent = headers
|
let user_agent = headers
|
||||||
.get(http::header::USER_AGENT)
|
.get(USER_AGENT)
|
||||||
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
|
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
|
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
|
||||||
@@ -300,15 +301,15 @@ fn open_or_create_database_unchecked(
|
|||||||
dumps_path: opt.dump_dir.clone(),
|
dumps_path: opt.dump_dir.clone(),
|
||||||
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
||||||
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
||||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
task_db_size: opt.max_task_db_size.as_u64() as usize,
|
||||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
index_base_map_size: opt.max_index_size.as_u64() as usize,
|
||||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||||
indexer_config: (&opt.indexer_options).try_into()?,
|
indexer_config: (&opt.indexer_options).try_into()?,
|
||||||
autobatching_enabled: true,
|
autobatching_enabled: true,
|
||||||
cleanup_enabled: !opt.experimental_replication_parameters,
|
cleanup_enabled: !opt.experimental_replication_parameters,
|
||||||
max_number_of_tasks: 1_000_000,
|
max_number_of_tasks: 1_000_000,
|
||||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
|
||||||
index_count: DEFAULT_INDEX_COUNT,
|
index_count: DEFAULT_INDEX_COUNT,
|
||||||
instance_features,
|
instance_features,
|
||||||
})?)
|
})?)
|
||||||
@@ -476,7 +477,7 @@ pub fn configure_data(
|
|||||||
opt.experimental_search_queue_size,
|
opt.experimental_search_queue_size,
|
||||||
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
|
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
|
||||||
);
|
);
|
||||||
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
|
||||||
config
|
config
|
||||||
.app_data(index_scheduler)
|
.app_data(index_scheduler)
|
||||||
.app_data(auth)
|
.app_data(auth)
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ async fn run_http(
|
|||||||
.keep_alive(KeepAlive::Os);
|
.keep_alive(KeepAlive::Os);
|
||||||
|
|
||||||
if let Some(config) = opt_clone.get_ssl_config()? {
|
if let Some(config) = opt_clone.get_ssl_config()? {
|
||||||
http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?;
|
http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?;
|
||||||
} else {
|
} else {
|
||||||
http_server.bind(&opt_clone.http_addr)?.run().await?;
|
http_server.bind(&opt_clone.http_addr)?.run().await?;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,16 +9,14 @@ use std::str::FromStr;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::{env, fmt, fs};
|
use std::{env, fmt, fs};
|
||||||
|
|
||||||
use byte_unit::{Byte, ByteError};
|
use byte_unit::{Byte, ParseError, UnitType};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use meilisearch_types::features::InstanceTogglableFeatures;
|
use meilisearch_types::features::InstanceTogglableFeatures;
|
||||||
use meilisearch_types::milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
|
use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
|
||||||
use rustls::server::{
|
use rustls::server::{ServerSessionMemoryCache, WebPkiClientVerifier};
|
||||||
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
|
|
||||||
};
|
|
||||||
use rustls::RootCertStore;
|
use rustls::RootCertStore;
|
||||||
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
use rustls_pemfile::{certs, rsa_private_keys};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sysinfo::{MemoryRefreshKind, RefreshKind, System};
|
use sysinfo::{MemoryRefreshKind, RefreshKind, System};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
@@ -54,6 +52,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
|||||||
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
|
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
|
||||||
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
|
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
|
||||||
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
||||||
|
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
|
||||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||||
@@ -339,6 +338,13 @@ pub struct Opt {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub log_level: LogLevel,
|
pub log_level: LogLevel,
|
||||||
|
|
||||||
|
/// Experimental contains filter feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/763>
|
||||||
|
///
|
||||||
|
/// Enables the experimental contains filter operator.
|
||||||
|
#[clap(long, env = MEILI_EXPERIMENTAL_CONTAINS_FILTER)]
|
||||||
|
#[serde(default)]
|
||||||
|
pub experimental_contains_filter: bool,
|
||||||
|
|
||||||
/// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
|
/// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
|
||||||
///
|
///
|
||||||
/// Enables the Prometheus metrics on the `GET /metrics` endpoint.
|
/// Enables the Prometheus metrics on the `GET /metrics` endpoint.
|
||||||
@@ -483,6 +489,7 @@ impl Opt {
|
|||||||
config_file_path: _,
|
config_file_path: _,
|
||||||
#[cfg(feature = "analytics")]
|
#[cfg(feature = "analytics")]
|
||||||
no_analytics,
|
no_analytics,
|
||||||
|
experimental_contains_filter,
|
||||||
experimental_enable_metrics,
|
experimental_enable_metrics,
|
||||||
experimental_search_queue_size,
|
experimental_search_queue_size,
|
||||||
experimental_logs_mode,
|
experimental_logs_mode,
|
||||||
@@ -540,6 +547,10 @@ impl Opt {
|
|||||||
|
|
||||||
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
|
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
|
||||||
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
|
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
|
||||||
|
export_to_env_if_not_present(
|
||||||
|
MEILI_EXPERIMENTAL_CONTAINS_FILTER,
|
||||||
|
experimental_contains_filter.to_string(),
|
||||||
|
);
|
||||||
export_to_env_if_not_present(
|
export_to_env_if_not_present(
|
||||||
MEILI_EXPERIMENTAL_ENABLE_METRICS,
|
MEILI_EXPERIMENTAL_ENABLE_METRICS,
|
||||||
experimental_enable_metrics.to_string(),
|
experimental_enable_metrics.to_string(),
|
||||||
@@ -569,23 +580,21 @@ impl Opt {
|
|||||||
|
|
||||||
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
|
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
|
||||||
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
|
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
|
||||||
let config = rustls::ServerConfig::builder().with_safe_defaults();
|
let config = rustls::ServerConfig::builder();
|
||||||
|
|
||||||
let config = match &self.ssl_auth_path {
|
let config = match &self.ssl_auth_path {
|
||||||
Some(auth_path) => {
|
Some(auth_path) => {
|
||||||
let roots = load_certs(auth_path.to_path_buf())?;
|
let roots = load_certs(auth_path.to_path_buf())?;
|
||||||
let mut client_auth_roots = RootCertStore::empty();
|
let mut client_auth_roots = RootCertStore::empty();
|
||||||
for root in roots {
|
for root in roots {
|
||||||
client_auth_roots.add(&root).unwrap();
|
client_auth_roots.add(root).unwrap();
|
||||||
}
|
}
|
||||||
if self.ssl_require_auth {
|
let mut client_verifier =
|
||||||
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
|
WebPkiClientVerifier::builder(client_auth_roots.into());
|
||||||
config.with_client_cert_verifier(Arc::from(verifier))
|
if !self.ssl_require_auth {
|
||||||
} else {
|
client_verifier = client_verifier.allow_unauthenticated();
|
||||||
let verifier =
|
|
||||||
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
|
|
||||||
config.with_client_cert_verifier(Arc::from(verifier))
|
|
||||||
}
|
}
|
||||||
|
config.with_client_cert_verifier(client_verifier.build()?)
|
||||||
}
|
}
|
||||||
None => config.with_no_client_auth(),
|
None => config.with_no_client_auth(),
|
||||||
};
|
};
|
||||||
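For reference, a minimal standalone sketch of the rustls builder flow the new code follows, with `WebPkiClientVerifier` replacing the removed `AllowAny*` verifiers. This assumes rustls 0.23 with its default crypto provider and uses plain `with_single_cert` rather than the OCSP variant used later in this file.

```rust
use std::sync::Arc;

use rustls::pki_types::{CertificateDer, PrivateKeyDer};
use rustls::server::WebPkiClientVerifier;
use rustls::{RootCertStore, ServerConfig};

fn server_config(
    client_roots: RootCertStore,
    certs: Vec<CertificateDer<'static>>,
    key: PrivateKeyDer<'static>,
    require_client_auth: bool,
) -> anyhow::Result<ServerConfig> {
    // Replaces AllowAnyAuthenticatedClient / AllowAnyAnonymousOrAuthenticatedClient:
    // mandatory client auth is the builder default; opt out with allow_unauthenticated().
    let mut verifier = WebPkiClientVerifier::builder(Arc::new(client_roots));
    if !require_client_auth {
        verifier = verifier.allow_unauthenticated();
    }

    let config = ServerConfig::builder()
        .with_client_cert_verifier(verifier.build()?)
        .with_single_cert(certs, key)?;
    Ok(config)
}
```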
@@ -594,7 +603,7 @@ impl Opt {
|
|||||||
let privkey = load_private_key(key_path.to_path_buf())?;
|
let privkey = load_private_key(key_path.to_path_buf())?;
|
||||||
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
|
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
|
||||||
let mut config = config
|
let mut config = config
|
||||||
.with_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
|
.with_single_cert_with_ocsp(certs, privkey, ocsp)
|
||||||
.map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;
|
.map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;
|
||||||
|
|
||||||
config.key_log = Arc::new(rustls::KeyLogFile::new());
|
config.key_log = Arc::new(rustls::KeyLogFile::new());
|
||||||
@@ -604,7 +613,7 @@ impl Opt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if self.ssl_tickets {
|
if self.ssl_tickets {
|
||||||
config.ticketer = rustls::Ticketer::new().unwrap();
|
config.ticketer = rustls::crypto::ring::Ticketer::new().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Some(config))
|
Ok(Some(config))
|
||||||
@@ -617,6 +626,7 @@ impl Opt {
|
|||||||
InstanceTogglableFeatures {
|
InstanceTogglableFeatures {
|
||||||
metrics: self.experimental_enable_metrics,
|
metrics: self.experimental_enable_metrics,
|
||||||
logs_route: self.experimental_enable_logs_route,
|
logs_route: self.experimental_enable_logs_route,
|
||||||
|
contains_filter: self.experimental_contains_filter,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -674,7 +684,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
|||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
||||||
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
|
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
|
||||||
thread_pool: Some(thread_pool),
|
thread_pool: Some(thread_pool),
|
||||||
max_positions_per_attributes: None,
|
max_positions_per_attributes: None,
|
||||||
skip_index_budget: other.skip_index_budget,
|
skip_index_budget: other.skip_index_budget,
|
||||||
@@ -688,23 +698,25 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
|||||||
pub struct MaxMemory(Option<Byte>);
|
pub struct MaxMemory(Option<Byte>);
|
||||||
|
|
||||||
impl FromStr for MaxMemory {
|
impl FromStr for MaxMemory {
|
||||||
type Err = ByteError;
|
type Err = ParseError;
|
||||||
|
|
||||||
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
|
fn from_str(s: &str) -> Result<MaxMemory, Self::Err> {
|
||||||
Byte::from_str(s).map(Some).map(MaxMemory)
|
Byte::from_str(s).map(Some).map(MaxMemory)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for MaxMemory {
|
impl Default for MaxMemory {
|
||||||
fn default() -> MaxMemory {
|
fn default() -> MaxMemory {
|
||||||
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
|
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for MaxMemory {
|
impl fmt::Display for MaxMemory {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self.0 {
|
match self.0 {
|
||||||
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
|
Some(memory) => {
|
||||||
|
write!(f, "{}", memory.get_appropriate_unit(UnitType::Binary))
|
||||||
|
}
|
||||||
None => f.write_str("unknown"),
|
None => f.write_str("unknown"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
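The byte_unit calls swapped in above map roughly as follows under the 5.x API this migration appears to target; the older names in the comments are the ones being removed.

```rust
use std::str::FromStr;

use byte_unit::{Byte, UnitType};

fn main() {
    // 4.x: Byte::from_bytes(n) / byte.get_bytes()  ->  5.x: Byte::from_u64(n) / byte.as_u64()
    let db_size = Byte::from_u64(10 * 1024 * 1024 * 1024);
    assert_eq!(db_size.as_u64(), 10_737_418_240);

    // FromStr still parses human-readable sizes such as "10GiB".
    let parsed = Byte::from_str("10GiB").unwrap();
    assert_eq!(parsed.as_u64(), db_size.as_u64());

    // 4.x: get_appropriate_unit(true)  ->  5.x: get_appropriate_unit(UnitType::Binary)
    println!("{}", parsed.get_appropriate_unit(UnitType::Binary));
}
```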
@@ -767,21 +779,26 @@ impl Deref for MaxThreads {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
|
fn load_certs(
|
||||||
|
filename: PathBuf,
|
||||||
|
) -> anyhow::Result<Vec<rustls::pki_types::CertificateDer<'static>>> {
|
||||||
let certfile =
|
let certfile =
|
||||||
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
|
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
|
||||||
let mut reader = BufReader::new(certfile);
|
let mut reader = BufReader::new(certfile);
|
||||||
certs(&mut reader)
|
certs(&mut reader)
|
||||||
.map(|certs| certs.into_iter().map(rustls::Certificate).collect())
|
.collect::<Result<Vec<_>, _>>()
|
||||||
.map_err(|_| anyhow::anyhow!("cannot read certificate file"))
|
.map_err(|_| anyhow::anyhow!("cannot read certificate file"))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
|
fn load_private_key(
|
||||||
|
filename: PathBuf,
|
||||||
|
) -> anyhow::Result<rustls::pki_types::PrivateKeyDer<'static>> {
|
||||||
let rsa_keys = {
|
let rsa_keys = {
|
||||||
let keyfile = fs::File::open(filename.clone())
|
let keyfile = fs::File::open(filename.clone())
|
||||||
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
|
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
|
||||||
let mut reader = BufReader::new(keyfile);
|
let mut reader = BufReader::new(keyfile);
|
||||||
rsa_private_keys(&mut reader)
|
rsa_private_keys(&mut reader)
|
||||||
|
.collect::<Result<Vec<_>, _>>()
|
||||||
.map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))?
|
.map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))?
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -789,19 +806,21 @@ fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
|
|||||||
let keyfile = fs::File::open(filename)
|
let keyfile = fs::File::open(filename)
|
||||||
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
|
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
|
||||||
let mut reader = BufReader::new(keyfile);
|
let mut reader = BufReader::new(keyfile);
|
||||||
pkcs8_private_keys(&mut reader).map_err(|_| {
|
rustls_pemfile::pkcs8_private_keys(&mut reader).collect::<Result<Vec<_>, _>>().map_err(
|
||||||
anyhow::anyhow!(
|
|_| {
|
||||||
"file contains invalid pkcs8 private key (encrypted keys not supported)"
|
anyhow::anyhow!(
|
||||||
)
|
"file contains invalid pkcs8 private key (encrypted keys not supported)"
|
||||||
})?
|
)
|
||||||
|
},
|
||||||
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
// prefer to load pkcs8 keys
|
// prefer to load pkcs8 keys
|
||||||
if !pkcs8_keys.is_empty() {
|
if !pkcs8_keys.is_empty() {
|
||||||
Ok(rustls::PrivateKey(pkcs8_keys[0].clone()))
|
Ok(rustls::pki_types::PrivateKeyDer::Pkcs8(pkcs8_keys[0].clone_key()))
|
||||||
} else {
|
} else {
|
||||||
assert!(!rsa_keys.is_empty());
|
assert!(!rsa_keys.is_empty());
|
||||||
Ok(rustls::PrivateKey(rsa_keys[0].clone()))
|
Ok(rustls::pki_types::PrivateKeyDer::Pkcs1(rsa_keys[0].clone_key()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
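A condensed sketch of the rustls-pemfile 2.x flow used above, where the parsers now return iterators of `Result`s and the keys come out as owned `pki_types` values; error handling is simplified and the paths are placeholders.

```rust
use std::fs::File;
use std::io::BufReader;

use rustls::pki_types::{CertificateDer, PrivateKeyDer};

fn load_tls_material(
    cert_path: &str,
    key_path: &str,
) -> anyhow::Result<(Vec<CertificateDer<'static>>, PrivateKeyDer<'static>)> {
    // 2.x: certs() yields an iterator of Result<CertificateDer, io::Error>.
    let certs = rustls_pemfile::certs(&mut BufReader::new(File::open(cert_path)?))
        .collect::<Result<Vec<_>, _>>()?;

    // 2.x: pkcs8_private_keys() also yields Results; take the first key found.
    let key = rustls_pemfile::pkcs8_private_keys(&mut BufReader::new(File::open(key_path)?))
        .next()
        .ok_or_else(|| anyhow::anyhow!("no PKCS#8 private key in {key_path}"))??;

    Ok((certs, PrivateKeyDer::Pkcs8(key)))
}
```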
|
|
||||||
@@ -844,11 +863,11 @@ fn default_env() -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn default_max_index_size() -> Byte {
|
fn default_max_index_size() -> Byte {
|
||||||
Byte::from_bytes(INDEX_SIZE)
|
Byte::from_u64(INDEX_SIZE)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_max_task_db_size() -> Byte {
|
fn default_max_task_db_size() -> Byte {
|
||||||
Byte::from_bytes(TASK_DB_SIZE)
|
Byte::from_u64(TASK_DB_SIZE)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_http_payload_size_limit() -> Byte {
|
fn default_http_payload_size_limit() -> Byte {
|
||||||
|
|||||||
@@ -47,6 +47,10 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub metrics: Option<bool>,
|
pub metrics: Option<bool>,
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub logs_route: Option<bool>,
|
pub logs_route: Option<bool>,
|
||||||
|
#[deserr(default)]
|
||||||
|
pub edit_documents_by_function: Option<bool>,
|
||||||
|
#[deserr(default)]
|
||||||
|
pub contains_filter: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn patch_features(
|
async fn patch_features(
|
||||||
@@ -66,13 +70,23 @@ async fn patch_features(
|
|||||||
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
||||||
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
||||||
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
||||||
|
edit_documents_by_function: new_features
|
||||||
|
.0
|
||||||
|
.edit_documents_by_function
|
||||||
|
.unwrap_or(old_features.edit_documents_by_function),
|
||||||
|
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
|
||||||
};
|
};
|
||||||
|
|
||||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||||
// it renames to camelCase, which we don't want for analytics.
|
// it renames to camelCase, which we don't want for analytics.
|
||||||
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
||||||
let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
|
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||||
new_features;
|
vector_store,
|
||||||
|
metrics,
|
||||||
|
logs_route,
|
||||||
|
edit_documents_by_function,
|
||||||
|
contains_filter,
|
||||||
|
} = new_features;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
"Experimental features Updated".to_string(),
|
"Experimental features Updated".to_string(),
|
||||||
@@ -80,6 +94,8 @@ async fn patch_features(
|
|||||||
"vector_store": vector_store,
|
"vector_store": vector_store,
|
||||||
"metrics": metrics,
|
"metrics": metrics,
|
||||||
"logs_route": logs_route,
|
"logs_route": logs_route,
|
||||||
|
"edit_documents_by_function": edit_documents_by_function,
|
||||||
|
"contains_filter": contains_filter,
|
||||||
}),
|
}),
|
||||||
Some(&req),
|
Some(&req),
|
||||||
);
|
);
|
||||||
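As the comment above notes, the destructuring is deliberately exhaustive. A toy illustration (names are made up, not the route's types) of why avoiding `..` matters: adding a field to the struct becomes a compile error at the destructuring site, so new features cannot be silently left out of the analytics payload.

```rust
struct Features {
    metrics: bool,
    logs_route: bool,
    // Adding `contains_filter: bool` here makes the `let` below fail to compile
    // until the new field is handled, which is exactly the point.
}

fn publish(features: Features) {
    let Features { metrics, logs_route } = features; // no `..` on purpose
    println!("metrics={metrics} logs_route={logs_route}");
}

fn main() {
    publish(Features { metrics: true, logs_route: false });
}
```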
|
|||||||
@@ -7,7 +7,7 @@ use bstr::ByteSlice as _;
|
|||||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use index_scheduler::{IndexScheduler, TaskId};
|
use index_scheduler::{IndexScheduler, RoFeatures, TaskId};
|
||||||
use meilisearch_types::deserr::query_params::Param;
|
use meilisearch_types::deserr::query_params::Param;
|
||||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||||
@@ -16,6 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
|
|||||||
use meilisearch_types::heed::RoTxn;
|
use meilisearch_types::heed::RoTxn;
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||||
|
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||||
use meilisearch_types::milli::DocumentId;
|
use meilisearch_types::milli::DocumentId;
|
||||||
use meilisearch_types::star_or::OptionStarOrList;
|
use meilisearch_types::star_or::OptionStarOrList;
|
||||||
use meilisearch_types::tasks::KindWithContent;
|
use meilisearch_types::tasks::KindWithContent;
|
||||||
@@ -39,7 +40,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
|||||||
use crate::routes::{
|
use crate::routes::{
|
||||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||||
};
|
};
|
||||||
use crate::search::parse_filter;
|
use crate::search::{parse_filter, RetrieveVectors};
|
||||||
use crate::Opt;
|
use crate::Opt;
|
||||||
|
|
||||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||||
@@ -81,6 +82,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
|
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
|
||||||
)
|
)
|
||||||
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
|
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
|
||||||
|
.service(web::resource("/edit").route(web::post().to(SeqHandler(edit_documents_by_function))))
|
||||||
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
|
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
|
||||||
.service(
|
.service(
|
||||||
web::resource("/{document_id}")
|
web::resource("/{document_id}")
|
||||||
@@ -94,6 +96,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
pub struct GetDocument {
|
pub struct GetDocument {
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||||
fields: OptionStarOrList<String>,
|
fields: OptionStarOrList<String>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||||
|
retrieve_vectors: Param<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_document(
|
pub async fn get_document(
|
||||||
@@ -107,13 +111,20 @@ pub async fn get_document(
|
|||||||
debug!(parameters = ?params, "Get document");
|
debug!(parameters = ?params, "Get document");
|
||||||
let index_uid = IndexUid::try_from(index_uid)?;
|
let index_uid = IndexUid::try_from(index_uid)?;
|
||||||
|
|
||||||
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
|
let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner();
|
||||||
|
|
||||||
let GetDocument { fields } = params.into_inner();
|
|
||||||
let attributes_to_retrieve = fields.merge_star_and_none();
|
let attributes_to_retrieve = fields.merge_star_and_none();
|
||||||
|
|
||||||
|
let features = index_scheduler.features();
|
||||||
|
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
|
||||||
|
|
||||||
|
analytics.get_fetch_documents(
|
||||||
|
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
|
||||||
|
&req,
|
||||||
|
);
|
||||||
|
|
||||||
let index = index_scheduler.index(&index_uid)?;
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
|
let document =
|
||||||
|
retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?;
|
||||||
debug!(returns = ?document, "Get document");
|
debug!(returns = ?document, "Get document");
|
||||||
Ok(HttpResponse::Ok().json(document))
|
Ok(HttpResponse::Ok().json(document))
|
||||||
}
|
}
|
||||||
@@ -153,6 +164,8 @@ pub struct BrowseQueryGet {
|
|||||||
limit: Param<usize>,
|
limit: Param<usize>,
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||||
fields: OptionStarOrList<String>,
|
fields: OptionStarOrList<String>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||||
|
retrieve_vectors: Param<bool>,
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
||||||
filter: Option<String>,
|
filter: Option<String>,
|
||||||
}
|
}
|
||||||
@@ -166,6 +179,8 @@ pub struct BrowseQuery {
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
|
||||||
fields: Option<Vec<String>>,
|
fields: Option<Vec<String>>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
|
||||||
|
retrieve_vectors: bool,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||||
filter: Option<Value>,
|
filter: Option<Value>,
|
||||||
}
|
}
|
||||||
@@ -185,6 +200,7 @@ pub async fn documents_by_query_post(
|
|||||||
with_filter: body.filter.is_some(),
|
with_filter: body.filter.is_some(),
|
||||||
limit: body.limit,
|
limit: body.limit,
|
||||||
offset: body.offset,
|
offset: body.offset,
|
||||||
|
retrieve_vectors: body.retrieve_vectors,
|
||||||
},
|
},
|
||||||
&req,
|
&req,
|
||||||
);
|
);
|
||||||
@@ -201,7 +217,7 @@ pub async fn get_documents(
|
|||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
debug!(parameters = ?params, "Get documents GET");
|
debug!(parameters = ?params, "Get documents GET");
|
||||||
|
|
||||||
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
|
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
|
||||||
|
|
||||||
let filter = match filter {
|
let filter = match filter {
|
||||||
Some(f) => match serde_json::from_str(&f) {
|
Some(f) => match serde_json::from_str(&f) {
|
||||||
@@ -215,6 +231,7 @@ pub async fn get_documents(
|
|||||||
offset: offset.0,
|
offset: offset.0,
|
||||||
limit: limit.0,
|
limit: limit.0,
|
||||||
fields: fields.merge_star_and_none(),
|
fields: fields.merge_star_and_none(),
|
||||||
|
retrieve_vectors: retrieve_vectors.0,
|
||||||
filter,
|
filter,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -223,6 +240,7 @@ pub async fn get_documents(
|
|||||||
with_filter: query.filter.is_some(),
|
with_filter: query.filter.is_some(),
|
||||||
limit: query.limit,
|
limit: query.limit,
|
||||||
offset: query.offset,
|
offset: query.offset,
|
||||||
|
retrieve_vectors: query.retrieve_vectors,
|
||||||
},
|
},
|
||||||
&req,
|
&req,
|
||||||
);
|
);
|
||||||
@@ -236,10 +254,21 @@ fn documents_by_query(
|
|||||||
query: BrowseQuery,
|
query: BrowseQuery,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||||
let BrowseQuery { offset, limit, fields, filter } = query;
|
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
|
||||||
|
|
||||||
|
let features = index_scheduler.features();
|
||||||
|
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
|
||||||
|
|
||||||
let index = index_scheduler.index(&index_uid)?;
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
|
let (total, documents) = retrieve_documents(
|
||||||
|
&index,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
filter,
|
||||||
|
fields,
|
||||||
|
retrieve_vectors,
|
||||||
|
index_scheduler.features(),
|
||||||
|
)?;
|
||||||
|
|
||||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||||
|
|
||||||
@@ -283,7 +312,11 @@ pub async fn replace_documents(
|
|||||||
debug!(parameters = ?params, "Replace documents");
|
debug!(parameters = ?params, "Replace documents");
|
||||||
let params = params.into_inner();
|
let params = params.into_inner();
|
||||||
|
|
||||||
analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
analytics.add_documents(
|
||||||
|
¶ms,
|
||||||
|
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
|
||||||
|
&req,
|
||||||
|
);
|
||||||
|
|
||||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||||
let uid = get_task_id(&req, &opt)?;
|
let uid = get_task_id(&req, &opt)?;
|
||||||
@@ -320,7 +353,11 @@ pub async fn update_documents(
|
|||||||
let params = params.into_inner();
|
let params = params.into_inner();
|
||||||
debug!(parameters = ?params, "Update documents");
|
debug!(parameters = ?params, "Update documents");
|
||||||
|
|
||||||
analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
analytics.add_documents(
|
||||||
|
¶ms,
|
||||||
|
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
|
||||||
|
&req,
|
||||||
|
);
|
||||||
|
|
||||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||||
let uid = get_task_id(&req, &opt)?;
|
let uid = get_task_id(&req, &opt)?;
|
||||||
@@ -535,11 +572,9 @@ pub async fn delete_documents_by_filter(
|
|||||||
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
|
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
|
||||||
|
|
||||||
// we ensure the filter is well formed before enqueuing it
|
// we ensure the filter is well formed before enqueuing it
|
||||||
|| -> Result<_, ResponseError> {
|
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
|
||||||
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
|
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||||
}()
|
|
||||||
// and whatever was the error, the error code should always be an InvalidDocumentFilter
|
|
||||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
|
||||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||||
|
|
||||||
let uid = get_task_id(&req, &opt)?;
|
let uid = get_task_id(&req, &opt)?;
|
||||||
@@ -553,6 +588,83 @@ pub async fn delete_documents_by_filter(
|
|||||||
Ok(HttpResponse::Accepted().json(task))
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserr)]
|
||||||
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct DocumentEditionByFunction {
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||||
|
pub filter: Option<Value>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentEditionContext>)]
|
||||||
|
pub context: Option<Value>,
|
||||||
|
#[deserr(error = DeserrJsonError<InvalidDocumentEditionFunctionFilter>, missing_field_error = DeserrJsonError::missing_document_edition_function)]
|
||||||
|
pub function: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn edit_documents_by_function(
|
||||||
|
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
|
||||||
|
index_uid: web::Path<String>,
|
||||||
|
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||||
|
req: HttpRequest,
|
||||||
|
opt: web::Data<Opt>,
|
||||||
|
analytics: web::Data<dyn Analytics>,
|
||||||
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
|
debug!(parameters = ?params, "Edit documents by function");
|
||||||
|
|
||||||
|
index_scheduler
|
||||||
|
.features()
|
||||||
|
.check_edit_documents_by_function("Using the documents edit route")?;
|
||||||
|
|
||||||
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||||
|
let index_uid = index_uid.into_inner();
|
||||||
|
let params = params.into_inner();
|
||||||
|
|
||||||
|
analytics.update_documents_by_function(
|
||||||
|
¶ms,
|
||||||
|
index_scheduler.index(&index_uid).is_err(),
|
||||||
|
&req,
|
||||||
|
);
|
||||||
|
|
||||||
|
let DocumentEditionByFunction { filter, context, function } = params;
|
||||||
|
let engine = milli::rhai::Engine::new();
|
||||||
|
if let Err(e) = engine.compile(&function) {
|
||||||
|
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ref filter) = filter {
|
||||||
|
// we ensure the filter is well formed before enqueuing it
|
||||||
|
crate::search::parse_filter(
|
||||||
|
filter,
|
||||||
|
Code::InvalidDocumentFilter,
|
||||||
|
index_scheduler.features(),
|
||||||
|
)?
|
||||||
|
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||||
|
}
|
||||||
|
let task = KindWithContent::DocumentEdition {
|
||||||
|
index_uid,
|
||||||
|
filter_expr: filter,
|
||||||
|
context: match context {
|
||||||
|
Some(Value::Object(m)) => Some(m),
|
||||||
|
None => None,
|
||||||
|
_ => {
|
||||||
|
return Err(ResponseError::from_msg(
|
||||||
|
"The context must be an object".to_string(),
|
||||||
|
Code::InvalidDocumentEditionContext,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
function,
|
||||||
|
};
|
||||||
|
|
||||||
|
let uid = get_task_id(&req, &opt)?;
|
||||||
|
let dry_run = is_dry_run(&req, &opt)?;
|
||||||
|
let task: SummarizedTaskView =
|
||||||
|
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||||
|
.await??
|
||||||
|
.into();
|
||||||
|
|
||||||
|
debug!(returns = ?task, "Edit documents by function");
|
||||||
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
|
}
|
||||||
|
|
||||||
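A small standalone sketch of the pre-flight check the new route performs on the user-supplied function. The route goes through `milli::rhai::Engine`; using the rhai crate directly here is an assumption about what that path re-exports, and the script is a hypothetical payload.

```rust
use rhai::Engine;

fn main() {
    let engine = Engine::new();

    // Hypothetical edit function a client might submit.
    let function = r#"doc.title = doc.title + " (edited)""#;

    // Compiling without running is enough to reject syntactically invalid scripts
    // with a 400-style error before the task is enqueued.
    match engine.compile(function) {
        Ok(_ast) => println!("function accepted"),
        Err(e) => eprintln!("invalid function: {e}"),
    }
}
```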
pub async fn clear_all_documents(
|
pub async fn clear_all_documents(
|
||||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||||
index_uid: web::Path<String>,
|
index_uid: web::Path<String>,
|
||||||
@@ -579,13 +691,46 @@ fn some_documents<'a, 't: 'a>(
|
|||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
|
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
|
||||||
|
retrieve_vectors: RetrieveVectors,
|
||||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
|
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||||
|
|
||||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||||
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
|
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
|
||||||
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||||
|
match retrieve_vectors {
|
||||||
|
RetrieveVectors::Ignore => {}
|
||||||
|
RetrieveVectors::Hide => {
|
||||||
|
document.remove("_vectors");
|
||||||
|
}
|
||||||
|
RetrieveVectors::Retrieve => {
|
||||||
|
// Clippy is simply wrong
|
||||||
|
#[allow(clippy::manual_unwrap_or_default)]
|
||||||
|
let mut vectors = match document.remove("_vectors") {
|
||||||
|
Some(Value::Object(map)) => map,
|
||||||
|
_ => Default::default(),
|
||||||
|
};
|
||||||
|
for (name, vector) in index.embeddings(rtxn, key)? {
|
||||||
|
let user_provided = embedding_configs
|
||||||
|
.iter()
|
||||||
|
.find(|conf| conf.name == name)
|
||||||
|
.is_some_and(|conf| conf.user_provided.contains(key));
|
||||||
|
let embeddings = ExplicitVectors {
|
||||||
|
embeddings: Some(vector.into()),
|
||||||
|
regenerate: !user_provided,
|
||||||
|
};
|
||||||
|
vectors.insert(
|
||||||
|
name,
|
||||||
|
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
document.insert("_vectors".into(), vectors.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(document)
|
||||||
})
|
})
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
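A stripped-down sketch of the `_vectors` handling above using plain `serde_json` values instead of the index and embedder types. The `{ embeddings, regenerate }` shape mirrors the `ExplicitVectors` fields named in the diff; the embedder name and embedding values are made up.

```rust
use serde_json::{json, Map, Value};

fn attach_vectors(
    mut document: Map<String, Value>,
    embeddings: Vec<(String, Value)>, // (embedder name, embedding value)
) -> Map<String, Value> {
    // Start from whatever `_vectors` object the document already carries.
    let mut vectors = match document.remove("_vectors") {
        Some(Value::Object(map)) => map,
        _ => Map::new(),
    };

    for (name, embedding) in embeddings {
        vectors.insert(name, json!({ "embeddings": embedding, "regenerate": false }));
    }

    document.insert("_vectors".into(), Value::Object(vectors));
    document
}

fn main() {
    let doc = serde_json::from_value::<Map<String, Value>>(json!({ "id": 1, "title": "shoe" })).unwrap();
    let doc = attach_vectors(doc, vec![("default".into(), json!([0.1, 0.2, 0.3]))]);
    println!("{}", Value::Object(doc));
}
```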
@@ -596,12 +741,13 @@ fn retrieve_documents<S: AsRef<str>>(
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
filter: Option<Value>,
|
filter: Option<Value>,
|
||||||
attributes_to_retrieve: Option<Vec<S>>,
|
attributes_to_retrieve: Option<Vec<S>>,
|
||||||
|
retrieve_vectors: RetrieveVectors,
|
||||||
|
features: RoFeatures,
|
||||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
let filter = &filter;
|
let filter = &filter;
|
||||||
let filter = if let Some(filter) = filter {
|
let filter = if let Some(filter) = filter {
|
||||||
parse_filter(filter)
|
parse_filter(filter, Code::InvalidDocumentFilter, features)?
|
||||||
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
|
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
@@ -620,53 +766,57 @@ fn retrieve_documents<S: AsRef<str>>(
|
|||||||
let (it, number_of_documents) = {
|
let (it, number_of_documents) = {
|
||||||
let number_of_documents = candidates.len();
|
let number_of_documents = candidates.len();
|
||||||
(
|
(
|
||||||
some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
|
some_documents(
|
||||||
|
index,
|
||||||
|
&rtxn,
|
||||||
|
candidates.into_iter().skip(offset).take(limit),
|
||||||
|
retrieve_vectors,
|
||||||
|
)?,
|
||||||
number_of_documents,
|
number_of_documents,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
let documents: Result<Vec<_>, ResponseError> = it
|
let documents: Vec<_> = it
|
||||||
.map(|document| {
|
.map(|document| {
|
||||||
Ok(match &attributes_to_retrieve {
|
Ok(match &attributes_to_retrieve {
|
||||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||||
&document?,
|
&document?,
|
||||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
attributes_to_retrieve.iter().map(|s| s.as_ref()).chain(
|
||||||
|
(retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
|
||||||
|
),
|
||||||
),
|
),
|
||||||
None => document?,
|
None => document?,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.collect();
|
.collect::<Result<_, ResponseError>>()?;
|
||||||
|
|
||||||
Ok((number_of_documents, documents?))
|
Ok((number_of_documents, documents))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn retrieve_document<S: AsRef<str>>(
|
fn retrieve_document<S: AsRef<str>>(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
doc_id: &str,
|
doc_id: &str,
|
||||||
attributes_to_retrieve: Option<Vec<S>>,
|
attributes_to_retrieve: Option<Vec<S>>,
|
||||||
|
retrieve_vectors: RetrieveVectors,
|
||||||
) -> Result<Document, ResponseError> {
|
) -> Result<Document, ResponseError> {
|
||||||
let txn = index.read_txn()?;
|
let txn = index.read_txn()?;
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&txn)?;
|
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
|
||||||
|
|
||||||
let internal_id = index
|
let internal_id = index
|
||||||
.external_documents_ids()
|
.external_documents_ids()
|
||||||
.get(&txn, doc_id)?
|
.get(&txn, doc_id)?
|
||||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||||
|
|
||||||
let document = index
|
let document = some_documents(index, &txn, Some(internal_id), retrieve_vectors)?
|
||||||
.documents(&txn, std::iter::once(internal_id))?
|
|
||||||
.into_iter()
|
|
||||||
.next()
|
.next()
|
||||||
.map(|(_, d)| d)
|
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))??;
|
||||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
|
||||||
|
|
||||||
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
|
||||||
let document = match &attributes_to_retrieve {
|
let document = match &attributes_to_retrieve {
|
||||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||||
&document,
|
&document,
|
||||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
attributes_to_retrieve
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.as_ref())
|
||||||
|
.chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
|
||||||
),
|
),
|
||||||
None => document,
|
None => document,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -79,7 +79,14 @@ pub async fn search(
|
|||||||
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
|
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
|
||||||
let _permit = search_queue.try_get_search_permit().await?;
|
let _permit = search_queue.try_get_search_permit().await?;
|
||||||
let search_result = tokio::task::spawn_blocking(move || {
|
let search_result = tokio::task::spawn_blocking(move || {
|
||||||
perform_facet_search(&index, search_query, facet_query, facet_name, search_kind)
|
perform_facet_search(
|
||||||
|
&index,
|
||||||
|
search_query,
|
||||||
|
facet_query,
|
||||||
|
facet_name,
|
||||||
|
search_kind,
|
||||||
|
index_scheduler.features(),
|
||||||
|
)
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -115,6 +122,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
|||||||
page: None,
|
page: None,
|
||||||
hits_per_page: None,
|
hits_per_page: None,
|
||||||
attributes_to_retrieve: None,
|
attributes_to_retrieve: None,
|
||||||
|
retrieve_vectors: false,
|
||||||
attributes_to_crop: None,
|
attributes_to_crop: None,
|
||||||
crop_length: DEFAULT_CROP_LENGTH(),
|
crop_length: DEFAULT_CROP_LENGTH(),
|
||||||
attributes_to_highlight: None,
|
attributes_to_highlight: None,
|
||||||
@@ -123,6 +131,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
|||||||
show_ranking_score_details: false,
|
show_ranking_score_details: false,
|
||||||
filter,
|
filter,
|
||||||
sort: None,
|
sort: None,
|
||||||
|
distinct: None,
|
||||||
facets: None,
|
facets: None,
|
||||||
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
|
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
|
||||||
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||||
|
|||||||
@@ -20,9 +20,9 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::search::{
     add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
-    SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+    RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::search_queue::SearchQueue;

@@ -51,6 +51,8 @@ pub struct SearchQueryGet {
     hits_per_page: Option<Param<usize>>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
     attributes_to_retrieve: Option<CS<String>>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
+    retrieve_vectors: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
     attributes_to_crop: Option<CS<String>>,
     #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
@@ -61,6 +63,8 @@ pub struct SearchQueryGet {
     filter: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
     sort: Option<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
+    distinct: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
     show_matches_position: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
@@ -153,11 +157,13 @@ impl From<SearchQueryGet> for SearchQuery {
             page: other.page.as_deref().copied(),
             hits_per_page: other.hits_per_page.as_deref().copied(),
             attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
+            retrieve_vectors: other.retrieve_vectors.0,
             attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
             crop_length: other.crop_length.0,
             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
             filter,
             sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
+            distinct: other.distinct,
             show_matches_position: other.show_matches_position.0,
             show_ranking_score: other.show_ranking_score.0,
             show_ranking_score_details: other.show_ranking_score_details.0,
@@ -222,10 +228,12 @@ pub async fn search_with_url_query(
     let features = index_scheduler.features();

     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
+    let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
     let _permit = search_queue.try_get_search_permit().await?;
-    let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
+    let search_result = tokio::task::spawn_blocking(move || {
+        perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
+    })
+    .await?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
@@ -262,10 +270,13 @@ pub async fn search_with_post(
     let features = index_scheduler.features();

     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
+    let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
+
     let _permit = search_queue.try_get_search_permit().await?;
-    let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
+    let search_result = tokio::task::spawn_blocking(move || {
+        perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
+    })
+    .await?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
         if search_result.degraded {
@@ -287,11 +298,10 @@ pub fn search_kind(
     features: RoFeatures,
 ) -> Result<SearchKind, ResponseError> {
     if query.vector.is_some() {
-        features.check_vector("Passing `vector` as a query parameter")?;
+        features.check_vector("Passing `vector` as a parameter")?;
     }

     if query.hybrid.is_some() {
-        features.check_vector("Passing `hybrid` as a query parameter")?;
+        features.check_vector("Passing `hybrid` as a parameter")?;
     }

     // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
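Side note (not part of the diff): with the `retrieve_vectors` and `distinct` fields added above, a search request body can carry both alongside the existing options. An illustrative payload built with serde_json, assuming the API's usual camelCase naming for `retrieveVectors`; the attribute names and values are examples:

// illustrative sketch only -- not taken from the diff
use serde_json::json;

fn main() {
    let body = json!({
        "q": "shoes",
        "distinct": "sku",            // collapse hits sharing the same `sku` value
        "retrieveVectors": true,      // ask for the `_vectors` field in returned documents
        "attributesToRetrieve": ["title", "sku"]
    });
    println!("{body}");
}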
@@ -4,11 +4,7 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
-use meilisearch_types::error::deserr_codes::{
-    InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
-    InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarRankingScoreThreshold,
-    InvalidSimilarShowRankingScore, InvalidSimilarShowRankingScoreDetails,
-};
+use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{ErrorCode as _, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::keys::actions;
@@ -21,8 +17,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
-    add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
-    SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
+    SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };

 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -97,6 +93,8 @@ async fn similar(

     features.check_vector("Using the similar API")?;

+    let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
+
     // Tenant token search_rules.
     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
         add_search_rules(&mut query.filter, search_rules);
@@ -107,8 +105,17 @@ async fn similar(
     let (embedder_name, embedder) =
         SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;

-    tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder))
-        .await?
+    tokio::task::spawn_blocking(move || {
+        perform_similar(
+            &index,
+            query,
+            embedder_name,
+            embedder,
+            retrieve_vectors,
+            index_scheduler.features(),
+        )
+    })
+    .await?
 }

 #[derive(Debug, deserr::Deserr)]
@@ -122,6 +129,8 @@ pub struct SimilarQueryGet {
     limit: Param<usize>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
     attributes_to_retrieve: Option<CS<String>>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
+    retrieve_vectors: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
     filter: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
@@ -156,6 +165,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             offset,
             limit,
             attributes_to_retrieve,
+            retrieve_vectors,
             filter,
             show_ranking_score,
             show_ranking_score_details,
@@ -180,6 +190,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             filter,
             embedder,
             attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
+            retrieve_vectors: retrieve_vectors.0,
             show_ranking_score: show_ranking_score.0,
             show_ranking_score_details: show_ranking_score_details.0,
             ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
@@ -10,12 +10,14 @@ use serde::Serialize;
 use tracing::debug;

 use crate::analytics::{Analytics, MultiSearchAggregator};
+use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::indexes::search::search_kind;
 use crate::search::{
-    add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
+    add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors,
+    SearchQueryWithIndex, SearchResultWithIndex,
 };
 use crate::search_queue::SearchQueue;

@@ -28,82 +30,44 @@ struct SearchResults {
     results: Vec<SearchResultWithIndex>,
 }

-#[derive(Debug, deserr::Deserr)]
-#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
-pub struct SearchQueries {
-    queries: Vec<SearchQueryWithIndex>,
-}
-
 pub async fn multi_search_with_post(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
     search_queue: Data<SearchQueue>,
-    params: AwebJson<SearchQueries, DeserrJsonError>,
+    params: AwebJson<FederatedSearch, DeserrJsonError>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    let queries = params.into_inner().queries;
-
-    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
-    let features = index_scheduler.features();
-
     // Since we don't want to process half of the search requests and then get a permit refused
     // we're going to get one permit for the whole duration of the multi-search request.
     let _permit = search_queue.try_get_search_permit().await?;

-    // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
-    // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
-    // changes.
-    let search_results: Result<_, (ResponseError, usize)> = async {
-        let mut search_results = Vec::with_capacity(queries.len());
-        for (query_index, (index_uid, mut query)) in
-            queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
-        {
-            debug!(on_index = query_index, parameters = ?query, "Multi-search");
-
+    let federated_search = params.into_inner();
+
+    let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);
+
+    let FederatedSearch { mut queries, federation } = federated_search;
+
+    let features = index_scheduler.features();
+
+    // regardless of federation, check authorization and apply search rules
+    let auth = 'check_authorization: {
+        for (query_index, federated_query) in queries.iter_mut().enumerate() {
+            let index_uid = federated_query.index_uid.as_str();
             // Check index from API key
-            if !index_scheduler.filters().is_index_authorized(&index_uid) {
-                return Err(AuthenticationError::InvalidToken).with_index(query_index);
+            if !index_scheduler.filters().is_index_authorized(index_uid) {
+                break 'check_authorization Err(AuthenticationError::InvalidToken)
+                    .with_index(query_index);
             }
             // Apply search rules from tenant token
-            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
+            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid)
             {
-                add_search_rules(&mut query.filter, search_rules);
+                add_search_rules(&mut federated_query.filter, search_rules);
             }
-
-            let index = index_scheduler
-                .index(&index_uid)
-                .map_err(|err| {
-                    let mut err = ResponseError::from(err);
-                    // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
-                    // here the resource not found is not part of the URL.
-                    err.code = StatusCode::BAD_REQUEST;
-                    err
-                })
-                .with_index(query_index)?;
-
-            let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
-                .with_index(query_index)?;
-
-            let search_result =
-                tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind))
-                    .await
-                    .with_index(query_index)?;
-
-            search_results.push(SearchResultWithIndex {
-                index_uid: index_uid.into_inner(),
-                result: search_result.with_index(query_index)?,
-            });
         }
-        Ok(search_results)
-    }
-    .await;
-
-    if search_results.is_ok() {
-        multi_aggregate.succeed();
-    }
-    analytics.post_multi_search(multi_aggregate);
-
-    let search_results = search_results.map_err(|(mut err, query_index)| {
+        Ok(())
+    };
+
+    auth.map_err(|(mut err, query_index)| {
         // Add the query index that failed as context for the error message.
         // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
        // of result and we can benefit from static typing.
@@ -111,9 +75,95 @@ pub async fn multi_search_with_post(
         err
     })?;

-    debug!(returns = ?search_results, "Multi-search");
-
-    Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
+    let response = match federation {
+        Some(federation) => {
+            let search_result = tokio::task::spawn_blocking(move || {
+                perform_federated_search(&index_scheduler, queries, federation, features)
+            })
+            .await;
+
+            if let Ok(Ok(_)) = search_result {
+                multi_aggregate.succeed();
+            }
+
+            analytics.post_multi_search(multi_aggregate);
+            HttpResponse::Ok().json(search_result??)
+        }
+        None => {
+            // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
+            // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
+            // changes.
+            let search_results: Result<_, (ResponseError, usize)> = async {
+                let mut search_results = Vec::with_capacity(queries.len());
+                for (query_index, (index_uid, query, federation_options)) in queries
+                    .into_iter()
+                    .map(SearchQueryWithIndex::into_index_query_federation)
+                    .enumerate()
+                {
+                    debug!(on_index = query_index, parameters = ?query, "Multi-search");
+
+                    if federation_options.is_some() {
+                        return Err((
+                            MeilisearchHttpError::FederationOptionsInNonFederatedRequest(
+                                query_index,
+                            )
+                            .into(),
+                            query_index,
+                        ));
+                    }
+
+                    let index = index_scheduler
+                        .index(&index_uid)
+                        .map_err(|err| {
+                            let mut err = ResponseError::from(err);
+                            // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
+                            // here the resource not found is not part of the URL.
+                            err.code = StatusCode::BAD_REQUEST;
+                            err
+                        })
+                        .with_index(query_index)?;
+
+                    let search_kind =
+                        search_kind(&query, index_scheduler.get_ref(), &index, features)
+                            .with_index(query_index)?;
+                    let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
+                        .with_index(query_index)?;
+
+                    let search_result = tokio::task::spawn_blocking(move || {
+                        perform_search(&index, query, search_kind, retrieve_vector, features)
+                    })
+                    .await
+                    .with_index(query_index)?;
+
+                    search_results.push(SearchResultWithIndex {
+                        index_uid: index_uid.into_inner(),
+                        result: search_result.with_index(query_index)?,
+                    });
+                }
+                Ok(search_results)
+            }
+            .await;
+
+            if search_results.is_ok() {
+                multi_aggregate.succeed();
+            }
+            analytics.post_multi_search(multi_aggregate);
+
+            let search_results = search_results.map_err(|(mut err, query_index)| {
+                // Add the query index that failed as context for the error message.
+                // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
+                // of result and we can benefit from static typing.
+                err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
+                err
+            })?;
+
+            debug!(returns = ?search_results, "Multi-search");
+
+            HttpResponse::Ok().json(SearchResults { results: search_results })
+        }
+    };
+
+    Ok(response)
 }

 /// Local `Result` extension trait to avoid `map_err` boilerplate.
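Side note (not part of the diff): the rewritten handler accepts either the existing array-of-queries shape or, when `federation` is present, a federated shape merged into one result list. An illustrative multi-search payload using the field names introduced here (`federation`, `federationOptions`, `weight`); the index names and values are examples:

// illustrative sketch only -- builds the JSON shape accepted by the new FederatedSearch deserializer
use serde_json::json;

fn main() {
    let body = json!({
        "federation": { "limit": 10, "offset": 0 },
        "queries": [
            { "indexUid": "movies", "q": "dune", "federationOptions": { "weight": 1.0 } },
            { "indexUid": "books",  "q": "dune", "federationOptions": { "weight": 0.7 } }
        ]
    });
    println!("{body}");
}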
@@ -591,7 +591,7 @@ mod tests {
         let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
         snapshot!(meili_snap::json_string!(err), @r###"
         {
-          "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+          "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
           "code": "invalid_task_types",
           "type": "invalid_request",
           "link": "https://docs.meilisearch.com/errors#invalid_task_types"
meilisearch/src/search/federated.rs (new file, 629 lines)
@@ -0,0 +1,629 @@
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt;
use std::iter::Zip;
use std::rc::Rc;
use std::str::FromStr as _;
use std::time::Duration;
use std::vec::{IntoIter, Vec};

use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
    InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;

use super::ranking_rules::{self, RankingRules};
use super::{
    prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
    SearchQuery, SearchQueryWithIndex,
};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;

pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;

#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederationOptions {
    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
    pub weight: Weight,
}

#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
pub struct Weight(f64);

impl Default for Weight {
    fn default() -> Self {
        Weight(DEFAULT_FEDERATED_WEIGHT)
    }
}

impl std::convert::TryFrom<f64> for Weight {
    type Error = InvalidMultiSearchWeight;

    fn try_from(f: f64) -> Result<Self, Self::Error> {
        if f < 0.0 {
            Err(InvalidMultiSearchWeight)
        } else {
            Ok(Weight(f))
        }
    }
}

impl std::ops::Deref for Weight {
    type Target = f64;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct Federation {
    #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
    pub limit: usize,
    #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
}

#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederatedSearch {
    pub queries: Vec<SearchQueryWithIndex>,
    #[deserr(default)]
    pub federation: Option<Federation>,
}
#[derive(Serialize, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FederatedSearchResult {
    pub hits: Vec<SearchHit>,
    pub processing_time_ms: u128,
    #[serde(flatten)]
    pub hits_info: HitsInfo,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub semantic_hit_count: Option<u32>,

    // These fields are only used for analytics purposes
    #[serde(skip)]
    pub degraded: bool,
    #[serde(skip)]
    pub used_negative_operator: bool,
}

impl fmt::Debug for FederatedSearchResult {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let FederatedSearchResult {
            hits,
            processing_time_ms,
            hits_info,
            semantic_hit_count,
            degraded,
            used_negative_operator,
        } = self;

        let mut debug = f.debug_struct("SearchResult");
        // The most important thing when looking at a search result is the time it took to process
        debug.field("processing_time_ms", &processing_time_ms);
        debug.field("hits", &format!("[{} hits returned]", hits.len()));
        debug.field("hits_info", &hits_info);
        if *used_negative_operator {
            debug.field("used_negative_operator", used_negative_operator);
        }
        if *degraded {
            debug.field("degraded", degraded);
        }
        if let Some(semantic_hit_count) = semantic_hit_count {
            debug.field("semantic_hit_count", &semantic_hit_count);
        }

        debug.finish()
    }
}

struct WeightedScore<'a> {
    details: &'a [ScoreDetails],
    weight: f64,
}

impl<'a> WeightedScore<'a> {
    pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
        Self { details, weight }
    }

    pub fn weighted_global_score(&self) -> f64 {
        ScoreDetails::global_score(self.details.iter()) * self.weight
    }

    pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
        self.weighted_global_score()
            .partial_cmp(&other.weighted_global_score())
            // both are numbers, possibly infinite
            .unwrap()
    }

    pub fn compare(&self, other: &Self) -> Ordering {
        let mut left_it = ScoreDetails::score_values(self.details.iter());
        let mut right_it = ScoreDetails::score_values(other.details.iter());

        loop {
            let left = left_it.next();
            let right = right_it.next();

            match (left, right) {
                (None, None) => return Ordering::Equal,
                (None, Some(_)) => return Ordering::Less,
                (Some(_), None) => return Ordering::Greater,
                (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
                    let left = left * self.weight;
                    let right = right * other.weight;
                    if (left - right).abs() <= f64::EPSILON {
                        continue;
                    }
                    return left.partial_cmp(&right).unwrap();
                }
                (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
                    match left.partial_cmp(right) {
                        Some(Ordering::Equal) => continue,
                        Some(order) => return order,
                        None => return self.compare_weighted_global_scores(other),
                    }
                }
                (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
                    match left.partial_cmp(right) {
                        Some(Ordering::Equal) => continue,
                        Some(order) => return order,
                        None => {
                            return self.compare_weighted_global_scores(other);
                        }
                    }
                }
                // not comparable details, use global
                (Some(ScoreValue::Score(_)), Some(_))
                | (Some(_), Some(ScoreValue::Score(_)))
                | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
                | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
                    let left_count = left_it.count();
                    let right_count = right_it.count();
                    // compare how many remaining groups of rules each side has.
                    // the group with the most remaining groups wins.
                    return left_count
                        .cmp(&right_count)
                        // breaks ties with the global ranking score
                        .then_with(|| self.compare_weighted_global_scores(other));
                }
            }
        }
    }
}

struct QueryByIndex {
    query: SearchQuery,
    federation_options: FederationOptions,
    query_index: usize,
}

struct SearchResultByQuery<'a> {
    documents_ids: Vec<DocumentId>,
    document_scores: Vec<Vec<ScoreDetails>>,
    federation_options: FederationOptions,
    hit_maker: HitMaker<'a>,
    query_index: usize,
}

struct SearchResultByQueryIter<'a> {
    it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
    federation_options: FederationOptions,
    hit_maker: Rc<HitMaker<'a>>,
    query_index: usize,
}

impl<'a> SearchResultByQueryIter<'a> {
    fn new(
        SearchResultByQuery {
            documents_ids,
            document_scores,
            federation_options,
            hit_maker,
            query_index,
        }: SearchResultByQuery<'a>,
    ) -> Self {
        let it = documents_ids.into_iter().zip(document_scores);
        Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
    }
}

struct SearchResultByQueryIterItem<'a> {
    docid: DocumentId,
    score: Vec<ScoreDetails>,
    federation_options: FederationOptions,
    hit_maker: Rc<HitMaker<'a>>,
    query_index: usize,
}

fn merge_index_local_results(
    results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
    itertools::kmerge_by(
        results_by_query.into_iter().map(SearchResultByQueryIter::new),
        |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);

            match left_score.compare(&right_score) {
                // the biggest score goes first
                Ordering::Greater => true,
                // break ties using query index
                Ordering::Equal => left.query_index < right.query_index,
                Ordering::Less => false,
            }
        },
    )
}

fn merge_index_global_results(
    results_by_index: Vec<SearchResultByIndex>,
) -> impl Iterator<Item = SearchHitByIndex> {
    itertools::kmerge_by(
        results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
        |left: &SearchHitByIndex, right: &SearchHitByIndex| {
            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);

            match left_score.compare(&right_score) {
                // the biggest score goes first
                Ordering::Greater => true,
                // break ties using query index
                Ordering::Equal => left.query_index < right.query_index,
                Ordering::Less => false,
            }
        },
    )
}

impl<'a> Iterator for SearchResultByQueryIter<'a> {
    type Item = SearchResultByQueryIterItem<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        let (docid, score) = self.it.next()?;
        Some(SearchResultByQueryIterItem {
            docid,
            score,
            federation_options: self.federation_options,
            hit_maker: Rc::clone(&self.hit_maker),
            query_index: self.query_index,
        })
    }
}

struct SearchHitByIndex {
    hit: SearchHit,
    score: Vec<ScoreDetails>,
    federation_options: FederationOptions,
    query_index: usize,
}

struct SearchResultByIndex {
    hits: Vec<SearchHitByIndex>,
    candidates: RoaringBitmap,
    degraded: bool,
    used_negative_operator: bool,
}

pub fn perform_federated_search(
    index_scheduler: &IndexScheduler,
    queries: Vec<SearchQueryWithIndex>,
    federation: Federation,
    features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> {
    let before_search = std::time::Instant::now();

    // this implementation partition the queries by index to guarantee an important property:
    // - all the queries to a particular index use the same read transaction.
    // This is an important property, otherwise we cannot guarantee the self-consistency of the results.

    // 1. partition queries by index
    let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
    for (query_index, federated_query) in queries.into_iter().enumerate() {
        if let Some(pagination_field) = federated_query.has_pagination() {
            return Err(MeilisearchHttpError::PaginationInFederatedQuery(
                query_index,
                pagination_field,
            )
            .into());
        }

        let (index_uid, query, federation_options) = federated_query.into_index_query_federation();

        queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
            query,
            federation_options: federation_options.unwrap_or_default(),
            query_index,
        })
    }

    // 2. perform queries, merge and make hits index by index
    let required_hit_count = federation.limit + federation.offset;
    // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
    // Then in step (3), we'll update its value if there is any semantic search
    let mut semantic_hit_count = None;
    let mut results_by_index = Vec::with_capacity(queries_by_index.len());
    let mut previous_query_data: Option<(RankingRules, usize, String)> = None;

    for (index_uid, queries) in queries_by_index {
        let index = match index_scheduler.index(&index_uid) {
            Ok(index) => index,
            Err(err) => {
                let mut err = ResponseError::from(err);
                // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
                // here the resource not found is not part of the URL.
                err.code = StatusCode::BAD_REQUEST;
                if let Some(query) = queries.first() {
                    err.message =
                        format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
                }
                return Err(err);
            }
        };

        // Important: this is the only transaction we'll use for this index during this federated search
        let rtxn = index.read_txn()?;

        let criteria = index.criteria(&rtxn)?;

        // stuff we need for the hitmaker
        let script_lang_map = index.script_language(&rtxn)?;

        let dictionary = index.dictionary(&rtxn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let separators = index.allowed_separators(&rtxn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());

        // each query gets its individual cutoff
        let cutoff = index.search_cutoff(&rtxn)?;

        let mut degraded = false;
        let mut used_negative_operator = false;
        let mut candidates = RoaringBitmap::new();

        // 2.1. Compute all candidates for each query in the index
        let mut results_by_query = Vec::with_capacity(queries.len());

        for QueryByIndex { query, federation_options, query_index } in queries {
            // use an immediately invoked lambda to capture the result without returning from the function

            let res: Result<(), ResponseError> = (|| {
                let search_kind = search_kind(&query, index_scheduler, &index, features)?;

                let canonicalization_kind = match (&search_kind, &query.q) {
                    (SearchKind::SemanticOnly { .. }, _) => {
                        ranking_rules::CanonicalizationKind::Vector
                    }
                    (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
                    _ => ranking_rules::CanonicalizationKind::Placeholder,
                };

                let sort = if let Some(sort) = &query.sort {
                    let sorts: Vec<_> =
                        match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
                            Ok(sorts) => sorts,
                            Err(asc_desc_error) => {
                                return Err(milli::Error::from(milli::SortError::from(
                                    asc_desc_error,
                                ))
                                .into())
                            }
                        };
                    Some(sorts)
                } else {
                    None
                };

                let ranking_rules = ranking_rules::RankingRules::new(
                    criteria.clone(),
                    sort,
                    query.matching_strategy.into(),
                    canonicalization_kind,
                );

                if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
                    previous_query_data.take()
                {
                    if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
                        return Err(error.to_response_error(
                            &ranking_rules,
                            &previous_ranking_rules,
                            query_index,
                            previous_query_index,
                            &index_uid,
                            &previous_index_uid,
                        ));
                    }
                    previous_query_data = if previous_ranking_rules.constraint_count()
                        > ranking_rules.constraint_count()
                    {
                        Some((previous_ranking_rules, previous_query_index, previous_index_uid))
                    } else {
                        Some((ranking_rules, query_index, index_uid.clone()))
                    };
                } else {
                    previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
                }

                match search_kind {
                    SearchKind::KeywordOnly => {}
                    _ => semantic_hit_count = Some(0),
                }

                let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;

                let time_budget = match cutoff {
                    Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
                    None => TimeBudget::default(),
                };

                let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
                    prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;

                search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
                search.offset(0);
                search.limit(required_hit_count);

                let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
                let format = AttributesFormat {
                    attributes_to_retrieve: query.attributes_to_retrieve,
                    retrieve_vectors,
                    attributes_to_highlight: query.attributes_to_highlight,
                    attributes_to_crop: query.attributes_to_crop,
                    crop_length: query.crop_length,
                    crop_marker: query.crop_marker,
                    highlight_pre_tag: query.highlight_pre_tag,
                    highlight_post_tag: query.highlight_post_tag,
                    show_matches_position: query.show_matches_position,
                    sort: query.sort,
                    show_ranking_score: query.show_ranking_score,
                    show_ranking_score_details: query.show_ranking_score_details,
                };

                let milli::SearchResult {
                    matching_words,
                    candidates: query_candidates,
                    documents_ids,
                    document_scores,
                    degraded: query_degraded,
                    used_negative_operator: query_used_negative_operator,
                } = result;

                candidates |= query_candidates;
                degraded |= query_degraded;
                used_negative_operator |= query_used_negative_operator;

                let tokenizer = HitMaker::tokenizer(
                    &script_lang_map,
                    dictionary.as_deref(),
                    separators.as_deref(),
                );

                let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);

                let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;

                results_by_query.push(SearchResultByQuery {
                    federation_options,
                    hit_maker,
                    query_index,
                    documents_ids,
                    document_scores,
                });
                Ok(())
            })();

            if let Err(mut error) = res {
                error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
                return Err(error);
            }
        }
        // 2.2. merge inside index
        let mut documents_seen = RoaringBitmap::new();
        let merged_result: Result<Vec<_>, ResponseError> =
            merge_index_local_results(results_by_query)
                // skip documents we've already seen & mark that we saw the current document
                .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
                .take(required_hit_count)
                // 2.3 make hits
                .map(
                    |SearchResultByQueryIterItem {
                         docid,
                         score,
                         federation_options,
                         hit_maker,
                         query_index,
                     }| {
                        let mut hit = hit_maker.make_hit(docid, &score)?;
                        let weighted_score =
                            ScoreDetails::global_score(score.iter()) * (*federation_options.weight);

                        let _federation = serde_json::json!(
                            {
                                "indexUid": index_uid,
                                "queriesPosition": query_index,
                                "weightedRankingScore": weighted_score,
                            }
                        );
                        hit.document.insert("_federation".to_string(), _federation);
                        Ok(SearchHitByIndex { hit, score, federation_options, query_index })
                    },
                )
                .collect();

        let merged_result = merged_result?;
        results_by_index.push(SearchResultByIndex {
            hits: merged_result,
            candidates,
            degraded,
            used_negative_operator,
        });
    }

    // 3. merge hits and metadata across indexes
    // 3.1 merge metadata
    let (estimated_total_hits, degraded, used_negative_operator) = {
        let mut estimated_total_hits = 0;
        let mut degraded = false;
        let mut used_negative_operator = false;

        for SearchResultByIndex {
            hits: _,
            candidates,
            degraded: degraded_by_index,
            used_negative_operator: used_negative_operator_by_index,
        } in &results_by_index
        {
            estimated_total_hits += candidates.len() as usize;
            degraded |= *degraded_by_index;
            used_negative_operator |= *used_negative_operator_by_index;
        }

        (estimated_total_hits, degraded, used_negative_operator)
    };

    // 3.2 merge hits
    let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
        .skip(federation.offset)
        .take(federation.limit)
        .inspect(|hit| {
            if let Some(semantic_hit_count) = &mut semantic_hit_count {
                if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
                    *semantic_hit_count += 1;
                }
            }
        })
        .map(|hit| hit.hit)
        .collect();

    let search_result = FederatedSearchResult {
        hits: merged_hits,
        processing_time_ms: before_search.elapsed().as_millis(),
        hits_info: HitsInfo::OffsetLimit {
            limit: federation.limit,
            offset: federation.offset,
            estimated_total_hits,
        },
        semantic_hit_count,
        degraded,
        used_negative_operator,
    };

    Ok(search_result)
}
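Side note (not part of the diff): in the merge functions above, hits from different queries and indexes are interleaved by weighted ranking score. A reduced standalone illustration of that ordering (the real code compares full ScoreDetails rule by rule; here only a global score is weighted):

// illustrative sketch only -- the tuples stand in for (index, global ranking score, federation weight)
fn main() {
    let mut hits = vec![("movies", 0.90_f64, 1.0_f64), ("comics", 0.80, 1.5), ("books", 0.95, 0.5)];

    // Best weighted score first, mirroring `weighted_global_score` above.
    hits.sort_by(|a, b| (b.1 * b.2).partial_cmp(&(a.1 * a.2)).unwrap());

    assert_eq!(hits[0].0, "comics"); // 0.80 * 1.5 = 1.20 outranks the unweighted 0.90
}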
@@ -1,12 +1,13 @@
|
|||||||
use core::fmt;
|
use core::fmt;
|
||||||
use std::cmp::min;
|
use std::cmp::min;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use either::Either;
|
use either::Either;
|
||||||
|
use index_scheduler::RoFeatures;
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use meilisearch_auth::IndexSearchRules;
|
use meilisearch_auth::IndexSearchRules;
|
||||||
use meilisearch_types::deserr::DeserrJsonError;
|
use meilisearch_types::deserr::DeserrJsonError;
|
||||||
@@ -15,6 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
|
|||||||
use meilisearch_types::heed::RoTxn;
|
use meilisearch_types::heed::RoTxn;
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
|
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||||
use meilisearch_types::milli::vector::Embedder;
|
use meilisearch_types::milli::vector::Embedder;
|
||||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
||||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
@@ -30,6 +32,11 @@ use serde_json::{json, Value};
|
|||||||
|
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
|
|
||||||
|
mod federated;
|
||||||
|
pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions};
|
||||||
|
|
||||||
|
mod ranking_rules;
|
||||||
|
|
||||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
||||||
|
|
||||||
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
|
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
|
||||||
@@ -59,6 +66,8 @@ pub struct SearchQuery {
|
|||||||
pub hits_per_page: Option<usize>,
|
pub hits_per_page: Option<usize>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
|
||||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
|
||||||
|
pub retrieve_vectors: bool,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
|
||||||
pub attributes_to_crop: Option<Vec<String>>,
|
pub attributes_to_crop: Option<Vec<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
|
||||||
@@ -75,6 +84,8 @@ pub struct SearchQuery {
|
|||||||
pub filter: Option<Value>,
|
pub filter: Option<Value>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
||||||
pub sort: Option<Vec<String>>,
|
pub sort: Option<Vec<String>>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
|
||||||
|
pub distinct: Option<String>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
|
||||||
pub facets: Option<Vec<String>>,
|
pub facets: Option<Vec<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
|
||||||
@@ -141,6 +152,7 @@ impl fmt::Debug for SearchQuery {
|
|||||||
page,
|
page,
|
||||||
hits_per_page,
|
hits_per_page,
|
||||||
attributes_to_retrieve,
|
attributes_to_retrieve,
|
||||||
|
retrieve_vectors,
|
||||||
attributes_to_crop,
|
attributes_to_crop,
|
||||||
crop_length,
|
crop_length,
|
||||||
attributes_to_highlight,
|
attributes_to_highlight,
|
||||||
@@ -149,6 +161,7 @@ impl fmt::Debug for SearchQuery {
|
|||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
filter,
|
filter,
|
||||||
sort,
|
sort,
|
||||||
|
distinct,
|
||||||
facets,
|
facets,
|
||||||
highlight_pre_tag,
|
highlight_pre_tag,
|
||||||
highlight_post_tag,
|
highlight_post_tag,
|
||||||
@@ -173,6 +186,9 @@ impl fmt::Debug for SearchQuery {
|
|||||||
if let Some(q) = q {
|
if let Some(q) = q {
|
||||||
debug.field("q", &q);
|
debug.field("q", &q);
|
||||||
}
|
}
|
||||||
|
if *retrieve_vectors {
|
||||||
|
debug.field("retrieve_vectors", &retrieve_vectors);
|
||||||
|
}
|
||||||
if let Some(v) = vector {
|
if let Some(v) = vector {
|
||||||
if v.len() < 10 {
|
if v.len() < 10 {
|
||||||
debug.field("vector", &v);
|
debug.field("vector", &v);
|
||||||
@@ -195,6 +211,9 @@ impl fmt::Debug for SearchQuery {
|
|||||||
if let Some(sort) = sort {
|
if let Some(sort) = sort {
|
||||||
debug.field("sort", &sort);
|
debug.field("sort", &sort);
|
||||||
}
|
}
|
||||||
|
if let Some(distinct) = distinct {
|
||||||
|
debug.field("distinct", &distinct);
|
||||||
|
}
|
||||||
if let Some(facets) = facets {
|
if let Some(facets) = facets {
|
||||||
debug.field("facets", &facets);
|
debug.field("facets", &facets);
|
||||||
}
|
}
|
||||||
@@ -244,11 +263,13 @@ pub struct HybridQuery {
|
|||||||
pub embedder: Option<String>,
|
pub embedder: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub enum SearchKind {
|
pub enum SearchKind {
|
||||||
KeywordOnly,
|
KeywordOnly,
|
||||||
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
|
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
|
||||||
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
|
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchKind {
|
impl SearchKind {
|
||||||
pub(crate) fn semantic(
|
pub(crate) fn semantic(
|
||||||
index_scheduler: &index_scheduler::IndexScheduler,
|
index_scheduler: &index_scheduler::IndexScheduler,
|
||||||
@@ -345,7 +366,7 @@ impl SearchQuery {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A `SearchQuery` + an index UID.
|
/// A `SearchQuery` + an index UID and optional FederationOptions.
|
||||||
// This struct contains the fields of `SearchQuery` inline.
|
// This struct contains the fields of `SearchQuery` inline.
|
||||||
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
|
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
|
||||||
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
|
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
|
||||||
@@ -360,16 +381,18 @@ pub struct SearchQueryWithIndex {
pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
-#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
-pub offset: usize,
-#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
-pub limit: usize,
+#[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)]
+pub offset: Option<usize>,
+#[deserr(default, error = DeserrJsonError<InvalidSearchLimit>)]
+pub limit: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
+#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
+pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
@@ -386,6 +409,8 @@ pub struct SearchQueryWithIndex {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
+#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
+pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@@ -400,12 +425,33 @@ pub struct SearchQueryWithIndex {
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
+
+#[deserr(default)]
+pub federation_options: Option<FederationOptions>,
}

impl SearchQueryWithIndex {
-pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
+pub fn has_federation_options(&self) -> bool {
+self.federation_options.is_some()
+}
+pub fn has_pagination(&self) -> Option<&'static str> {
+if self.offset.is_some() {
+Some("offset")
+} else if self.limit.is_some() {
+Some("limit")
+} else if self.page.is_some() {
+Some("page")
+} else if self.hits_per_page.is_some() {
+Some("hitsPerPage")
+} else {
+None
+}
+}
+
+pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
let SearchQueryWithIndex {
index_uid,
+federation_options,
q,
vector,
offset,
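Editor's note: a minimal sketch of how the helpers introduced above could be used by a caller that validates one federated query; only `has_pagination` and `into_index_query_federation` come from this diff, the function name and the error string are illustrative assumptions.

// Sketch only (assumed caller-side logic): reject per-query pagination when
// federation is requested, then split the query into its parts.
fn split_query(
    query: SearchQueryWithIndex,
) -> Result<(IndexUid, SearchQuery, Option<FederationOptions>), String> {
    // `has_pagination` reports which pagination parameter was set, if any.
    if let Some(parameter) = query.has_pagination() {
        return Err(format!("`{parameter}` is not allowed in a federated query"));
    }
    Ok(query.into_index_query_federation())
}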
@@ -413,6 +459,7 @@ impl SearchQueryWithIndex {
page,
hits_per_page,
attributes_to_retrieve,
+retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@@ -421,6 +468,7 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
+distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@@ -435,11 +483,12 @@ impl SearchQueryWithIndex {
SearchQuery {
q,
vector,
-offset,
-limit,
+offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()),
+limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()),
page,
hits_per_page,
attributes_to_retrieve,
+retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@@ -448,6 +497,7 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
+distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@@ -459,6 +509,7 @@ impl SearchQueryWithIndex {
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
+federation_options,
)
}
}
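Editor's note: a short hedged illustration of the defaulting behaviour shown above; `offset` and `limit` stay `None` on `SearchQueryWithIndex` and only fall back to the module defaults when converted into a `SearchQuery`. The helper name is hypothetical, the constants are the existing `DEFAULT_SEARCH_OFFSET`/`DEFAULT_SEARCH_LIMIT` functions.

// Sketch only: resolve optional pagination the same way the conversion above does.
fn resolve_pagination(offset: Option<usize>, limit: Option<usize>) -> (usize, usize) {
    (offset.unwrap_or(DEFAULT_SEARCH_OFFSET()), limit.unwrap_or(DEFAULT_SEARCH_LIMIT()))
}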
@@ -478,6 +529,8 @@ pub struct SimilarQuery {
pub embedder: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
+#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
+pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
@@ -709,13 +762,18 @@ fn prepare_search<'t>(
query: &'t SearchQuery,
search_kind: &SearchKind,
time_budget: TimeBudget,
-) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
+features: RoFeatures,
+) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}

+if let Some(distinct) = &query.distinct {
+search.distinct(distinct.clone());
+}
+
match search_kind {
SearchKind::KeywordOnly => {
if let Some(q) = &query.q {
@@ -725,10 +783,15 @@ fn prepare_search<'t>(
SearchKind::SemanticOnly { embedder_name, embedder } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
-None => embedder
-.embed_one(query.q.clone().unwrap())
-.map_err(milli::vector::Error::from)
-.map_err(milli::Error::from)?,
+None => {
+let span = tracing::trace_span!(target: "search::vector", "embed_one");
+let _entered = span.enter();
+
+embedder
+.embed_one(query.q.clone().unwrap())
+.map_err(milli::vector::Error::from)
+.map_err(milli::Error::from)?
+}
};

search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
@@ -787,7 +850,7 @@ fn prepare_search<'t>(
search.limit(limit);

if let Some(ref filter) = query.filter {
-if let Some(facets) = parse_filter(filter)? {
+if let Some(facets) = parse_filter(filter, Code::InvalidSearchFilter, features)? {
search.filter(facets);
}
}
@@ -810,7 +873,9 @@ pub fn perform_search(
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
-) -> Result<SearchResult, MeilisearchHttpError> {
+retrieve_vectors: RetrieveVectors,
+features: RoFeatures,
+) -> Result<SearchResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
@@ -819,7 +884,7 @@ pub fn perform_search(
};

let (search, is_finite_pagination, max_total_hits, offset) =
-prepare_search(index, &rtxn, &query, &search_kind, time_budget)?;
+prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;

let (
milli::SearchResult {
@@ -831,15 +896,7 @@ pub fn perform_search(
used_negative_operator,
},
semantic_hit_count,
-) = match &search_kind {
-SearchKind::KeywordOnly => (search.execute()?, None),
-SearchKind::SemanticOnly { .. } => {
-let results = search.execute()?;
-let semantic_hit_count = results.document_scores.len() as u32;
-(results, Some(semantic_hit_count))
-}
-SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
-};
+) = search_from_kind(search_kind, search)?;

let SearchQuery {
q,
@@ -847,6 +904,8 @@ pub fn perform_search(
page,
hits_per_page,
attributes_to_retrieve,
+// use the enum passed as parameter
+retrieve_vectors: _,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@@ -866,10 +925,12 @@ pub fn perform_search(
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
+distinct: _,
} = query;

let format = AttributesFormat {
attributes_to_retrieve,
+retrieve_vectors,
attributes_to_highlight,
attributes_to_crop,
crop_length,
@@ -882,8 +943,13 @@ pub fn perform_search(
show_ranking_score_details,
};

-let documents =
-make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;
+let documents = make_hits(
+index,
+&rtxn,
+format,
+matching_words,
+documents_ids.iter().copied().zip(document_scores.iter()),
+)?;

let number_of_hits = min(candidates.len() as usize, max_total_hits);
let hits_info = if is_finite_pagination {
@@ -951,8 +1017,25 @@ pub fn perform_search(
Ok(result)
}

+pub fn search_from_kind(
+search_kind: SearchKind,
+search: milli::Search<'_>,
+) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
+let (milli_result, semantic_hit_count) = match &search_kind {
+SearchKind::KeywordOnly => (search.execute()?, None),
+SearchKind::SemanticOnly { .. } => {
+let results = search.execute()?;
+let semantic_hit_count = results.document_scores.len() as u32;
+(results, Some(semantic_hit_count))
+}
+SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
+};
+Ok((milli_result, semantic_hit_count))
+}
+
struct AttributesFormat {
attributes_to_retrieve: Option<BTreeSet<String>>,
+retrieve_vectors: RetrieveVectors,
attributes_to_highlight: Option<HashSet<String>>,
attributes_to_crop: Option<Vec<String>>,
crop_length: usize,
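Editor's note: a hedged sketch of driving the extracted `search_from_kind` helper above; the wrapper function and the use made of the result are assumptions, the call shape and the meaning of the optional semantic hit count come from the diff.

// Sketch only: run an already prepared milli::Search through the helper and
// report how many hits were semantic (None for keyword-only searches).
fn run(search: milli::Search<'_>, kind: SearchKind) -> Result<Option<u32>, MeilisearchHttpError> {
    let (_result, semantic_hit_count) = search_from_kind(kind, search)?;
    Ok(semantic_hit_count)
}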
@@ -965,103 +1048,248 @@ struct AttributesFormat {
show_ranking_score_details: bool,
}

-fn make_hits(
-index: &Index,
-rtxn: &RoTxn<'_>,
-format: AttributesFormat,
-matching_words: milli::MatchingWords,
-documents_ids: Vec<u32>,
-document_scores: Vec<Vec<ScoreDetails>>,
-) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
-let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
-let displayed_ids = index
-.displayed_fields_ids(rtxn)?
-.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
-.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
-let fids = |attrs: &BTreeSet<String>| {
-let mut ids = BTreeSet::new();
-for attr in attrs {
-if attr == "*" {
-ids.clone_from(&displayed_ids);
-break;
-}
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum RetrieveVectors {
+/// Do not touch the `_vectors` field
+///
+/// this is the behavior when the vectorStore feature is disabled
+Ignore,
+/// Remove the `_vectors` field
+///
+/// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `false`
+Hide,
+/// Retrieve vectors from the DB and merge them into the `_vectors` field
+///
+/// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `true`
+Retrieve,
+}

-if let Some(id) = fields_ids_map.id(attr) {
-ids.insert(id);
-}
+impl RetrieveVectors {
+pub fn new(
+retrieve_vector: bool,
+features: index_scheduler::RoFeatures,
+) -> Result<Self, index_scheduler::Error> {
+match (retrieve_vector, features.check_vector("Passing `retrieveVectors` as a parameter")) {
+(true, Ok(())) => Ok(Self::Retrieve),
+(true, Err(error)) => Err(error),
+(false, Ok(())) => Ok(Self::Hide),
+(false, Err(_)) => Ok(Self::Ignore),
}
-ids
-};
-let to_retrieve_ids: BTreeSet<_> = format
-.attributes_to_retrieve
-.as_ref()
-.map(fids)
-.unwrap_or_else(|| displayed_ids.clone())
-.intersection(&displayed_ids)
-.cloned()
-.collect();
-let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
-let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
-let formatted_options = compute_formatted_options(
-&attr_to_highlight,
-&attr_to_crop,
-format.crop_length,
-&to_retrieve_ids,
-&fields_ids_map,
-&displayed_ids,
-);
-let mut tokenizer_builder = TokenizerBuilder::default();
-tokenizer_builder.create_char_map(true);
-let script_lang_map = index.script_language(rtxn)?;
-if !script_lang_map.is_empty() {
-tokenizer_builder.allow_list(&script_lang_map);
}
-let separators = index.allowed_separators(rtxn)?;
-let separators: Option<Vec<_>> =
-separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
-if let Some(ref separators) = separators {
-tokenizer_builder.separators(separators);
+}
+
+struct HitMaker<'a> {
+index: &'a Index,
+rtxn: &'a RoTxn<'a>,
+fields_ids_map: FieldsIdsMap,
+displayed_ids: BTreeSet<FieldId>,
+vectors_fid: Option<FieldId>,
+retrieve_vectors: RetrieveVectors,
+to_retrieve_ids: BTreeSet<FieldId>,
+embedding_configs: Vec<milli::index::IndexEmbeddingConfig>,
+formatter_builder: MatcherBuilder<'a>,
+formatted_options: BTreeMap<FieldId, FormatOptions>,
+show_ranking_score: bool,
+show_ranking_score_details: bool,
+sort: Option<Vec<String>>,
+show_matches_position: bool,
+}
+
+impl<'a> HitMaker<'a> {
+pub fn tokenizer<'b>(
+script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>,
+dictionary: Option<&'b [&'b str]>,
+separators: Option<&'b [&'b str]>,
+) -> milli::tokenizer::Tokenizer<'b> {
+let mut tokenizer_builder = TokenizerBuilder::default();
+tokenizer_builder.create_char_map(true);
+if !script_lang_map.is_empty() {
+tokenizer_builder.allow_list(script_lang_map);
+}
+
+if let Some(separators) = separators {
+tokenizer_builder.separators(separators);
+}
+
+if let Some(dictionary) = dictionary {
+tokenizer_builder.words_dict(dictionary);
+}
+
+tokenizer_builder.into_tokenizer()
}
-let dictionary = index.dictionary(rtxn)?;
-let dictionary: Option<Vec<_>> =
-dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
-if let Some(ref dictionary) = dictionary {
-tokenizer_builder.words_dict(dictionary);
+
+pub fn formatter_builder(
+matching_words: milli::MatchingWords,
+tokenizer: milli::tokenizer::Tokenizer<'_>,
+) -> MatcherBuilder<'_> {
+let formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
+
+formatter_builder
}
-let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
-formatter_builder.crop_marker(format.crop_marker);
-formatter_builder.highlight_prefix(format.highlight_pre_tag);
-formatter_builder.highlight_suffix(format.highlight_post_tag);
-let mut documents = Vec::new();
-let documents_iter = index.documents(rtxn, documents_ids)?;
-for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
+
+pub fn new(
+index: &'a Index,
+rtxn: &'a RoTxn<'a>,
+format: AttributesFormat,
+mut formatter_builder: MatcherBuilder<'a>,
+) -> Result<Self, MeilisearchHttpError> {
+formatter_builder.crop_marker(format.crop_marker);
+formatter_builder.highlight_prefix(format.highlight_pre_tag);
+formatter_builder.highlight_suffix(format.highlight_post_tag);
+
+let fields_ids_map = index.fields_ids_map(rtxn)?;
+let displayed_ids = index
+.displayed_fields_ids(rtxn)?
+.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
+
+let vectors_fid =
+fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
+
+let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
+// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
+(None, _) => false,
+// displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
+(Some(_), None) => true,
+// displayed_ids is a finit list, so hide if `_vectors` is not part of it
+(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
+};
+
+let displayed_ids =
+displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
+
+let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
+if vectors_is_hidden {
+RetrieveVectors::Hide
+} else {
+RetrieveVectors::Retrieve
+}
+} else {
+format.retrieve_vectors
+};
+
+let fids = |attrs: &BTreeSet<String>| {
+let mut ids = BTreeSet::new();
+for attr in attrs {
+if attr == "*" {
+ids.clone_from(&displayed_ids);
+break;
+}
+
+if let Some(id) = fields_ids_map.id(attr) {
+ids.insert(id);
+}
+}
+ids
+};
+let to_retrieve_ids: BTreeSet<_> = format
+.attributes_to_retrieve
+.as_ref()
+.map(fids)
+.unwrap_or_else(|| displayed_ids.clone())
+.intersection(&displayed_ids)
+.cloned()
+.collect();
+
+let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
+let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
+let formatted_options = compute_formatted_options(
+&attr_to_highlight,
+&attr_to_crop,
+format.crop_length,
+&to_retrieve_ids,
+&fields_ids_map,
+&displayed_ids,
+);
+
+let embedding_configs = index.embedding_configs(rtxn)?;
+
+Ok(Self {
+index,
+rtxn,
+fields_ids_map,
+displayed_ids,
+vectors_fid,
+retrieve_vectors,
+to_retrieve_ids,
+embedding_configs,
+formatter_builder,
+formatted_options,
+show_ranking_score: format.show_ranking_score,
+show_ranking_score_details: format.show_ranking_score_details,
+show_matches_position: format.show_matches_position,
+sort: format.sort,
+})
+}
+
+pub fn make_hit(
+&self,
+id: u32,
+score: &[ScoreDetails],
+) -> Result<SearchHit, MeilisearchHttpError> {
+let (_, obkv) =
+self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
+
// First generate a document with all the displayed fields
-let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
+let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
+
+let add_vectors_fid =
+self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
+
// select the attributes to retrieve
-let attributes_to_retrieve = to_retrieve_ids
+let attributes_to_retrieve = self
+.to_retrieve_ids
.iter()
-.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
+// skip the vectors_fid if RetrieveVectors::Hide
+.filter(|fid| match self.vectors_fid {
+Some(vectors_fid) => {
+!(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
+}
+None => true,
+})
+// need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
+.chain(add_vectors_fid.iter())
+.map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name"));
+
let mut document =
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
+
+if self.retrieve_vectors == RetrieveVectors::Retrieve {
+// Clippy is wrong
+#[allow(clippy::manual_unwrap_or_default)]
+let mut vectors = match document.remove("_vectors") {
+Some(Value::Object(map)) => map,
+_ => Default::default(),
+};
+for (name, vector) in self.index.embeddings(self.rtxn, id)? {
+let user_provided = self
+.embedding_configs
+.iter()
+.find(|conf| conf.name == name)
+.is_some_and(|conf| conf.user_provided.contains(id));
+let embeddings =
+ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
+vectors.insert(name, serde_json::to_value(embeddings)?);
+}
+document.insert("_vectors".into(), vectors.into());
+}
+
let (matches_position, formatted) = format_fields(
&displayed_document,
-&fields_ids_map,
-&formatter_builder,
-&formatted_options,
-format.show_matches_position,
-&displayed_ids,
+&self.fields_ids_map,
+&self.formatter_builder,
+&self.formatted_options,
+self.show_matches_position,
+&self.displayed_ids,
)?;

-if let Some(sort) = format.sort.as_ref() {
+if let Some(sort) = self.sort.as_ref() {
insert_geo_distance(sort, &mut document);
}

let ranking_score =
-format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
+self.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
let ranking_score_details =
-format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
+self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));

let hit = SearchHit {
document,
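Editor's note: a minimal sketch of the mapping encoded by `RetrieveVectors::new` above, between the `retrieveVectors` request flag and the vectorStore feature check; the wrapper is hypothetical, the variants and their meaning are taken from the enum documentation in this diff.

// Sketch only: Retrieve when requested and allowed, Hide when allowed but not
// requested, Ignore when the feature is disabled and the flag was not set.
fn resolve_retrieve_vectors(
    requested: bool,
    features: index_scheduler::RoFeatures,
) -> Result<RetrieveVectors, index_scheduler::Error> {
    RetrieveVectors::new(requested, features)
}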
@@ -1070,7 +1298,38 @@ fn make_hits(
ranking_score_details,
ranking_score,
};
-documents.push(hit);
+
+Ok(hit)
+}
+}
+
+fn make_hits<'a>(
+index: &Index,
+rtxn: &RoTxn<'_>,
+format: AttributesFormat,
+matching_words: milli::MatchingWords,
+documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
+) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
+let mut documents = Vec::new();
+
+let script_lang_map = index.script_language(rtxn)?;
+
+let dictionary = index.dictionary(rtxn)?;
+let dictionary: Option<Vec<_>> =
+dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
+let separators = index.allowed_separators(rtxn)?;
+let separators: Option<Vec<_>> =
+separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
+
+let tokenizer =
+HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref());
+
+let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
+
+let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
+
+for (id, score) in documents_ids_scores {
+documents.push(hit_maker.make_hit(id, score)?);
}
Ok(documents)
}
@@ -1081,7 +1340,8 @@ pub fn perform_facet_search(
facet_query: Option<String>,
facet_name: String,
search_kind: SearchKind,
-) -> Result<FacetSearchResult, MeilisearchHttpError> {
+features: RoFeatures,
+) -> Result<FacetSearchResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
@@ -1089,7 +1349,8 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};

-let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, &search_kind, time_budget)?;
+let (search, _, _, _) =
+prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
let mut facet_search = SearchForFacetValues::new(
facet_name,
search,
@@ -1114,6 +1375,8 @@ pub fn perform_similar(
query: SimilarQuery,
embedder_name: String,
embedder: Arc<Embedder>,
+retrieve_vectors: RetrieveVectors,
+features: RoFeatures,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@@ -1125,6 +1388,7 @@ pub fn perform_similar(
filter: _,
embedder: _,
attributes_to_retrieve,
+retrieve_vectors: _,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
@@ -1143,10 +1407,7 @@ pub fn perform_similar(
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);

if let Some(ref filter) = query.filter {
-if let Some(facets) = parse_filter(filter)
-// inject InvalidSimilarFilter code
-.map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
-{
+if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
similar.filter(facets);
}
}
@@ -1171,6 +1432,7 @@ pub fn perform_similar(

let format = AttributesFormat {
attributes_to_retrieve,
+retrieve_vectors,
attributes_to_highlight: None,
attributes_to_crop: None,
crop_length: DEFAULT_CROP_LENGTH(),
@@ -1183,7 +1445,13 @@ pub fn perform_similar(
show_ranking_score_details,
};

-let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;
+let hits = make_hits(
+index,
+&rtxn,
+format,
+Default::default(),
+documents_ids.iter().copied().zip(document_scores.iter()),
+)?;

let max_total_hits = index
.pagination_max_total_hits(&rtxn)
@@ -1212,13 +1480,23 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
-if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
+if let Some((lat, lng)) =
+extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"]))
+{
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}

+fn extract_geo_value(value: &Value) -> Option<f64> {
+match value {
+Value::Number(n) => n.as_f64(),
+Value::String(s) => s.parse().ok(),
+_ => None,
+}
+}
+
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
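Editor's note: a small hedged example of what the new `extract_geo_value` helper above accepts; the values mirror the tests added at the end of this diff, the wrapper function is illustrative.

// Sketch only: numeric and stringified coordinates are both accepted now,
// anything else yields None.
fn geo_value_examples() {
    use serde_json::json;
    assert_eq!(extract_geo_value(&json!(50)), Some(50.0));
    assert_eq!(extract_geo_value(&json!("50")), Some(50.0));
    assert_eq!(extract_geo_value(&json!(null)), None);
}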
@@ -1346,10 +1624,10 @@ fn make_document(
Ok(document)
}

-fn format_fields<'a>(
+fn format_fields(
document: &Document,
field_ids_map: &FieldsIdsMap,
-builder: &'a MatcherBuilder<'a>,
+builder: &MatcherBuilder<'_>,
formatted_options: &BTreeMap<FieldId, FormatOptions>,
compute_matches: bool,
displayable_ids: &BTreeSet<FieldId>,
@@ -1404,9 +1682,9 @@ fn format_fields(
Ok((matches_position, document))
}

-fn format_value<'a>(
+fn format_value(
value: Value,
-builder: &'a MatcherBuilder<'a>,
+builder: &MatcherBuilder<'_>,
format_options: Option<FormatOptions>,
infos: &mut Vec<MatchBounds>,
compute_matches: bool,
@@ -1485,15 +1763,33 @@ fn format_value<'a>(
}
}

-pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
-match facets {
-Value::String(expr) => {
-let condition = Filter::from_str(expr)?;
-Ok(condition)
+pub(crate) fn parse_filter(
+facets: &Value,
+filter_parsing_error_code: Code,
+features: RoFeatures,
+) -> Result<Option<Filter>, ResponseError> {
+let filter = match facets {
+Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
+Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
+v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone()).into()),
+};
+let filter = filter.map_err(|err: ResponseError| {
+ResponseError::from_msg(err.to_string(), filter_parsing_error_code)
+})?;
+
+if let Some(ref filter) = filter {
+// If the contains operator is used while the contains filter features is not enabled, errors out
+if let Some((token, error)) =
+filter.use_contains_operator().zip(features.check_contains_filter().err())
+{
+return Err(ResponseError::from_msg(
+token.as_external_error(error).to_string(),
+Code::FeatureNotEnabled,
+));
}
-Value::Array(arr) => parse_filter_array(arr),
-v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone())),
}
+
+Ok(filter)
}

fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
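Editor's note: a hedged sketch of calling the reworked `parse_filter` above from a search-side caller; the wrapper is an assumption, the signature and the `Code::InvalidSearchFilter` error code appear elsewhere in this diff.

// Sketch only: parse a user-supplied filter, attributing parse errors to the
// search filter error code and honouring the contains-filter feature check.
fn parse_search_filter(
    filter: &Value,
    features: RoFeatures,
) -> Result<Option<Filter>, ResponseError> {
    parse_filter(filter, Code::InvalidSearchFilter, features)
}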
@@ -1592,4 +1888,54 @@ mod test {
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
+
+#[test]
+fn test_insert_geo_distance_with_coords_as_string() {
+let value: Document = serde_json::from_str(
+r#"{
+"_geo": {
+"lat": "50",
+"lng": 3
+}
+}"#,
+)
+.unwrap();
+
+let sorters = &["_geoPoint(50,3):desc".to_string()];
+let mut document = value.clone();
+insert_geo_distance(sorters, &mut document);
+assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
+
+let value: Document = serde_json::from_str(
+r#"{
+"_geo": {
+"lat": "50",
+"lng": "3"
+},
+"id": "1"
+}"#,
+)
+.unwrap();
+
+let sorters = &["_geoPoint(50,3):desc".to_string()];
+let mut document = value.clone();
+insert_geo_distance(sorters, &mut document);
+assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
+
+let value: Document = serde_json::from_str(
+r#"{
+"_geo": {
+"lat": 50,
+"lng": "3"
+},
+"id": "1"
+}"#,
+)
+.unwrap();
+
+let sorters = &["_geoPoint(50,3):desc".to_string()];
+let mut document = value.clone();
+insert_geo_distance(sorters, &mut document);
+assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
+}
}
meilisearch/src/search/ranking_rules.rs (new file, 823 lines)
@@ -0,0 +1,823 @@
use std::collections::HashMap;
use std::fmt::Write;

use itertools::Itertools as _;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};

pub struct RankingRules {
canonical_criteria: Vec<Criterion>,
canonical_sort: Option<Vec<AscDesc>>,
canonicalization_actions: Vec<CanonicalizationAction>,
source_criteria: Vec<Criterion>,
source_sort: Option<Vec<AscDesc>>,
}

pub enum CanonicalizationAction {
PrependedWords {
prepended_index: RankingRuleSource,
},
RemovedDuplicate {
earlier_occurrence: RankingRuleSource,
removed_occurrence: RankingRuleSource,
},
RemovedWords {
reason: RemoveWords,
removed_occurrence: RankingRuleSource,
},
RemovedPlaceholder {
removed_occurrence: RankingRuleSource,
},
TruncatedVector {
vector_rule: RankingRuleSource,
truncated_from: RankingRuleSource,
},
RemovedVector {
vector_rule: RankingRuleSource,
removed_occurrence: RankingRuleSource,
},
RemovedSort {
removed_occurrence: RankingRuleSource,
},
}

pub enum RemoveWords {
WasPrepended,
MatchingStrategyAll,
}

impl std::fmt::Display for RemoveWords {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let reason = match self {
RemoveWords::WasPrepended => "it was previously prepended",
RemoveWords::MatchingStrategyAll => "`query.matchingWords` is set to `all`",
};
f.write_str(reason)
}
}

pub enum CanonicalizationKind {
Placeholder,
Keyword,
Vector,
}

pub struct CompatibilityError {
previous: RankingRule,
current: RankingRule,
}
impl CompatibilityError {
pub(crate) fn to_response_error(
&self,
ranking_rules: &RankingRules,
previous_ranking_rules: &RankingRules,
query_index: usize,
previous_query_index: usize,
index_uid: &str,
previous_index_uid: &str,
) -> meilisearch_types::error::ResponseError {
let rule = self.current.as_string(
&ranking_rules.canonical_criteria,
&ranking_rules.canonical_sort,
query_index,
index_uid,
);
let previous_rule = self.previous.as_string(
&previous_ranking_rules.canonical_criteria,
&previous_ranking_rules.canonical_sort,
previous_query_index,
previous_index_uid,
);

let canonicalization_actions = ranking_rules.canonicalization_notes();
let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();

let mut msg = String::new();
let reason = self.reason();
let _ = writeln!(
&mut msg,
"The results of queries #{previous_query_index} and #{query_index} are incompatible: "
);
let _ = writeln!(&mut msg, " 1. {previous_rule}");
let _ = writeln!(&mut msg, " 2. {rule}");
let _ = writeln!(&mut msg, " - {reason}");

if !previous_canonicalization_actions.is_empty() {
let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
}

if !canonicalization_actions.is_empty() {
let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
}

ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
}
pub fn reason(&self) -> &'static str {
match (self.previous.kind, self.current.kind) {
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
| (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
| (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
"cannot compare a relevancy rule with a sort rule"
}

(RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
"cannot compare a relevancy rule with a geosort rule"
}

(RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
"cannot compare two sort rules in opposite directions"
}

(RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
"cannot compare a sort rule with a geosort rule"
}

(RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
"cannot compare two geosort rules in opposite directions"
}
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
| (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
"internal error, comparison should be possible"
}
}
}
}

impl RankingRules {
pub fn new(
criteria: Vec<Criterion>,
sort: Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
canonicalization_kind: CanonicalizationKind,
) -> Self {
let (canonical_criteria, canonical_sort, canonicalization_actions) =
Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
Self {
canonical_criteria,
canonical_sort,
canonicalization_actions,
source_criteria: criteria,
source_sort: sort,
}
}

fn canonicalize(
criteria: &[Criterion],
sort: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
canonicalization_kind: CanonicalizationKind,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
match canonicalization_kind {
CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
CanonicalizationKind::Keyword => {
Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
}
CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
}
}

fn canonicalize_placeholder(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut sort = None;

let mut sorted_fields = HashMap::new();
let mut canonicalization_actions = Vec::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;

for (criterion_index, criterion) in criteria.iter().enumerate() {
match criterion.clone() {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => {
canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}

Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion.clone());
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion.clone())
}
},
}
}

(canonical_criteria, canonical_sort, canonicalization_actions)
}

fn canonicalize_vector(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut sort = None;

let mut sorted_fields = HashMap::new();
let mut canonicalization_actions = Vec::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;

let mut vector = None;

'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
match criterion.clone() {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => match vector {
Some(previous_occurrence) => {
if sorted_fields.is_empty() {
canonicalization_actions.push(CanonicalizationAction::RemovedVector {
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else {
canonicalization_actions.push(
CanonicalizationAction::TruncatedVector {
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
truncated_from: RankingRuleSource::Criterion(criterion_index),
},
);
break 'criteria;
}
}
None => {
canonical_criteria.push(criterion.clone());
vector = Some(criterion_index);
}
},

Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion.clone());
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion.clone())
}
},
}
}

(canonical_criteria, canonical_sort, canonicalization_actions)
}

fn canonicalize_keyword(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut words = None;
let mut typo = None;
let mut proximity = None;
let mut sort = None;
let mut attribute = None;
let mut exactness = None;
let mut sorted_fields = HashMap::new();

let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;

let mut canonicalization_actions = Vec::new();

for (criterion_index, criterion) in criteria.iter().enumerate() {
let criterion = criterion.clone();
match criterion.clone() {
Criterion::Words => {
if let TermsMatchingStrategy::All = terms_matching_strategy {
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
reason: RemoveWords::MatchingStrategyAll,
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
continue;
}
if let Some(maybe_previous_index) = words {
if let Some(previous_index) = maybe_previous_index {
canonicalization_actions.push(
CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(
previous_index,
),
removed_occurrence: RankingRuleSource::Criterion(
criterion_index,
),
},
);
continue;
}
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
reason: RemoveWords::WasPrepended,
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
words = Some(Some(criterion_index));
canonical_criteria.push(criterion);
}
Criterion::Typo => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut typo,
);
}
Criterion::Proximity => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut proximity,
);
}
Criterion::Attribute => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut attribute,
);
}
Criterion::Exactness => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut exactness,
);
}

Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion);
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion)
}
},
}
}

(canonical_criteria, canonical_sort, canonicalization_actions)
}

pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) {
if current.kind != previous.kind {
return Err(CompatibilityError { current, previous });
}
}
Ok(())
}

pub fn constraint_count(&self) -> usize {
self.coalesce_iterator().count()
}

fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
self.canonical_criteria
.iter()
.enumerate()
.flat_map(|(criterion_index, criterion)| {
RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
})
.coalesce(
|previous @ RankingRule { source: previous_source, kind: previous_kind },
current @ RankingRule { source, kind }| {
match (previous_kind, kind) {
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => {
let merged_source = match (previous_source, source) {
(
RankingRuleSource::Criterion(previous),
RankingRuleSource::Criterion(current),
) => RankingRuleSource::CoalescedCriteria(previous, current),
|
||||||
|
(
|
||||||
|
RankingRuleSource::CoalescedCriteria(begin, _end),
|
||||||
|
RankingRuleSource::Criterion(current),
|
||||||
|
) => RankingRuleSource::CoalescedCriteria(begin, current),
|
||||||
|
(_previous, current) => current,
|
||||||
|
};
|
||||||
|
Ok(RankingRule { source: merged_source, kind })
|
||||||
|
}
|
||||||
|
_ => Err((previous, current)),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalization_notes(&self) -> String {
|
||||||
|
use CanonicalizationAction::*;
|
||||||
|
let mut notes = String::new();
|
||||||
|
for (index, action) in self.canonicalization_actions.iter().enumerate() {
|
||||||
|
let index = index + 1;
|
||||||
|
let _ = match action {
|
||||||
|
PrependedWords { prepended_index } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
|
||||||
|
prepended_index.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
prepended_index.rule_position()
|
||||||
|
),
|
||||||
|
RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
|
||||||
|
earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
removed_occurrence.rule_position(),
|
||||||
|
earlier_occurrence.rule_position(),
|
||||||
|
),
|
||||||
|
RemovedWords { reason, removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed rule `words` at position {} because {reason}",
|
||||||
|
removed_occurrence.rule_position()
|
||||||
|
),
|
||||||
|
RemovedPlaceholder { removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
|
||||||
|
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
removed_occurrence.rule_position()
|
||||||
|
),
|
||||||
|
TruncatedVector { vector_rule, truncated_from } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
|
||||||
|
truncated_from.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
truncated_from.rule_position(),
|
||||||
|
vector_rule.rule_position(),
|
||||||
|
),
|
||||||
|
RemovedVector { vector_rule, removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
|
||||||
|
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
removed_occurrence.rule_position(),
|
||||||
|
vector_rule.rule_position(),
|
||||||
|
),
|
||||||
|
RemovedSort { removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed rule `sort` at position {} because `query.sort` is empty",
|
||||||
|
removed_occurrence.rule_position()
|
||||||
|
),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
notes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
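The coalesce_iterator above folds runs of consecutive relevancy rules into a single constraint, so that for compatibility purposes `words, typo, proximity` counts as one relevancy block next to the sort rules. A minimal, self-contained sketch of that idea using itertools' coalesce on a toy Kind enum and plain strings (names here are illustrative, not the meilisearch types):

use itertools::Itertools;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Kind {
    Relevancy,
    AscendingSort,
}

fn main() {
    let rules = vec![
        ("words", Kind::Relevancy),
        ("typo", Kind::Relevancy),
        ("proximity", Kind::Relevancy),
        ("release_date:asc", Kind::AscendingSort),
    ];

    // Merge adjacent relevancy rules into one entry while keeping sorts separate,
    // mirroring how consecutive relevancy criteria are coalesced above.
    let coalesced: Vec<(String, Kind)> = rules
        .into_iter()
        .map(|(name, kind)| (name.to_string(), kind))
        .coalesce(|(prev_name, prev_kind), (name, kind)| {
            if prev_kind == Kind::Relevancy && kind == Kind::Relevancy {
                Ok((format!("{prev_name}, {name}"), kind))
            } else {
                Err(((prev_name, prev_kind), (name, kind)))
            }
        })
        .collect();

    // Prints two entries: the merged relevancy block and the sort rule.
    println!("{coalesced:?}");
}
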
fn canonicalize_sort(
    sorted_fields: &mut HashMap<String, RankingRuleSource>,
    sort_query: &[AscDesc],
    criterion_index: usize,
    canonicalization_actions: &mut Vec<CanonicalizationAction>,
) -> Vec<AscDesc> {
    let mut geo_sorted = None;
    let mut canonical_sort = Vec::new();
    for (sort_index, asc_desc) in sort_query.iter().enumerate() {
        let source = RankingRuleSource::Sort { criterion_index, sort_index };
        let asc_desc = asc_desc.clone();
        match asc_desc.clone() {
            AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
                match sorted_fields.entry(s) {
                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
                        .push(CanonicalizationAction::RemovedDuplicate {
                            earlier_occurrence: *entry.get(),
                            removed_occurrence: source,
                        }),
                    std::collections::hash_map::Entry::Vacant(entry) => {
                        entry.insert(source);
                        canonical_sort.push(asc_desc);
                    }
                }
            }
            AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted {
                Some(earlier_sort_index) => {
                    canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
                        earlier_occurrence: RankingRuleSource::Sort {
                            criterion_index,
                            sort_index: earlier_sort_index,
                        },
                        removed_occurrence: source,
                    })
                }
                None => {
                    geo_sorted = Some(sort_index);
                    canonical_sort.push(asc_desc);
                }
            },
        }
    }
    canonical_sort
}

fn canonicalize_criterion(
    criterion: Criterion,
    criterion_index: usize,
    terms_matching_strategy: TermsMatchingStrategy,
    words: &mut Option<Option<usize>>,
    canonicalization_actions: &mut Vec<CanonicalizationAction>,
    canonical_criteria: &mut Vec<Criterion>,
    rule: &mut Option<usize>,
) {
    *words = match (terms_matching_strategy, words.take()) {
        (TermsMatchingStrategy::All, words) => words,
        (_, None) => {
            // inject words
            canonicalization_actions.push(CanonicalizationAction::PrependedWords {
                prepended_index: RankingRuleSource::Criterion(criterion_index),
            });
            canonical_criteria.push(Criterion::Words);
            Some(None)
        }
        (_, words) => words,
    };
    if let Some(previous_index) = *rule {
        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
        });
    } else {
        *rule = Some(criterion_index);
        canonical_criteria.push(criterion)
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RankingRuleKind {
    Relevancy,
    AscendingSort,
    DescendingSort,
    AscendingGeoSort,
    DescendingGeoSort,
}

#[derive(Debug, Clone, Copy)]
pub struct RankingRule {
    source: RankingRuleSource,
    kind: RankingRuleKind,
}

#[derive(Debug, Clone, Copy)]
pub enum RankingRuleSource {
    Criterion(usize),
    CoalescedCriteria(usize, usize),
    Sort { criterion_index: usize, sort_index: usize },
}

impl RankingRuleSource {
    fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
        match self {
            RankingRuleSource::Criterion(criterion_index) => criteria
                .get(*criterion_index)
                .map(|c| c.to_string())
                .unwrap_or_else(|| "unknown".into()),
            RankingRuleSource::CoalescedCriteria(begin, end) => {
                let rules: Vec<_> = criteria
                    .get(*begin..=*end)
                    .iter()
                    .flat_map(|c| c.iter())
                    .map(|c| c.to_string())
                    .collect();
                rules.join(", ")
            }
            RankingRuleSource::Sort { criterion_index: _, sort_index } => {
                match sort.as_deref().and_then(|sort| sort.get(*sort_index)) {
                    Some(sort) => match sort {
                        AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"),
                        AscDesc::Desc(Member::Field(field_name)) => {
                            format!("{field_name}:desc")
                        }
                        AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(),
                        AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(),
                    },
                    None => "unknown".into(),
                }
            }
        }
    }

    fn rule_position(&self) -> String {
        match self {
            RankingRuleSource::Criterion(criterion_index) => {
                format!("#{criterion_index} in ranking rules")
            }
            RankingRuleSource::CoalescedCriteria(begin, end) => {
                format!("#{begin} to #{end} in ranking rules")
            }
            RankingRuleSource::Sort { criterion_index, sort_index } => format!(
                "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
            ),
        }
    }
}

impl RankingRule {
    fn from_criterion<'a>(
        criterion_index: usize,
        criterion: &'a Criterion,
        sort: &'a Option<Vec<AscDesc>>,
    ) -> impl Iterator<Item = Self> + 'a {
        let kind = match criterion {
            Criterion::Words
            | Criterion::Typo
            | Criterion::Proximity
            | Criterion::Attribute
            | Criterion::Exactness => RankingRuleKind::Relevancy,
            Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort,
            Criterion::Asc(_) => RankingRuleKind::AscendingSort,
            Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort,
            Criterion::Desc(_) => RankingRuleKind::DescendingSort,
            Criterion::Sort => {
                return either::Right(sort.iter().flatten().enumerate().map(
                    move |(rule_index, asc_desc)| {
                        Self::from_asc_desc(asc_desc, criterion_index, rule_index)
                    },
                ))
            }
        };

        either::Left(std::iter::once(Self {
            source: RankingRuleSource::Criterion(criterion_index),
            kind,
        }))
    }

    fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
        let kind = match asc_desc {
            AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort,
            AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort,
            AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort,
            AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort,
        };
        Self {
            source: RankingRuleSource::Sort {
                criterion_index: sort_index,
                sort_index: rule_index_in_sort,
            },
            kind,
        }
    }

    fn as_string(
        &self,
        canonical_criteria: &[Criterion],
        canonical_sort: &Option<Vec<AscDesc>>,
        query_index: usize,
        index_uid: &str,
    ) -> String {
        let kind = match self.kind {
            RankingRuleKind::Relevancy => "relevancy",
            RankingRuleKind::AscendingSort => "ascending sort",
            RankingRuleKind::DescendingSort => "descending sort",
            RankingRuleKind::AscendingGeoSort => "ascending geo sort",
            RankingRuleKind::DescendingGeoSort => "descending geo sort",
        };
        let rules = self.fetch_from_source(canonical_criteria, canonical_sort);

        let source = match self.source {
            RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
            RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
            RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
        };

        format!("{source}: {kind} {rules}")
    }

    fn fetch_from_source(
        &self,
        canonical_criteria: &[Criterion],
        canonical_sort: &Option<Vec<AscDesc>>,
    ) -> String {
        let rule_name = match self.source {
            RankingRuleSource::Criterion(index) => {
                canonical_criteria.get(index).map(|criterion| criterion.to_string())
            }
            RankingRuleSource::CoalescedCriteria(begin, end) => {
                let rules: Vec<String> = canonical_criteria
                    .get(begin..=end)
                    .into_iter()
                    .flat_map(|criteria| criteria.iter())
                    .map(|criterion| criterion.to_string())
                    .collect();

                (!rules.is_empty()).then_some(rules.join(", "))
            }
            RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort
                .as_deref()
                .and_then(|canonical_sort| canonical_sort.get(sort_index))
                .and_then(|asc_desc: &AscDesc| match asc_desc {
                    AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
                        Some(format!("on field `{s}`"))
                    }
                    _ => None,
                }),
        };

        let rule_name = rule_name.unwrap_or_else(|| "default".into());

        format!("rule(s) {rule_name}")
    }
}

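is_compatible_with (above) rejects a federated query as soon as the coalesced rule lists of two indexes disagree in kind at some position. A stripped-down sketch of that zip-and-compare check over plain kind lists (toy types only; the real code reports a CompatibilityError carrying both rules):

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Kind {
    Relevancy,
    AscendingSort,
    DescendingSort,
}

// Returns the first position where the two coalesced rule lists disagree, if any,
// mirroring the loop in is_compatible_with.
fn first_incompatibility(current: &[Kind], previous: &[Kind]) -> Option<usize> {
    current
        .iter()
        .zip(previous.iter())
        .position(|(current, previous)| current != previous)
}

fn main() {
    let index_a = [Kind::Relevancy, Kind::AscendingSort];
    let index_b = [Kind::Relevancy, Kind::DescendingSort];
    // The lists diverge at position 1: ascending vs descending sort.
    assert_eq!(first_incompatibility(&index_a, &index_b), Some(1));
}
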
@@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
 });
 
 static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
-    json!({"message": "The provided API key is invalid.",
+    json!({"message": null,
         "code": "invalid_api_key",
         "type": "auth",
         "link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -119,7 +119,8 @@ async fn error_access_expired_key() {
     thread::sleep(time::Duration::new(1, 0));
 
     for (method, route) in AUTHORIZATIONS.keys() {
-        let (response, code) = server.dummy_request(method, route).await;
+        let (mut response, code) = server.dummy_request(method, route).await;
+        response["message"] = serde_json::json!(null);
 
         assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
         assert_eq!(403, code, "{:?}", &response);
@@ -149,7 +150,8 @@ async fn error_access_unauthorized_index() {
         // filter `products` index routes
         .filter(|(_, route)| route.starts_with("/indexes/products"))
     {
-        let (response, code) = server.dummy_request(method, route).await;
+        let (mut response, code) = server.dummy_request(method, route).await;
+        response["message"] = serde_json::json!(null);
 
         assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
         assert_eq!(403, code, "{:?}", &response);
@@ -176,7 +178,8 @@ async fn error_access_unauthorized_action() {
 
         let key = response["key"].as_str().unwrap();
         server.use_api_key(key);
-        let (response, code) = server.dummy_request(method, route).await;
+        let (mut response, code) = server.dummy_request(method, route).await;
+        response["message"] = serde_json::json!(null);
 
         assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
         assert_eq!(403, code, "{:?}", &response);
@@ -280,7 +283,7 @@ async fn access_authorized_no_index_restriction() {
                 route,
                 action
             );
-            assert_ne!(code, 403);
+            assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action);
        }
    }
 }
@@ -1,7 +1,10 @@
+use actix_web::http::StatusCode;
+use actix_web::test;
+use jsonwebtoken::{EncodingKey, Header};
 use meili_snap::*;
 use uuid::Uuid;
 
-use crate::common::Server;
+use crate::common::{Server, Value};
 use crate::json;
 
 #[actix_rt::test]
@@ -436,3 +439,262 @@ async fn patch_api_keys_unknown_field() {
     }
     "###);
 }
+
+async fn send_request_with_custom_auth(
+    app: impl actix_web::dev::Service<
+        actix_http::Request,
+        Response = actix_web::dev::ServiceResponse<impl actix_web::body::MessageBody>,
+        Error = actix_web::Error,
+    >,
+    url: &str,
+    auth: &str,
+) -> (Value, StatusCode) {
+    let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request();
+    let res = test::call_service(&app, req).await;
+    let status_code = res.status();
+    let body = test::read_body(res).await;
+    let response: Value = serde_json::from_slice(&body).unwrap_or_default();
+
+    (response, status_code)
+}
+
+#[actix_rt::test]
+async fn invalid_auth_format() {
+    let server = Server::new_auth().await;
+    let app = server.init_web_app().await;
+
+    let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
+    let res = test::call_service(&app, req).await;
+    let status_code = res.status();
+    let body = test::read_body(res).await;
+    let response: Value = serde_json::from_slice(&body).unwrap_or_default();
+    snapshot!(status_code, @"401 Unauthorized");
+    snapshot!(response, @r###"
+    {
+      "message": "The Authorization header is missing. It must use the bearer authorization method.",
+      "code": "missing_authorization_header",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
+    }
+    "###);
+
+    let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
+    let res = test::call_service(&app, req).await;
+    let status_code = res.status();
+    let body = test::read_body(res).await;
+    let response: Value = serde_json::from_slice(&body).unwrap_or_default();
+    snapshot!(status_code, @"401 Unauthorized");
+    snapshot!(response, @r###"
+    {
+      "message": "The Authorization header is missing. It must use the bearer authorization method.",
+      "code": "missing_authorization_header",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
+    }
+    "###);
+
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "The provided API key is invalid.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn invalid_api_key() {
+    let server = Server::new_auth().await;
+    let app = server.init_web_app().await;
+
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "The provided API key is invalid.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+
+    let uuid = Uuid::nil();
+    let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
+    let req = test::TestRequest::post()
+        .uri("/keys")
+        .insert_header(("Authorization", "Bearer MASTER_KEY"))
+        .set_json(&key)
+        .to_request();
+    let res = test::call_service(&app, req).await;
+    let body = test::read_body(res).await;
+    let response: Value = serde_json::from_slice(&body).unwrap_or_default();
+    snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
+    {
+      "name": null,
+      "description": null,
+      "key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
+      "uid": "00000000-0000-0000-0000-000000000000",
+      "actions": [
+        "search"
+      ],
+      "indexes": [
+        "dog"
+      ],
+      "expiresAt": null,
+      "createdAt": "[date]",
+      "updatedAt": "[date]"
+    }
+    "###);
+    let key = response["key"].as_str().unwrap();
+
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}"))
+            .await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn invalid_tenant_token() {
+    let server = Server::new_auth().await;
+    let app = server.init_web_app().await;
+
+    // The tenant token won't be recognized at all if we're not on a search route
+    let claims = json!({ "tamo": "kefir" });
+    let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
+        .unwrap();
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}"))
+            .await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "The provided API key is invalid.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+
+    let claims = json!({ "tamo": "kefir" });
+    let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
+        .unwrap();
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+
+    // The error messages are not ideal but that's expected since we cannot _yet_ use deserr
+    let claims = json!({ "searchRules": "kefir" });
+    let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
+        .unwrap();
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+
+    let uuid = Uuid::nil();
+    let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() });
+    let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
+        .unwrap();
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "Could not decode tenant token, InvalidSignature.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+
+    // ~~ For the next tests we first need a valid API key
+    let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
+    let req = test::TestRequest::post()
+        .uri("/keys")
+        .insert_header(("Authorization", "Bearer MASTER_KEY"))
+        .set_json(&key)
+        .to_request();
+    let res = test::call_service(&app, req).await;
+    let body = test::read_body(res).await;
+    let response: Value = serde_json::from_slice(&body).unwrap_or_default();
+    snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
+    {
+      "name": null,
+      "description": null,
+      "key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
+      "uid": "00000000-0000-0000-0000-000000000000",
+      "actions": [
+        "search"
+      ],
+      "indexes": [
+        "dog"
+      ],
+      "expiresAt": null,
+      "createdAt": "[date]",
+      "updatedAt": "[date]"
+    }
+    "###);
+    let key = response["key"].as_str().unwrap();
+
+    let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() });
+    let jwt = jsonwebtoken::encode(
+        &Header::default(),
+        &claims,
+        &EncodingKey::from_secret(key.as_bytes()),
+    )
+    .unwrap();
+    // Try to access an index that is not authorized by the tenant token
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+
+    // Try to access an index that *is* authorized by the tenant token but not by the api key used to generate the tt
+    let (response, status_code) =
+        send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}"))
+            .await;
+    snapshot!(status_code, @"403 Forbidden");
+    snapshot!(response, @r###"
+    {
+      "message": "The API key used to generate this tenant token cannot acces the index `doggo`.",
+      "code": "invalid_api_key",
+      "type": "auth",
+      "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    }
+    "###);
+}
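For contrast with the failing cases above: a well-formed tenant token in this harness carries a `searchRules` array plus the `apiKeyUid` of the parent key, and is signed with that parent key. A hedged sketch reusing the same jsonwebtoken calls as the tests (the claim names come from the code above; the helper name and the sample rules are illustrative, and acceptance of course also depends on the parent key's own permissions):

use jsonwebtoken::{encode, EncodingKey, Header};
use serde_json::json;
use uuid::Uuid;

// Sketch: build a tenant token shaped like the ones the tests above expect.
fn tenant_token_for(parent_key: &str, parent_key_uid: Uuid) -> String {
    let claims = json!({
        "searchRules": ["dog"],                       // indexes this token may search
        "apiKeyUid": parent_key_uid.to_string(),      // uid of the key that signs the token
    });
    encode(&Header::default(), &claims, &EncodingKey::from_secret(parent_key.as_bytes()))
        .expect("signing a tenant token should not fail")
}
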
@@ -53,7 +53,8 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
 });
 
 static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
-    json!({"message": "The provided API key is invalid.",
+    json!({
+        "message": null,
         "code": "invalid_api_key",
         "type": "auth",
         "link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -191,7 +192,9 @@ macro_rules! compute_forbidden_search {
            server.use_api_key(&web_token);
            let index = server.index("sales");
            index
-               .search(json!({}), |response, code| {
+               .search(json!({}), |mut response, code| {
+                   // We don't assert anything on the message since it may change between cases
+                   response["message"] = serde_json::json!(null);
                    assert_eq!(
                        response,
                        INVALID_RESPONSE.clone(),
@@ -495,7 +498,8 @@ async fn error_access_forbidden_routes() {
 
     for ((method, route), actions) in AUTHORIZATIONS.iter() {
         if !actions.contains("search") {
-            let (response, code) = server.dummy_request(method, route).await;
+            let (mut response, code) = server.dummy_request(method, route).await;
+            response["message"] = serde_json::json!(null);
             assert_eq!(response, INVALID_RESPONSE.clone());
             assert_eq!(code, 403);
         }
@@ -529,14 +533,16 @@ async fn error_access_expired_parent_key() {
     server.use_api_key(&web_token);
 
     // test search request while parent_key is not expired
-    let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
+    let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
+    response["message"] = serde_json::json!(null);
     assert_ne!(response, INVALID_RESPONSE.clone());
     assert_ne!(code, 403);
 
     // wait until the key is expired.
     thread::sleep(time::Duration::new(1, 0));
 
-    let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
+    let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
+    response["message"] = serde_json::json!(null);
     assert_eq!(response, INVALID_RESPONSE.clone());
     assert_eq!(code, 403);
 }
@@ -585,7 +591,8 @@ async fn error_access_modified_token() {
         .join(".");
 
     server.use_api_key(&altered_token);
-    let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
+    let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
+    response["message"] = serde_json::json!(null);
     assert_eq!(response, INVALID_RESPONSE.clone());
     assert_eq!(code, 403);
 }
@@ -109,9 +109,11 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
 
 fn invalid_response(query_index: Option<usize>) -> Value {
     let message = if let Some(query_index) = query_index {
-        format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
+        json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid."))
     } else {
-        "The provided API key is invalid.".to_string()
+        // if it's anything else we simply return null and will tests all the
+        // error messages somewhere else
+        json!(null)
     };
     json!({"message": message,
         "code": "invalid_api_key",
@@ -308,6 +310,23 @@ macro_rules! compute_authorized_single_search {
                    tenant_token,
                    key_content
                );
+
+               // federated
+               let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales", "filter": $filter}]})).await;
+               assert_eq!(
+                   200, code,
+                   "{} using tenant_token: {:?} generated with parent_key: {:?}",
+                   response, tenant_token, key_content
+               );
+               assert_eq!(
+                   // same count as the search is federated over a single query
+                   $expected_count,
+                   response["hits"].as_array().unwrap().len(),
+                   "{} using tenant_token: {:?} generated with parent_key: {:?}",
+                   response,
+                   tenant_token,
+                   key_content
+               );
            }
        }
    };
@@ -373,6 +392,25 @@ macro_rules! compute_authorized_multiple_search {
                    tenant_token,
                    key_content
                );
+
+               let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [
+                   {"indexUid": "sales", "filter": $filter1},
+                   {"indexUid": "products", "filter": $filter2},
+               ]})).await;
+               assert_eq!(
+                   code, 200,
+                   "{} using tenant_token: {:?} generated with parent_key: {:?}",
+                   response, tenant_token, key_content
+               );
+               assert_eq!(
+                   response["hits"].as_array().unwrap().len(),
+                   // sum of counts as the search is federated across to queries in different indexes
+                   $expected_count1 + $expected_count2,
+                   "{} using tenant_token: {:?} generated with parent_key: {:?}",
+                   response,
+                   tenant_token,
+                   key_content
+               );
            }
        }
    };
@@ -414,7 +452,28 @@ macro_rules! compute_forbidden_single_search {
        for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
            let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
            server.use_api_key(&web_token);
-           let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
+           let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
+           if failed_query_index.is_none() && !response["message"].is_null() {
+               response["message"] = serde_json::json!(null);
+           }
+           assert_eq!(
+               response,
+               invalid_response(failed_query_index),
+               "{} using tenant_token: {:?} generated with parent_key: {:?}",
+               response,
+               tenant_token,
+               key_content
+           );
+           assert_eq!(
+               code, 403,
+               "{} using tenant_token: {:?} generated with parent_key: {:?}",
+               response, tenant_token, key_content
+           );
+
+           let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales"}]})).await;
+           if failed_query_index.is_none() && !response["message"].is_null() {
+               response["message"] = serde_json::json!(null);
+           }
            assert_eq!(
                response,
                invalid_response(failed_query_index),
@@ -469,10 +528,34 @@ macro_rules! compute_forbidden_multiple_search {
        for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
            let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
            server.use_api_key(&web_token);
-           let (response, code) = server.multi_search(json!({"queries" : [
+           let (mut response, code) = server.multi_search(json!({"queries" : [
                {"indexUid": "sales"},
                {"indexUid": "products"},
            ]})).await;
+           if failed_query_index.is_none() && !response["message"].is_null() {
+               response["message"] = serde_json::json!(null);
+           }
+           assert_eq!(
+               response,
+               invalid_response(failed_query_index),
+               "{} using tenant_token: {:?} generated with parent_key: {:?}",
+               response,
+               tenant_token,
+               key_content
+           );
+           assert_eq!(
+               code, 403,
+               "{} using tenant_token: {:?} generated with parent_key: {:?}",
+               response, tenant_token, key_content
+           );
+
+           let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [
+               {"indexUid": "sales"},
+               {"indexUid": "products"},
+           ]})).await;
+           if failed_query_index.is_none() && !response["message"].is_null() {
+               response["message"] = serde_json::json!(null);
+           }
            assert_eq!(
                response,
                invalid_response(failed_query_index),
@@ -1073,18 +1156,20 @@ async fn error_access_expired_parent_key() {
     server.use_api_key(&web_token);
 
     // test search request while parent_key is not expired
-    let (response, code) = server
+    let (mut response, code) = server
         .multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
         .await;
+    response["message"] = serde_json::json!(null);
     assert_ne!(response, invalid_response(None));
     assert_ne!(code, 403);
 
     // wait until the key is expired.
     thread::sleep(time::Duration::new(1, 0));
 
-    let (response, code) = server
+    let (mut response, code) = server
         .multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
         .await;
+    response["message"] = serde_json::json!(null);
     assert_eq!(response, invalid_response(None));
     assert_eq!(code, 403);
 }
@@ -1134,8 +1219,9 @@ async fn error_access_modified_token() {
         .join(".");
 
     server.use_api_key(&altered_token);
-    let (response, code) =
+    let (mut response, code) =
        server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
+    response["message"] = serde_json::json!(null);
     assert_eq!(response, invalid_response(None));
     assert_eq!(code, 403);
 }
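A recurring change in the hunks above is `let (mut response, ...)` followed by `response["message"] = serde_json::json!(null)`: the exact message now varies between cases, so the tests blank it before comparing against the shared INVALID_RESPONSE / invalid_response fixtures. Index-assignment on the test `Value` wrapper is what the `DerefMut` impl added below (in the common test module) makes possible; a toy sketch of that pattern, assuming nothing beyond the impls shown in this diff:

// Minimal stand-in for the test-suite wrapper around serde_json::Value.
struct Value(serde_json::Value);

impl std::ops::Deref for Value {
    type Target = serde_json::Value;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl std::ops::DerefMut for Value {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

fn main() {
    let mut response = Value(serde_json::json!({ "message": "anything", "code": "invalid_api_key" }));
    // With DerefMut in place, IndexMut on serde_json::Value is reachable through the wrapper.
    response["message"] = serde_json::json!(null);
    assert!(response["message"].is_null());
}
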
@@ -182,14 +182,10 @@ impl Index<'_> {
         self.service.get(url).await
     }
 
-    pub async fn get_document(
-        &self,
-        id: u64,
-        options: Option<GetDocumentOptions>,
-    ) -> (Value, StatusCode) {
+    pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
         let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
-        if let Some(fields) = options.and_then(|o| o.fields) {
-            let _ = write!(url, "?fields={}", fields.join(","));
+        if let Some(options) = options {
+            write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
         }
         self.service.get(url).await
     }
@@ -205,18 +201,11 @@ impl Index<'_> {
     }
 
     pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
-        let mut url = format!("/indexes/{}/documents?", urlencode(self.uid.as_ref()));
-        if let Some(limit) = options.limit {
-            let _ = write!(url, "limit={}&", limit);
-        }
-        if let Some(offset) = options.offset {
-            let _ = write!(url, "offset={}&", offset);
-        }
-        if let Some(attributes_to_retrieve) = options.attributes_to_retrieve {
-            let _ = write!(url, "fields={}&", attributes_to_retrieve.join(","));
-        }
-
+        let url = format!(
+            "/indexes/{}/documents{}",
+            urlencode(self.uid.as_ref()),
+            yaup::to_string(&options).unwrap()
+        );
         self.service.get(url).await
     }
@@ -376,7 +365,7 @@ impl Index<'_> {
     }
 
     pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
-        let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query);
+        let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
         self.service.get(url).await
     }
 
@@ -413,7 +402,7 @@ impl Index<'_> {
     }
 
     pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
-        let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
+        let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query);
         self.service.get(url).await
     }
 
@@ -435,13 +424,14 @@ impl Index<'_> {
     }
 }
 
-pub struct GetDocumentOptions {
-    pub fields: Option<Vec<&'static str>>,
-}
-
-#[derive(Debug, Default)]
+#[derive(Debug, Default, serde::Serialize)]
+#[serde(rename_all = "camelCase")]
 pub struct GetAllDocumentsOptions {
+    #[serde(skip_serializing_if = "Option::is_none")]
     pub limit: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
     pub offset: Option<usize>,
-    pub attributes_to_retrieve: Option<Vec<&'static str>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub fields: Option<Vec<&'static str>>,
+    pub retrieve_vectors: bool,
 }
@@ -6,7 +6,7 @@ pub mod service;
 use std::fmt::{self, Display};
 
 #[allow(unused)]
-pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
+pub use index::GetAllDocumentsOptions;
 use meili_snap::json_string;
 use serde::{Deserialize, Serialize};
 #[allow(unused)]
@@ -26,6 +26,15 @@ impl Value {
             panic!("Didn't find any task id in: {self}");
         }
     }
+
+    // Panic if the json doesn't contain the `status` field set to "succeeded"
+    #[track_caller]
+    pub fn succeeded(&self) -> &Self {
+        if self["status"] != serde_json::Value::String(String::from("succeeded")) {
+            panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap());
+        }
+        self
+    }
 }
 
 impl From<serde_json::Value> for Value {
@@ -42,6 +51,12 @@ impl std::ops::Deref for Value {
     }
 }
 
+impl std::ops::DerefMut for Value {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
 
 impl PartialEq<serde_json::Value> for Value {
     fn eq(&self, other: &serde_json::Value) -> bool {
         &self.0 == other
@@ -65,7 +80,7 @@ impl Display for Value {
         write!(
             f,
             "{}",
-            json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
+            json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
         )
     }
 }
@@ -6,7 +6,7 @@ use std::time::Duration;
 use actix_http::body::MessageBody;
 use actix_web::dev::ServiceResponse;
 use actix_web::http::StatusCode;
-use byte_unit::{Byte, ByteUnit};
+use byte_unit::{Byte, Unit};
 use clap::Parser;
 use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
 use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
@@ -231,9 +231,9 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
         env: "development".to_owned(),
         #[cfg(feature = "analytics")]
         no_analytics: true,
-        max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(),
-        max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(),
-        http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
+        max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(),
+        max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(),
+        http_payload_size_limit: Byte::from_u64_with_unit(10, Unit::MiB).unwrap(),
         snapshot_dir: ".".into(),
         indexer_options: IndexerOpts {
            // memory has to be unlimited because several meilisearch are running in test context.
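The test-harness changes above delegate query-string building to yaup: GetAllDocumentsOptions now derives Serialize, and yaup::to_string produces the whole query string (including the leading `?` when any field is set, which is presumably why the literal `?` was dropped from the search_get and similar_get format strings). A hedged sketch of the call shape, reusing the struct exactly as defined in the diff; the precise encoding of the output is up to yaup:

use serde::Serialize;

#[derive(Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GetAllDocumentsOptions {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub limit: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub offset: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fields: Option<Vec<&'static str>>,
    pub retrieve_vectors: bool,
}

fn main() {
    let options = GetAllDocumentsOptions { limit: Some(2), ..Default::default() };
    // Expected to look roughly like "?limit=2&retrieveVectors=false"; the final URL is then
    // built as format!("/indexes/{uid}/documents{query}"), as in the diff above.
    let query = yaup::to_string(&options).unwrap();
    println!("{query}");
}
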
Some files were not shown because too many files have changed in this diff.