mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-19 04:50:37 +00:00
Compare commits
186 Commits
update-yau
...
binary-qua
Author | SHA1 | Date | |
---|---|---|---|
42bfc67871 | |||
05cc2d1fac | |||
22b9c277d0 | |||
16bde973aa | |||
13d1d78a2d | |||
b2b7a633a6 | |||
7be109cafe | |||
6ebefd1067 | |||
d25ae36e22 | |||
b64b4ab6ca | |||
427861b323 | |||
d29cb75061 | |||
128e6c7502 | |||
3129f96603 | |||
c701d89fdc | |||
3d9befd64f | |||
ee14d5196c | |||
d96372b9c4 | |||
ea67816a21 | |||
c885fcebcc | |||
b6e1a1f2f5 | |||
277f4883f6 | |||
015d90a962 | |||
0df84bbba7 | |||
e53de15b8e | |||
8c4921b9dd | |||
f6a00f4a90 | |||
ce08dc509b | |||
1daaed163a | |||
809e742253 | |||
decdfe03bc | |||
aae5c324d7 | |||
a108d8f6f3 | |||
34cf576339 | |||
eb292a7a62 | |||
e28332a904 | |||
a1dcde6b9a | |||
544e98ca99 | |||
1e4699b82c | |||
2c09c324f7 | |||
3d6b61d8d2 | |||
1374b661d1 | |||
7e3c306c54 | |||
2608a596a0 | |||
e16edb2c35 | |||
5c758438fc | |||
ab6cac2321 | |||
6fb36ed30e | |||
dcdc83946f | |||
3c4c46377b | |||
7da21bb601 | |||
13161fd7d0 | |||
b81e2951a9 | |||
d75e0098c7 | |||
27496354e2 | |||
2e0ff56f3f | |||
a74fb87d1e | |||
558b66e535 | |||
cade18bd47 | |||
298c7b0c93 | |||
606e108420 | |||
7be17b7e4c | |||
1693332cab | |||
ddd564665b | |||
2a38f5c757 | |||
133d33d72c | |||
fb683fe88b | |||
4ae11bfd31 | |||
9736e16a88 | |||
6fa4da8ae7 | |||
19d7cdc20d | |||
c229200820 | |||
bad28cc9e2 | |||
534f696b29 | |||
a04041c8f2 | |||
b347b66619 | |||
e580d6b98f | |||
8ba65e333b | |||
43875e6758 | |||
d7844a6e45 | |||
e9bf4c43a4 | |||
a8a0854421 | |||
0a8f50695e | |||
09d9b63e1c | |||
b9b938c902 | |||
6bf07d969e | |||
e35ef31738 | |||
3f212a8202 | |||
bc547dad6f | |||
3bc8f81abc | |||
a89eea233b | |||
34fabed214 | |||
fca9fe39b3 | |||
f5cf01e7d1 | |||
d1dd7e5d09 | |||
d18c1f77d7 | |||
d0b05ae691 | |||
e9bf4eb100 | |||
b368105272 | |||
e0eff08095 | |||
304a9df52d | |||
39f60abd7d | |||
1991bd03da | |||
ee39309aae | |||
0d31be1494 | |||
3493093c4f | |||
7cef2299cf | |||
a838f39fce | |||
600e97d9dc | |||
d1962b2b0f | |||
8b450b84f8 | |||
7add7d053c | |||
7559dfc814 | |||
6c6c4732a1 | |||
0502b17501 | |||
3976fe660e | |||
50f8218a5d | |||
19585f1a4f | |||
8ec6e175e5 | |||
57d066595b | |||
75b2e02cd2 | |||
40f05fe156 | |||
734d1c53ad | |||
52d0d35b39 | |||
5432776132 | |||
66470b27e6 | |||
0a9bd398c7 | |||
7967e93c16 | |||
a6f3a01c6a | |||
4ca4a3f954 | |||
e4a69c5ac3 | |||
ff2e498267 | |||
531e3d7d6a | |||
63dded3961 | |||
2cdcb703d9 | |||
6607875f49 | |||
ea61e5cbec | |||
31a793d226 | |||
d85ab23b82 | |||
b7349910d9 | |||
49fa41ce65 | |||
400cf3eb92 | |||
376b3a19a7 | |||
d92c173fdc | |||
b867829ef1 | |||
6b29676e7e | |||
caad40964a | |||
cc5dca8321 | |||
5d50850e12 | |||
a73ccc78a6 | |||
9eb6f522ea | |||
04f6523f3c | |||
30d66abf8d | |||
84e498299b | |||
7a84697570 | |||
4148fbbe85 | |||
cb765ad249 | |||
2e50c6ec81 | |||
40b2345394 | |||
30293883e0 | |||
b833be46b9 | |||
0a4118329e | |||
261e92d7e6 | |||
5cd08979b1 | |||
2af7e4dbe9 | |||
a998b881f6 | |||
b81953a65d | |||
091bb157f1 | |||
1b639ce44b | |||
87cf8a3c94 | |||
0f578348f1 | |||
fad4675abe | |||
1ab03c4ede | |||
0c6e4b2f00 | |||
42b3f52ef9 | |||
98e062a714 | |||
8412665957 | |||
fc584f1db3 | |||
2b6db6541e | |||
c2fb7afe59 | |||
41976b82b1 | |||
c36410fcbf | |||
7ce2691374 | |||
4f03b0cf5b | |||
c26db7878c | |||
aac1d769a7 |
4
.github/workflows/bench-manual.yml
vendored
4
.github/workflows/bench-manual.yml
vendored
@ -18,11 +18,9 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
|
||||
run: |
|
||||
|
4
.github/workflows/bench-pr.yml
vendored
4
.github/workflows/bench-pr.yml
vendored
@ -35,11 +35,9 @@ jobs:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks on PR ${{ github.event.issue.id }}
|
||||
run: |
|
||||
|
4
.github/workflows/bench-push-indexing.yml
vendored
4
.github/workflows/bench-push-indexing.yml
vendored
@ -12,11 +12,9 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
|
||||
|
4
.github/workflows/benchmarks-manual.yml
vendored
4
.github/workflows/benchmarks-manual.yml
vendored
@ -18,11 +18,9 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
|
4
.github/workflows/benchmarks-pr.yml
vendored
4
.github/workflows/benchmarks-pr.yml
vendored
@ -13,11 +13,9 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
- name: Check for Command
|
||||
id: command
|
||||
|
@ -16,11 +16,9 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
|
@ -15,11 +15,9 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
|
@ -15,11 +15,9 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
|
@ -15,11 +15,9 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
|
7
.github/workflows/flaky-tests.yml
vendored
7
.github/workflows/flaky-tests.yml
vendored
@ -1,4 +1,6 @@
|
||||
name: Look for flaky tests
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
@ -16,10 +18,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
|
7
.github/workflows/fuzzer-indexing.yml
vendored
7
.github/workflows/fuzzer-indexing.yml
vendored
@ -1,5 +1,6 @@
|
||||
name: Run the indexing fuzzer
|
||||
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
@ -12,11 +13,9 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run the fuzzer
|
||||
|
7
.github/workflows/publish-apt-brew-pkg.yml
vendored
7
.github/workflows/publish-apt-brew-pkg.yml
vendored
@ -15,6 +15,8 @@ jobs:
|
||||
|
||||
debian:
|
||||
name: Publish debian packagge
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
@ -25,10 +27,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
|
22
.github/workflows/publish-binaries.yml
vendored
22
.github/workflows/publish-binaries.yml
vendored
@ -35,6 +35,8 @@ jobs:
|
||||
publish-linux:
|
||||
name: Publish binary for Linux
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27
|
||||
@ -45,10 +47,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
@ -78,10 +77,7 @@ jobs:
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
@ -107,12 +103,10 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Installing Rust toolchain
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
override: true
|
||||
- name: Cargo build
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -132,6 +126,8 @@ jobs:
|
||||
name: Publish binary for aarch64
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27
|
||||
image: ubuntu:18.04
|
||||
@ -154,12 +150,10 @@ jobs:
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
override: true
|
||||
- name: Configure target aarch64 GNU
|
||||
## Environment variable is not passed using env:
|
||||
## LD gold won't work with MUSL
|
||||
|
3
.github/workflows/publish-docker-images.yml
vendored
3
.github/workflows/publish-docker-images.yml
vendored
@ -80,10 +80,11 @@ jobs:
|
||||
type=ref,event=tag
|
||||
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
|
||||
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
43
.github/workflows/test-suite.yml
vendored
43
.github/workflows/test-suite.yml
vendored
@ -21,6 +21,8 @@ jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-18.04
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
@ -31,10 +33,7 @@ jobs:
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Setup test with Rust stable
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
uses: helix-editor/rust-toolchain@v1
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo check without any default features
|
||||
@ -59,10 +58,7 @@ jobs:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -77,6 +73,8 @@ jobs:
|
||||
test-all-features:
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
@ -87,10 +85,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
@ -100,6 +95,8 @@ jobs:
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:18.04
|
||||
@ -110,13 +107,10 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
echo "lindera has been found in the sources and it shouldn't"
|
||||
exit 1
|
||||
fi
|
||||
@ -127,6 +121,8 @@ jobs:
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
@ -137,10 +133,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run tests in debug
|
||||
@ -154,11 +147,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: 1.75.0
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
@ -173,10 +164,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly
|
||||
toolchain: nightly-2024-06-25
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
|
@ -18,11 +18,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: helix-editor/rust-toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Install sd
|
||||
run: cargo install sd
|
||||
- name: Update Cargo.toml file
|
||||
|
@ -109,6 +109,12 @@ They are JSON files with the following structure (comments are not actually supp
|
||||
"run_count": 3,
|
||||
// List of arguments to add to the Meilisearch command line.
|
||||
"extra_cli_args": ["--max-indexing-threads=1"],
|
||||
// An expression that can be parsed as a comma-separated list of targets and levels
|
||||
// as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
|
||||
// The expression is used to filter the spans that are measured for profiling purposes.
|
||||
// Optional, defaults to "indexing::=trace" (for indexing workloads), common other values is
|
||||
// "search::=trace"
|
||||
"target": "indexing::=trace",
|
||||
// List of named assets that can be used in the commands.
|
||||
"assets": {
|
||||
// name of the asset.
|
||||
|
342
Cargo.lock
generated
342
Cargo.lock
generated
@ -36,9 +36,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-http"
|
||||
version = "3.6.0"
|
||||
version = "3.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
|
||||
checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d"
|
||||
dependencies = [
|
||||
"actix-codec",
|
||||
"actix-rt",
|
||||
@ -46,7 +46,7 @@ dependencies = [
|
||||
"actix-tls",
|
||||
"actix-utils",
|
||||
"ahash",
|
||||
"base64 0.21.7",
|
||||
"base64 0.22.1",
|
||||
"bitflags 2.5.0",
|
||||
"brotli",
|
||||
"bytes",
|
||||
@ -85,13 +85,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-router"
|
||||
version = "0.5.1"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799"
|
||||
checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
|
||||
dependencies = [
|
||||
"bytestring",
|
||||
"cfg-if",
|
||||
"http 0.2.11",
|
||||
"regex",
|
||||
"regex-lite",
|
||||
"serde",
|
||||
"tracing",
|
||||
]
|
||||
@ -138,9 +140,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-tls"
|
||||
version = "3.3.0"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
|
||||
checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389"
|
||||
dependencies = [
|
||||
"actix-rt",
|
||||
"actix-service",
|
||||
@ -167,9 +169,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-web"
|
||||
version = "4.5.1"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
|
||||
checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32"
|
||||
dependencies = [
|
||||
"actix-codec",
|
||||
"actix-http",
|
||||
@ -196,7 +198,7 @@ dependencies = [
|
||||
"mime",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"regex",
|
||||
"regex-lite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
@ -220,8 +222,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-web-static-files"
|
||||
version = "3.0.5"
|
||||
source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df"
|
||||
version = "4.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"derive_more",
|
||||
@ -378,9 +381,27 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arroy"
|
||||
version = "0.3.1"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
|
||||
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"heed",
|
||||
"log",
|
||||
"memmap2 0.9.4",
|
||||
"ordered-float",
|
||||
"rand",
|
||||
"rayon",
|
||||
"roaring",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arroy"
|
||||
version = "0.4.0"
|
||||
source = "git+https://github.com/meilisearch/arroy?branch=binary-quantization#facc8575222d3f5da5b9a94288e44896911e701f"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@ -613,9 +634,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "3.4.0"
|
||||
version = "6.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
|
||||
checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
@ -624,9 +645,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "brotli-decompressor"
|
||||
version = "2.5.1"
|
||||
version = "4.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
|
||||
checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
@ -676,9 +697,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.15.0"
|
||||
version = "1.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
|
||||
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
@ -895,9 +916,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "charabia"
|
||||
version = "0.8.10"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
|
||||
checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"cow-utils",
|
||||
@ -986,7 +1007,7 @@ dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
"strsim 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1277,12 +1298,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.20.3"
|
||||
version = "0.20.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
|
||||
checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1"
|
||||
dependencies = [
|
||||
"darling_core 0.20.3",
|
||||
"darling_macro 0.20.3",
|
||||
"darling_core 0.20.9",
|
||||
"darling_macro 0.20.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1295,21 +1316,21 @@ dependencies = [
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim",
|
||||
"strsim 0.10.0",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.20.3"
|
||||
version = "0.20.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
|
||||
checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim",
|
||||
"strsim 0.11.1",
|
||||
"syn 2.0.60",
|
||||
]
|
||||
|
||||
@ -1326,11 +1347,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.20.3"
|
||||
version = "0.20.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
|
||||
checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
|
||||
dependencies = [
|
||||
"darling_core 0.20.3",
|
||||
"darling_core 0.20.9",
|
||||
"quote",
|
||||
"syn 2.0.60",
|
||||
]
|
||||
@ -1383,6 +1404,15 @@ dependencies = [
|
||||
"derive_builder_macro 0.13.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
|
||||
dependencies = [
|
||||
"derive_builder_macro 0.20.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder_core"
|
||||
version = "0.12.0"
|
||||
@ -1407,6 +1437,18 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder_core"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
|
||||
dependencies = [
|
||||
"darling 0.20.9",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.60",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder_macro"
|
||||
version = "0.12.0"
|
||||
@ -1427,6 +1469,16 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder_macro"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
|
||||
dependencies = [
|
||||
"derive_builder_core 0.20.0",
|
||||
"syn 2.0.60",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
@ -1454,7 +1506,7 @@ dependencies = [
|
||||
"serde-cs",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"strsim",
|
||||
"strsim 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1707,29 +1759,6 @@ dependencies = [
|
||||
"syn 2.0.60",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"env_filter",
|
||||
"humantime",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
@ -1784,7 +1813,7 @@ version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
|
||||
dependencies = [
|
||||
"darling 0.20.3",
|
||||
"darling 0.20.9",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.60",
|
||||
@ -2262,9 +2291,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.20.1"
|
||||
version = "0.20.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
|
||||
checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"byteorder",
|
||||
@ -2379,12 +2408,6 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.27"
|
||||
@ -2450,6 +2473,7 @@ name = "index-scheduler"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"big_s",
|
||||
"bincode",
|
||||
"crossbeam",
|
||||
@ -2460,6 +2484,7 @@ dependencies = [
|
||||
"file-store",
|
||||
"flate2",
|
||||
"insta",
|
||||
"maplit",
|
||||
"meili-snap",
|
||||
"meilisearch-auth",
|
||||
"meilisearch-types",
|
||||
@ -2778,9 +2803,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88"
|
||||
checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f"
|
||||
dependencies = [
|
||||
"lindera-analyzer",
|
||||
"lindera-core",
|
||||
@ -2791,9 +2816,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-analyzer"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7"
|
||||
checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@ -2821,9 +2846,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-cc-cedict"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d"
|
||||
checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@ -2835,29 +2860,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-cc-cedict-builder"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683"
|
||||
checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"csv",
|
||||
"encoding",
|
||||
"env_logger",
|
||||
"glob",
|
||||
"lindera-compress",
|
||||
"lindera-core",
|
||||
"lindera-decompress",
|
||||
"log",
|
||||
"yada",
|
||||
"lindera-dictionary-builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lindera-compress"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500"
|
||||
checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
@ -2866,9 +2883,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-core"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8"
|
||||
checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@ -2883,9 +2900,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-decompress"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e"
|
||||
checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
@ -2894,9 +2911,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-dictionary"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe"
|
||||
checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@ -2918,10 +2935,32 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lindera-filter"
|
||||
version = "0.30.0"
|
||||
name = "lindera-dictionary-builder"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d"
|
||||
checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"csv",
|
||||
"derive_builder 0.20.0",
|
||||
"encoding",
|
||||
"encoding_rs",
|
||||
"encoding_rs_io",
|
||||
"glob",
|
||||
"lindera-compress",
|
||||
"lindera-core",
|
||||
"lindera-decompress",
|
||||
"log",
|
||||
"yada",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lindera-filter"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"csv",
|
||||
@ -2944,9 +2983,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25"
|
||||
checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@ -2958,31 +2997,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-builder"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90"
|
||||
checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"csv",
|
||||
"encoding_rs",
|
||||
"encoding_rs_io",
|
||||
"env_logger",
|
||||
"glob",
|
||||
"lindera-compress",
|
||||
"lindera-core",
|
||||
"lindera-decompress",
|
||||
"log",
|
||||
"serde",
|
||||
"yada",
|
||||
"lindera-dictionary-builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-neologd"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f"
|
||||
checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@ -2994,31 +3023,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-neologd-builder"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20"
|
||||
checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"csv",
|
||||
"encoding_rs",
|
||||
"encoding_rs_io",
|
||||
"env_logger",
|
||||
"glob",
|
||||
"lindera-compress",
|
||||
"lindera-core",
|
||||
"lindera-decompress",
|
||||
"log",
|
||||
"serde",
|
||||
"yada",
|
||||
"lindera-dictionary-builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ko-dic"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48"
|
||||
checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@ -3034,29 +3053,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ko-dic-builder"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577"
|
||||
checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"csv",
|
||||
"encoding",
|
||||
"env_logger",
|
||||
"glob",
|
||||
"lindera-compress",
|
||||
"lindera-core",
|
||||
"lindera-decompress",
|
||||
"log",
|
||||
"yada",
|
||||
"lindera-dictionary-builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lindera-tokenizer"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d"
|
||||
checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"lindera-core",
|
||||
@ -3068,9 +3079,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-unidic"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23"
|
||||
checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@ -3086,22 +3097,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-unidic-builder"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008"
|
||||
checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"csv",
|
||||
"encoding",
|
||||
"env_logger",
|
||||
"glob",
|
||||
"lindera-compress",
|
||||
"lindera-core",
|
||||
"lindera-decompress",
|
||||
"log",
|
||||
"yada",
|
||||
"lindera-dictionary-builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3187,9 +3190,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
|
||||
|
||||
[[package]]
|
||||
name = "lmdb-master-sys"
|
||||
version = "0.2.0"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
|
||||
checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"doxygen-rs",
|
||||
@ -3463,7 +3466,7 @@ dependencies = [
|
||||
name = "milli"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"arroy",
|
||||
"arroy 0.4.0 (git+https://github.com/meilisearch/arroy?branch=binary-quantization)",
|
||||
"big_s",
|
||||
"bimap",
|
||||
"bincode",
|
||||
@ -4340,6 +4343,12 @@ dependencies = [
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-lite"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.2"
|
||||
@ -4388,12 +4397,6 @@ dependencies = [
|
||||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "retain_mut"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.8"
|
||||
@ -4411,13 +4414,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "roaring"
|
||||
version = "0.10.2"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
|
||||
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"retain_mut",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@ -4900,6 +4902,12 @@ version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.26.2"
|
||||
@ -5313,9 +5321,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-actix-web"
|
||||
version = "0.7.9"
|
||||
version = "0.7.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
|
||||
checksum = "4ee9e39a66d9b615644893ffc1704d2a89b5b315b7fd0228ad3182ca9a306b19"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"mutually_exclusive_features",
|
||||
|
30
README.md
30
README.md
@ -1,9 +1,6 @@
|
||||
<p align="center">
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
</a>
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo" target="_blank">
|
||||
<img src="assets/meilisearch-logo-kawaii.png">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
@ -25,7 +22,7 @@
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
|
||||
[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
|
||||
[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
|
||||
|
||||
<p align="center" name="demo">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
|
||||
@ -36,11 +33,18 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
|
||||
## 🖥 Examples
|
||||
|
||||
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
|
||||
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
|
||||
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million songs.
|
||||
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
|
||||
|
||||
See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).
|
||||
|
||||
## ✨ Features
|
||||
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
|
||||
- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
|
||||
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
|
||||
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
|
||||
@ -59,7 +63,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww
|
||||
|
||||
## 🚀 Getting started
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started).
|
||||
|
||||
## 🌍 Supercharge your Meilisearch experience
|
||||
|
||||
@ -83,7 +87,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
|
||||
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
|
||||
@ -105,11 +109,11 @@ Thank you for your support!
|
||||
|
||||
## 👩💻 Contributing
|
||||
|
||||
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
|
||||
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md).
|
||||
|
||||
## 📦 Versioning
|
||||
|
||||
Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
|
||||
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).
|
||||
|
||||
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).
|
||||
|
||||
|
BIN
assets/meilisearch-logo-kawaii.png
Normal file
BIN
assets/meilisearch-logo-kawaii.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 98 KiB |
@ -780,7 +780,7 @@ expression: document
|
||||
1.3484878540039063
|
||||
]
|
||||
],
|
||||
"userProvided": false
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -779,7 +779,7 @@ expression: document
|
||||
1.04031240940094
|
||||
]
|
||||
],
|
||||
"userProvided": false
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -152,6 +152,7 @@ impl Settings<Unchecked> {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
|
@ -182,6 +182,7 @@ impl Settings<Unchecked> {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
|
@ -200,6 +200,7 @@ impl std::ops::Deref for IndexUid {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
|
||||
#[derive(Debug)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
#[cfg_attr(test, serde(rename_all = "camelCase"))]
|
||||
|
Binary file not shown.
@ -40,7 +40,9 @@ ureq = "2.9.7"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.4.0"
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.34.0", features = ["json", "redactions"] }
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
@ -909,6 +909,7 @@ impl IndexScheduler {
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||
|
||||
// 3.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
@ -951,16 +952,21 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
// don't change the entry if it already exists, because it was user-provided
|
||||
vectors.entry(embedder_name).or_insert_with(|| {
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
),
|
||||
user_provided: false,
|
||||
};
|
||||
serde_json::to_value(embeddings).unwrap()
|
||||
});
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,6 +53,7 @@ use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
@ -1459,33 +1460,39 @@ impl IndexScheduler {
|
||||
// TODO: consider using a type alias or a struct embedder/template
|
||||
pub fn embedders(
|
||||
&self,
|
||||
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
|
||||
embedding_configs: Vec<IndexEmbeddingConfig>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
let res: Result<_> = embedding_configs
|
||||
.into_iter()
|
||||
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
|
||||
let prompt =
|
||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((name, (embedder.clone(), prompt)));
|
||||
.map(
|
||||
|IndexEmbeddingConfig {
|
||||
name,
|
||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
|
||||
..
|
||||
}| {
|
||||
let prompt =
|
||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((name, (embedder.clone(), prompt)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// add missing embedder
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||
.map_err(meilisearch_types::milli::Error::from)?,
|
||||
);
|
||||
{
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt)))
|
||||
})
|
||||
// add missing embedder
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||
.map_err(meilisearch_types::milli::Error::from)?,
|
||||
);
|
||||
{
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt)))
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
res.map(EmbeddingConfigs::new)
|
||||
}
|
||||
@ -1748,6 +1755,9 @@ mod tests {
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
ReplaceDocuments, UpdateDocuments,
|
||||
};
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
|
||||
use meilisearch_types::settings::Unchecked;
|
||||
use meilisearch_types::tasks::IndexSwap;
|
||||
use meilisearch_types::VERSION_FILE_NAME;
|
||||
use tempfile::{NamedTempFile, TempDir};
|
||||
@ -1801,7 +1811,7 @@ mod tests {
|
||||
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||
enable_mdb_writemap: false,
|
||||
index_growth_amount: 1000 * 1000, // 1 MB
|
||||
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
|
||||
index_count: 5,
|
||||
indexer_config,
|
||||
autobatching_enabled: true,
|
||||
@ -1826,6 +1836,7 @@ mod tests {
|
||||
assert_eq!(breakpoint, (Init, false));
|
||||
let index_scheduler_handle = IndexSchedulerHandle {
|
||||
_tempdir: tempdir,
|
||||
index_scheduler: index_scheduler.private_clone(),
|
||||
test_breakpoint_rcv: receiver,
|
||||
last_breakpoint: breakpoint.0,
|
||||
};
|
||||
@ -1914,6 +1925,7 @@ mod tests {
|
||||
|
||||
pub struct IndexSchedulerHandle {
|
||||
_tempdir: TempDir,
|
||||
index_scheduler: IndexScheduler,
|
||||
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
|
||||
last_breakpoint: Breakpoint,
|
||||
}
|
||||
@ -1931,9 +1943,13 @@ mod tests {
|
||||
{
|
||||
Ok(b) => b,
|
||||
Err(RecvTimeoutError::Timeout) => {
|
||||
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
|
||||
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}")
|
||||
}
|
||||
Err(RecvTimeoutError::Disconnected) => {
|
||||
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||
panic!("The scheduler crashed.\n{state}")
|
||||
}
|
||||
Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."),
|
||||
};
|
||||
// if we've already encountered a breakpoint we're supposed to be stuck on the false
|
||||
// and we expect the same variant with the true to come now.
|
||||
@ -1952,9 +1968,13 @@ mod tests {
|
||||
{
|
||||
Ok(b) => b,
|
||||
Err(RecvTimeoutError::Timeout) => {
|
||||
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
|
||||
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}")
|
||||
}
|
||||
Err(RecvTimeoutError::Disconnected) => {
|
||||
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||
panic!("The scheduler crashed.\n{state}")
|
||||
}
|
||||
Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."),
|
||||
};
|
||||
assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite");
|
||||
|
||||
@ -1968,9 +1988,10 @@ mod tests {
|
||||
fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) {
|
||||
for breakpoint in breakpoints {
|
||||
let b = self.advance();
|
||||
let state = snapshot_index_scheduler(&self.index_scheduler);
|
||||
assert_eq!(
|
||||
b, breakpoint,
|
||||
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.",
|
||||
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{state}",
|
||||
breakpoint, b
|
||||
);
|
||||
}
|
||||
@ -1995,6 +2016,7 @@ mod tests {
|
||||
// Wait for one successful batch.
|
||||
#[track_caller]
|
||||
fn advance_one_successful_batch(&mut self) {
|
||||
self.index_scheduler.assert_internally_consistent();
|
||||
self.advance_till([Start, BatchCreated]);
|
||||
loop {
|
||||
match self.advance() {
|
||||
@ -2003,13 +2025,17 @@ mod tests {
|
||||
InsideProcessBatch => (),
|
||||
// the batch went successfully, we can stop the loop and go on with the next states.
|
||||
ProcessBatchSucceeded => break,
|
||||
AbortedIndexation => panic!("The batch was aborted."),
|
||||
ProcessBatchFailed => panic!("The batch failed."),
|
||||
AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)),
|
||||
ProcessBatchFailed => {
|
||||
while self.advance() != Start {}
|
||||
panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler))
|
||||
},
|
||||
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
|
||||
}
|
||||
}
|
||||
|
||||
self.advance_till([AfterProcessing]);
|
||||
self.index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
// Wait for one failed batch.
|
||||
@ -2023,8 +2049,8 @@ mod tests {
|
||||
InsideProcessBatch => (),
|
||||
// the batch failed, we can stop the loop and go on with the next states.
|
||||
ProcessBatchFailed => break,
|
||||
ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)"),
|
||||
AbortedIndexation => panic!("The batch was aborted."),
|
||||
ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)),
|
||||
AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)),
|
||||
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
|
||||
}
|
||||
}
|
||||
@ -3052,8 +3078,10 @@ mod tests {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let (_, embedding_config) = configs.first().unwrap();
|
||||
insta::assert_json_snapshot!(embedding_config.embedder_options);
|
||||
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
|
||||
insta::assert_snapshot!(name, @"default");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(config.embedder_options);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -4989,7 +5017,6 @@ mod tests {
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");
|
||||
|
||||
@ -5000,7 +5027,7 @@ mod tests {
|
||||
insta::assert_json_snapshot!(task.details);
|
||||
}
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
||||
|
||||
{
|
||||
@ -5017,13 +5044,17 @@ mod tests {
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let (name, fakerest_config) = configs.get(0).unwrap();
|
||||
insta::assert_json_snapshot!(name, @r###""A_fakerest""###);
|
||||
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
|
||||
configs.get(0).unwrap();
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
||||
let fakerest_name = name.clone();
|
||||
|
||||
let (name, simple_hf_config) = configs.get(1).unwrap();
|
||||
insta::assert_json_snapshot!(name, @r###""B_small_hf""###);
|
||||
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
|
||||
configs.get(1).unwrap();
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||
let simple_hf_name = name.clone();
|
||||
|
||||
@ -5038,25 +5069,25 @@ mod tests {
|
||||
// add one doc, specifying vectors
|
||||
|
||||
let doc = serde_json::json!(
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
&fakerest_name: {
|
||||
// this will never trigger regeneration, which is good because we can't actually generate with
|
||||
// this embedder
|
||||
"userProvided": true,
|
||||
"embeddings": beagle_embed,
|
||||
},
|
||||
&simple_hf_name: {
|
||||
// this will be regenerated on updates
|
||||
"userProvided": false,
|
||||
"embeddings": lab_embed,
|
||||
},
|
||||
"noise": [0.1, 0.2, 0.3]
|
||||
}
|
||||
}
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
&fakerest_name: {
|
||||
// this will never trigger regeneration, which is good because we can't actually generate with
|
||||
// this embedder
|
||||
"regenerate": false,
|
||||
"embeddings": beagle_embed,
|
||||
},
|
||||
&simple_hf_name: {
|
||||
// this will be regenerated on updates
|
||||
"regenerate": true,
|
||||
"embeddings": lab_embed,
|
||||
},
|
||||
"noise": [0.1, 0.2, 0.3]
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap();
|
||||
@ -5078,7 +5109,6 @@ mod tests {
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");
|
||||
|
||||
@ -5091,6 +5121,19 @@ mod tests {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Ensure the document has been inserted into the relevant bitmap
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
||||
@ -5140,7 +5183,6 @@ mod tests {
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
||||
|
||||
@ -5153,11 +5195,25 @@ mod tests {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Ensure the document has been inserted into the relevant bitmap
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } =
|
||||
configs.get(1).unwrap();
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
// automatically changed to patou
|
||||
// automatically changed to patou because set to regenerate
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
|
||||
// remained beagle because set to userProvided
|
||||
// remained beagle
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
@ -5176,4 +5232,578 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_vectors_first_and_embedder_later() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": vec![1; 384],
|
||||
"unknown embedder": vec![1, 2, 3],
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "max",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": {
|
||||
"regenerate": false,
|
||||
"embeddings": vec![2; 384],
|
||||
},
|
||||
"unknown embedder": vec![4, 5],
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "marcel",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": {
|
||||
"regenerate": true,
|
||||
"embeddings": vec![3; 384],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "sora",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": {
|
||||
"regenerate": true,
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
|
||||
.unwrap();
|
||||
snapshot!(documents_count, @"5");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
|
||||
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("my_doggo_embedder") => Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
|
||||
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
|
||||
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
|
||||
document_template: Setting::Set(S("{{doc.doggo}}")),
|
||||
..Default::default()
|
||||
})
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: false,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
handle.advance_one_successful_batch();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
// all the vectors linked to the newly specified embedder have been removed
|
||||
// Only the unknown embedders stay in the document DB
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
// even though we specified the vector for the ID 3, it shouldn't be marked
|
||||
// as user provided since we explicitly marked it as NOT user provided.
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "my_doggo_embedder",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: HuggingFace(
|
||||
EmbedderOptions {
|
||||
model: "sentence-transformers/all-MiniLM-L6-v2",
|
||||
revision: Some(
|
||||
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
),
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{{doc.doggo}}",
|
||||
},
|
||||
},
|
||||
user_provided: RoaringBitmap<[1, 2]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
// the document with the id 3 should keep its original embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let mut embeddings = Vec::new();
|
||||
|
||||
'vectors: for i in 0..=u8::MAX {
|
||||
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e),
|
||||
})
|
||||
.transpose();
|
||||
|
||||
let Some(reader) = reader else {
|
||||
break 'vectors;
|
||||
};
|
||||
|
||||
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
|
||||
if let Some(embedding) = embedding {
|
||||
embeddings.push(embedding)
|
||||
} else {
|
||||
break 'vectors;
|
||||
}
|
||||
}
|
||||
|
||||
snapshot!(embeddings.len(), @"1");
|
||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||
|
||||
// If we update marcel it should regenerate its embedding automatically
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "marvel",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "sorry",
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
|
||||
.unwrap();
|
||||
snapshot!(documents_count, @"2");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: UpdateDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
// the document with the id 3 should have its original embedding updated
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
|
||||
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
|
||||
snapshot!(json_string!(doc), @r###"
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "marvel"
|
||||
}
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
|
||||
|
||||
// the document with the id 4 should generate an embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delete_document_containing_vector() {
|
||||
// 1. Add an embedder
|
||||
// 2. Push two documents containing a simple vector
|
||||
// 3. Delete the second document (id 1)
|
||||
// 4. The user defined roaring bitmap shouldn't contain the id of the deleted document anymore
|
||||
// 5. Clear the index
|
||||
// 6. The user defined roaring bitmap shouldn't contain the id of the remaining document either
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("manual") => Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
|
||||
dimensions: Setting::Set(3),
|
||||
..Default::default()
|
||||
})
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
"_vectors": {
|
||||
"manual": vec![0, 0, 0],
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"manual": vec![1, 1, 1],
|
||||
}
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
|
||||
.unwrap();
|
||||
snapshot!(documents_count, @"2");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: false,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: S("doggos"),
|
||||
documents_ids: vec![S("1")],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||
},
|
||||
},
|
||||
user_provided: RoaringBitmap<[0]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["manual"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||
},
|
||||
},
|
||||
user_provided: RoaringBitmap<[]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delete_embedder_with_user_provided_vectors() {
|
||||
// 1. Add two embedders
|
||||
// 2. Push two documents containing a simple vector
|
||||
// 3. The documents must not contain the vectors after the update as they are in the vectors db
|
||||
// 4. Delete the embedders
|
||||
// 5. The documents contain the vectors again
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("manual") => Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
|
||||
dimensions: Setting::Set(3),
|
||||
..Default::default()
|
||||
}),
|
||||
S("my_doggo_embedder") => Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
|
||||
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
|
||||
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
|
||||
document_template: Setting::Set(S("{{doc.doggo}}")),
|
||||
..Default::default()
|
||||
}),
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
"_vectors": {
|
||||
"manual": vec![0, 0, 0],
|
||||
"my_doggo_embedder": vec![1; 384],
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"manual": vec![1, 1, 1],
|
||||
}
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
|
||||
.unwrap();
|
||||
snapshot!(documents_count, @"2");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: false,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
|
||||
}
|
||||
|
||||
{
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("manual") => Setting::Reset,
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
}
|
||||
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
|
||||
}
|
||||
|
||||
{
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Reset,
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
}
|
||||
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// FIXME: redaction
|
||||
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,10 +6,6 @@ expression: doc
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
"A_fakerest": {
|
||||
"embeddings": "[vector]",
|
||||
"userProvided": true
|
||||
},
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
@ -6,10 +6,6 @@ expression: doc
|
||||
"doggo": "kefir",
|
||||
"breed": "patou",
|
||||
"_vectors": {
|
||||
"A_fakerest": {
|
||||
"embeddings": "[vector]",
|
||||
"userProvided": true
|
||||
},
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
File diff suppressed because one or more lines are too long
@ -188,6 +188,12 @@ impl AuthFilter {
|
||||
self.allow_index_creation && self.is_index_authorized(index)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
/// Return true if a tenant token was used to generate the search rules.
|
||||
pub fn is_tenant_token(&self) -> bool {
|
||||
self.search_rules.is_some()
|
||||
}
|
||||
|
||||
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
|
||||
Self {
|
||||
search_rules: None,
|
||||
@ -205,6 +211,7 @@ impl AuthFilter {
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
/// Check if the index is authorized by the API key and the tenant token.
|
||||
pub fn is_index_authorized(&self, index: &str) -> bool {
|
||||
self.key_authorized_indexes.is_index_authorized(index)
|
||||
&& self
|
||||
@ -214,6 +221,44 @@ impl AuthFilter {
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
/// Only check if the index is authorized by the API key
|
||||
pub fn api_key_is_index_authorized(&self, index: &str) -> bool {
|
||||
self.key_authorized_indexes.is_index_authorized(index)
|
||||
}
|
||||
|
||||
/// Only check if the index is authorized by the tenant token
|
||||
pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool {
|
||||
self.search_rules
|
||||
.as_ref()
|
||||
.map(|search_rules| search_rules.is_index_authorized(index))
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
/// Return the list of authorized indexes by the tenant token if any
|
||||
pub fn tenant_token_list_index_authorized(&self) -> Vec<String> {
|
||||
match self.search_rules {
|
||||
Some(ref search_rules) => {
|
||||
let mut indexes: Vec<_> = match search_rules {
|
||||
SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(),
|
||||
SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(),
|
||||
};
|
||||
indexes.sort_unstable();
|
||||
indexes
|
||||
}
|
||||
None => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the list of authorized indexes by the api key if any
|
||||
pub fn api_key_list_index_authorized(&self) -> Vec<String> {
|
||||
let mut indexes: Vec<_> = match self.key_authorized_indexes {
|
||||
SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(),
|
||||
SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(),
|
||||
};
|
||||
indexes.sort_unstable();
|
||||
indexes
|
||||
}
|
||||
|
||||
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
|
||||
if !self.is_index_authorized(index) {
|
||||
return None;
|
||||
|
@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.5.1", default-features = false }
|
||||
actix-web = { version = "4.6.0", default-features = false }
|
||||
anyhow = "1.0.79"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.3.0"
|
||||
@ -30,7 +30,12 @@ serde_json = "1.0.111"
|
||||
tar = "0.4.40"
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
time = { version = "0.3.31", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.35"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
@ -49,6 +54,8 @@ chinese-pinyin = ["milli/chinese-pinyin"]
|
||||
hebrew = ["milli/hebrew"]
|
||||
# japanese specialized tokenization
|
||||
japanese = ["milli/japanese"]
|
||||
# korean specialized tokenization
|
||||
korean = ["milli/korean"]
|
||||
# thai specialized tokenization
|
||||
thai = ["milli/thai"]
|
||||
# allow greek specialized tokenization
|
||||
|
@ -189,4 +189,6 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
||||
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
||||
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
||||
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
||||
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
|
||||
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
|
||||
merge_with_error_impl_take_error_message!(InvalidSimilarId);
|
||||
|
@ -222,6 +222,7 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
|
||||
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||
@ -240,7 +241,11 @@ InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarRetrieveVectors , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchRetrieveVectors , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||
@ -268,13 +273,14 @@ InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
@ -379,6 +385,7 @@ impl ErrorCode for milli::Error {
|
||||
Code::IndexPrimaryKeyMultipleCandidatesFound
|
||||
}
|
||||
UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists,
|
||||
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
|
||||
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
|
||||
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
|
||||
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
|
||||
@ -391,7 +398,8 @@ impl ErrorCode for milli::Error {
|
||||
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
|
||||
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
|
||||
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
|
||||
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
|
||||
UserError::InvalidVectorsMapType { .. }
|
||||
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
|
||||
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
|
||||
UserError::SortError(_) => Code::InvalidSearchSort,
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
||||
@ -505,6 +513,21 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
@ -8,6 +8,7 @@ use std::str::FromStr;
|
||||
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use fst::IntoStreamer;
|
||||
use milli::index::IndexEmbeddingConfig;
|
||||
use milli::proximity::ProximityPrecision;
|
||||
use milli::update::Setting;
|
||||
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
||||
@ -672,7 +673,7 @@ pub fn settings(
|
||||
let embedders: BTreeMap<_, _> = index
|
||||
.embedding_configs(rtxn)?
|
||||
.into_iter()
|
||||
.map(|(name, config)| (name, Setting::Set(config.into())))
|
||||
.map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into())))
|
||||
.collect();
|
||||
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
|
||||
|
||||
|
@ -14,20 +14,20 @@ default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.0"
|
||||
actix-http = { version = "3.6.0", default-features = false, features = [
|
||||
actix-http = { version = "3.7.0", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls-0_21",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.5.1", default-features = false, features = [
|
||||
actix-web = { version = "4.6.0", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls-0_21",
|
||||
] }
|
||||
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
|
||||
actix-web-static-files = { version = "4.0.1", optional = true }
|
||||
anyhow = { version = "1.0.79", features = ["backtrace"] }
|
||||
async-stream = "0.3.5"
|
||||
async-trait = "0.1.77"
|
||||
@ -104,13 +104,13 @@ url = { version = "2.5.0", features = ["serde"] }
|
||||
tracing = "0.1.40"
|
||||
tracing-subscriber = { version = "0.3.18", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.9"
|
||||
tracing-actix-web = "0.7.10"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.9.0"
|
||||
assert-json-diff = "2.0.2"
|
||||
brotli = "3.4.0"
|
||||
brotli = "6.0.0"
|
||||
insta = "1.34.0"
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
@ -150,6 +150,7 @@ chinese = ["meilisearch-types/chinese"]
|
||||
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
korean = ["meilisearch-types/korean"]
|
||||
thai = ["meilisearch-types/thai"]
|
||||
greek = ["meilisearch-types/greek"]
|
||||
khmer = ["meilisearch-types/khmer"]
|
||||
@ -157,5 +158,5 @@ vietnamese = ["meilisearch-types/vietnamese"]
|
||||
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
|
||||
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
|
||||
sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"
|
||||
|
@ -74,8 +74,8 @@ pub enum DocumentDeletionKind {
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum DocumentFetchKind {
|
||||
PerDocumentId,
|
||||
Normal { with_filter: bool, limit: usize, offset: usize },
|
||||
PerDocumentId { retrieve_vectors: bool },
|
||||
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
|
||||
}
|
||||
|
||||
pub trait Analytics: Sync + Send {
|
||||
|
@ -597,6 +597,9 @@ pub struct SearchAggregator {
|
||||
// every time a request has a filter, this field must be incremented by one
|
||||
sort_total_number_of_criteria: usize,
|
||||
|
||||
// distinct
|
||||
distinct: bool,
|
||||
|
||||
// filter
|
||||
filter_with_geo_radius: bool,
|
||||
filter_with_geo_bounding_box: bool,
|
||||
@ -622,6 +625,7 @@ pub struct SearchAggregator {
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
hybrid: bool,
|
||||
retrieve_vectors: bool,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
@ -648,6 +652,7 @@ pub struct SearchAggregator {
|
||||
// scoring
|
||||
show_ranking_score: bool,
|
||||
show_ranking_score_details: bool,
|
||||
ranking_score_threshold: bool,
|
||||
}
|
||||
|
||||
impl SearchAggregator {
|
||||
@ -661,6 +666,7 @@ impl SearchAggregator {
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve: _,
|
||||
retrieve_vectors,
|
||||
attributes_to_crop: _,
|
||||
crop_length,
|
||||
attributes_to_highlight: _,
|
||||
@ -669,6 +675,7 @@ impl SearchAggregator {
|
||||
show_ranking_score_details,
|
||||
filter,
|
||||
sort,
|
||||
distinct,
|
||||
facets: _,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
@ -676,6 +683,7 @@ impl SearchAggregator {
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@ -690,6 +698,8 @@ impl SearchAggregator {
|
||||
ret.sort_sum_of_criteria_terms = sort.len();
|
||||
}
|
||||
|
||||
ret.distinct = distinct.is_some();
|
||||
|
||||
if let Some(ref filter) = filter {
|
||||
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
||||
ret.filter_total_number_of_criteria = 1;
|
||||
@ -726,6 +736,7 @@ impl SearchAggregator {
|
||||
if let Some(ref vector) = vector {
|
||||
ret.max_vector_size = vector.len();
|
||||
}
|
||||
ret.retrieve_vectors |= retrieve_vectors;
|
||||
|
||||
if query.is_finite_pagination() {
|
||||
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
|
||||
@ -748,6 +759,7 @@ impl SearchAggregator {
|
||||
|
||||
ret.show_ranking_score = *show_ranking_score;
|
||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
||||
|
||||
if let Some(hybrid) = hybrid {
|
||||
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
||||
@ -792,6 +804,7 @@ impl SearchAggregator {
|
||||
sort_with_geo_point,
|
||||
sort_sum_of_criteria_terms,
|
||||
sort_total_number_of_criteria,
|
||||
distinct,
|
||||
filter_with_geo_radius,
|
||||
filter_with_geo_bounding_box,
|
||||
filter_sum_of_criteria_terms,
|
||||
@ -800,6 +813,7 @@ impl SearchAggregator {
|
||||
attributes_to_search_on_total_number_of_uses,
|
||||
max_terms_number,
|
||||
max_vector_size,
|
||||
retrieve_vectors,
|
||||
matching_strategy,
|
||||
max_limit,
|
||||
max_offset,
|
||||
@ -821,6 +835,7 @@ impl SearchAggregator {
|
||||
hybrid,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
} = other;
|
||||
|
||||
if self.timestamp.is_none() {
|
||||
@ -847,6 +862,9 @@ impl SearchAggregator {
|
||||
self.sort_total_number_of_criteria =
|
||||
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
|
||||
|
||||
// distinct
|
||||
self.distinct |= distinct;
|
||||
|
||||
// filter
|
||||
self.filter_with_geo_radius |= filter_with_geo_radius;
|
||||
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
||||
@ -869,6 +887,7 @@ impl SearchAggregator {
|
||||
|
||||
// vector
|
||||
self.max_vector_size = self.max_vector_size.max(max_vector_size);
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.embedder |= embedder;
|
||||
@ -904,6 +923,7 @@ impl SearchAggregator {
|
||||
// scoring
|
||||
self.show_ranking_score |= show_ranking_score;
|
||||
self.show_ranking_score_details |= show_ranking_score_details;
|
||||
self.ranking_score_threshold |= ranking_score_threshold;
|
||||
}
|
||||
|
||||
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||
@ -916,6 +936,7 @@ impl SearchAggregator {
|
||||
sort_with_geo_point,
|
||||
sort_sum_of_criteria_terms,
|
||||
sort_total_number_of_criteria,
|
||||
distinct,
|
||||
filter_with_geo_radius,
|
||||
filter_with_geo_bounding_box,
|
||||
filter_sum_of_criteria_terms,
|
||||
@ -924,6 +945,7 @@ impl SearchAggregator {
|
||||
attributes_to_search_on_total_number_of_uses,
|
||||
max_terms_number,
|
||||
max_vector_size,
|
||||
retrieve_vectors,
|
||||
matching_strategy,
|
||||
max_limit,
|
||||
max_offset,
|
||||
@ -945,6 +967,7 @@ impl SearchAggregator {
|
||||
hybrid,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
} = self;
|
||||
|
||||
if total_received == 0 {
|
||||
@ -971,6 +994,7 @@ impl SearchAggregator {
|
||||
"with_geoPoint": sort_with_geo_point,
|
||||
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
|
||||
},
|
||||
"distinct": distinct,
|
||||
"filter": {
|
||||
"with_geoRadius": filter_with_geo_radius,
|
||||
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
||||
@ -985,6 +1009,7 @@ impl SearchAggregator {
|
||||
},
|
||||
"vector": {
|
||||
"max_vector_size": max_vector_size,
|
||||
"retrieve_vectors": retrieve_vectors,
|
||||
},
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
@ -1015,6 +1040,7 @@ impl SearchAggregator {
|
||||
"scoring": {
|
||||
"show_ranking_score": show_ranking_score,
|
||||
"show_ranking_score_details": show_ranking_score_details,
|
||||
"ranking_score_threshold": ranking_score_threshold,
|
||||
},
|
||||
});
|
||||
|
||||
@ -1072,6 +1098,7 @@ impl MultiSearchAggregator {
|
||||
page: _,
|
||||
hits_per_page: _,
|
||||
attributes_to_retrieve: _,
|
||||
retrieve_vectors: _,
|
||||
attributes_to_crop: _,
|
||||
crop_length: _,
|
||||
attributes_to_highlight: _,
|
||||
@ -1080,6 +1107,7 @@ impl MultiSearchAggregator {
|
||||
show_matches_position: _,
|
||||
filter: _,
|
||||
sort: _,
|
||||
distinct: _,
|
||||
facets: _,
|
||||
highlight_pre_tag: _,
|
||||
highlight_post_tag: _,
|
||||
@ -1087,6 +1115,7 @@ impl MultiSearchAggregator {
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
hybrid: _,
|
||||
ranking_score_threshold: _,
|
||||
} = query;
|
||||
|
||||
index_uid.as_str()
|
||||
@ -1234,6 +1263,7 @@ impl FacetSearchAggregator {
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@ -1248,7 +1278,8 @@ impl FacetSearchAggregator {
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some()
|
||||
|| hybrid.is_some();
|
||||
|| hybrid.is_some()
|
||||
|| ranking_score_threshold.is_some();
|
||||
|
||||
ret
|
||||
}
|
||||
@ -1524,6 +1555,9 @@ pub struct DocumentsFetchAggregator {
|
||||
// if a filter was used
|
||||
per_filter: bool,
|
||||
|
||||
#[serde(rename = "vector.retrieve_vectors")]
|
||||
retrieve_vectors: bool,
|
||||
|
||||
// pagination
|
||||
#[serde(rename = "pagination.max_limit")]
|
||||
max_limit: usize,
|
||||
@ -1533,18 +1567,21 @@ pub struct DocumentsFetchAggregator {
|
||||
|
||||
impl DocumentsFetchAggregator {
|
||||
pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
|
||||
let (limit, offset) = match query {
|
||||
DocumentFetchKind::PerDocumentId => (1, 0),
|
||||
DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
|
||||
let (limit, offset, retrieve_vectors) = match query {
|
||||
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
|
||||
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
|
||||
(*limit, *offset, *retrieve_vectors)
|
||||
}
|
||||
};
|
||||
Self {
|
||||
timestamp: Some(OffsetDateTime::now_utc()),
|
||||
user_agents: extract_user_agents(request).into_iter().collect(),
|
||||
total_received: 1,
|
||||
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
|
||||
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
|
||||
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
|
||||
max_limit: limit,
|
||||
max_offset: offset,
|
||||
retrieve_vectors,
|
||||
}
|
||||
}
|
||||
|
||||
@ -1558,6 +1595,7 @@ impl DocumentsFetchAggregator {
|
||||
per_filter,
|
||||
max_limit,
|
||||
max_offset,
|
||||
retrieve_vectors,
|
||||
} = other;
|
||||
|
||||
if self.timestamp.is_none() {
|
||||
@ -1573,6 +1611,8 @@ impl DocumentsFetchAggregator {
|
||||
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
self.max_offset = self.max_offset.max(max_offset);
|
||||
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
}
|
||||
|
||||
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||
@ -1613,6 +1653,7 @@ pub struct SimilarAggregator {
|
||||
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
retrieve_vectors: bool,
|
||||
|
||||
// pagination
|
||||
max_limit: usize,
|
||||
@ -1624,6 +1665,7 @@ pub struct SimilarAggregator {
|
||||
// scoring
|
||||
show_ranking_score: bool,
|
||||
show_ranking_score_details: bool,
|
||||
ranking_score_threshold: bool,
|
||||
}
|
||||
|
||||
impl SimilarAggregator {
|
||||
@ -1635,9 +1677,11 @@ impl SimilarAggregator {
|
||||
offset,
|
||||
limit,
|
||||
attributes_to_retrieve: _,
|
||||
retrieve_vectors,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
filter,
|
||||
ranking_score_threshold,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@ -1675,8 +1719,10 @@ impl SimilarAggregator {
|
||||
|
||||
ret.show_ranking_score = *show_ranking_score;
|
||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
||||
|
||||
ret.embedder = embedder.is_some();
|
||||
ret.retrieve_vectors = *retrieve_vectors;
|
||||
|
||||
ret
|
||||
}
|
||||
@ -1708,6 +1754,8 @@ impl SimilarAggregator {
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
retrieve_vectors,
|
||||
} = other;
|
||||
|
||||
if self.timestamp.is_none() {
|
||||
@ -1737,6 +1785,7 @@ impl SimilarAggregator {
|
||||
}
|
||||
|
||||
self.embedder |= embedder;
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
@ -1749,6 +1798,7 @@ impl SimilarAggregator {
|
||||
// scoring
|
||||
self.show_ranking_score |= show_ranking_score;
|
||||
self.show_ranking_score_details |= show_ranking_score_details;
|
||||
self.ranking_score_threshold |= ranking_score_threshold;
|
||||
}
|
||||
|
||||
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||
@ -1769,6 +1819,8 @@ impl SimilarAggregator {
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
retrieve_vectors,
|
||||
} = self;
|
||||
|
||||
if total_received == 0 {
|
||||
@ -1795,6 +1847,9 @@ impl SimilarAggregator {
|
||||
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
||||
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||
},
|
||||
"vector": {
|
||||
"retrieve_vectors": retrieve_vectors,
|
||||
},
|
||||
"hybrid": {
|
||||
"embedder": embedder,
|
||||
},
|
||||
@ -1808,6 +1863,7 @@ impl SimilarAggregator {
|
||||
"scoring": {
|
||||
"show_ranking_score": show_ranking_score,
|
||||
"show_ranking_score_details": show_ranking_score_details,
|
||||
"ranking_score_threshold": ranking_score_threshold,
|
||||
},
|
||||
});
|
||||
|
||||
|
@ -98,14 +98,29 @@ impl From<MeilisearchHttpError> for aweb::Error {
|
||||
|
||||
impl From<aweb::error::PayloadError> for MeilisearchHttpError {
|
||||
fn from(error: aweb::error::PayloadError) -> Self {
|
||||
MeilisearchHttpError::Payload(PayloadError::Payload(error))
|
||||
match error {
|
||||
aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
|
||||
PayloadError::Payload(ActixPayloadError::IncompleteError),
|
||||
),
|
||||
_ => MeilisearchHttpError::Payload(PayloadError::Payload(
|
||||
ActixPayloadError::OtherError(error),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ActixPayloadError {
|
||||
#[error("The provided payload is incomplete and cannot be parsed")]
|
||||
IncompleteError,
|
||||
#[error(transparent)]
|
||||
OtherError(aweb::error::PayloadError),
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum PayloadError {
|
||||
#[error(transparent)]
|
||||
Payload(aweb::error::PayloadError),
|
||||
Payload(ActixPayloadError),
|
||||
#[error(transparent)]
|
||||
Json(JsonPayloadError),
|
||||
#[error(transparent)]
|
||||
@ -122,13 +137,15 @@ impl ErrorCode for PayloadError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
PayloadError::Payload(e) => match e {
|
||||
aweb::error::PayloadError::Incomplete(_) => Code::Internal,
|
||||
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
|
||||
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
|
||||
aweb::error::PayloadError::UnknownLength => Code::Internal,
|
||||
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
|
||||
aweb::error::PayloadError::Io(_) => Code::Internal,
|
||||
_ => todo!(),
|
||||
ActixPayloadError::IncompleteError => Code::BadRequest,
|
||||
ActixPayloadError::OtherError(error) => match error {
|
||||
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
|
||||
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
|
||||
aweb::error::PayloadError::UnknownLength => Code::Internal,
|
||||
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
|
||||
aweb::error::PayloadError::Io(_) => Code::Internal,
|
||||
_ => todo!(),
|
||||
},
|
||||
},
|
||||
PayloadError::Json(err) => match err {
|
||||
JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
|
||||
|
@ -12,6 +12,8 @@ use futures::Future;
|
||||
use meilisearch_auth::{AuthController, AuthFilter};
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
|
||||
use self::policies::AuthError;
|
||||
|
||||
pub struct GuardedData<P, D> {
|
||||
data: D,
|
||||
filters: AuthFilter,
|
||||
@ -35,12 +37,12 @@ impl<P, D> GuardedData<P, D> {
|
||||
let missing_master_key = auth.get_master_key().is_none();
|
||||
|
||||
match Self::authenticate(auth, token, index).await? {
|
||||
Some(filters) => match data {
|
||||
Ok(filters) => match data {
|
||||
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
||||
None => Err(AuthenticationError::IrretrievableState.into()),
|
||||
},
|
||||
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||
None => Err(AuthenticationError::InvalidToken.into()),
|
||||
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -51,12 +53,12 @@ impl<P, D> GuardedData<P, D> {
|
||||
let missing_master_key = auth.get_master_key().is_none();
|
||||
|
||||
match Self::authenticate(auth, String::new(), None).await? {
|
||||
Some(filters) => match data {
|
||||
Ok(filters) => match data {
|
||||
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
||||
None => Err(AuthenticationError::IrretrievableState.into()),
|
||||
},
|
||||
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
|
||||
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,7 +66,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
auth: Data<AuthController>,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
) -> Result<Option<AuthFilter>, ResponseError>
|
||||
) -> Result<Result<AuthFilter, AuthError>, ResponseError>
|
||||
where
|
||||
P: Policy + 'static,
|
||||
{
|
||||
@ -127,13 +129,14 @@ pub trait Policy {
|
||||
auth: Data<AuthController>,
|
||||
token: &str,
|
||||
index: Option<&str>,
|
||||
) -> Option<AuthFilter>;
|
||||
) -> Result<AuthFilter, policies::AuthError>;
|
||||
}
|
||||
|
||||
pub mod policies {
|
||||
use actix_web::web::Data;
|
||||
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
|
||||
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
// reexport actions in policies in order to be used in routes configuration.
|
||||
pub use meilisearch_types::keys::{actions, Action};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -144,11 +147,53 @@ pub mod policies {
|
||||
|
||||
enum TenantTokenOutcome {
|
||||
NotATenantToken,
|
||||
Invalid,
|
||||
Expired,
|
||||
Valid(Uuid, SearchRules),
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum AuthError {
|
||||
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
|
||||
ExpiredTenantToken { exp: i64, now: i64 },
|
||||
#[error("The provided API key is invalid.")]
|
||||
InvalidApiKey,
|
||||
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
|
||||
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
|
||||
#[error(
|
||||
"The API key used to generate this tenant token cannot acces the index `{index}`."
|
||||
)]
|
||||
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
|
||||
#[error(
|
||||
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
|
||||
)]
|
||||
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
|
||||
#[error("The provided tenant token is invalid.")]
|
||||
InvalidTenantToken,
|
||||
#[error("Could not decode tenant token, {0}.")]
|
||||
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
|
||||
#[error("Invalid action `{0}`.")]
|
||||
InternalInvalidAction(u8),
|
||||
}
|
||||
|
||||
impl From<jsonwebtoken::errors::Error> for AuthError {
|
||||
fn from(error: jsonwebtoken::errors::Error) -> Self {
|
||||
use jsonwebtoken::errors::ErrorKind;
|
||||
|
||||
match error.kind() {
|
||||
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
|
||||
_ => AuthError::CouldNotDecodeTenantToken(error),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for AuthError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
AuthError::InternalInvalidAction(_) => Code::Internal,
|
||||
_ => Code::InvalidApiKey,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn tenant_token_validation() -> Validation {
|
||||
let mut validation = Validation::default();
|
||||
validation.validate_exp = false;
|
||||
@ -158,15 +203,15 @@ pub mod policies {
|
||||
}
|
||||
|
||||
/// Extracts the key id used to sign the payload, without performing any validation.
|
||||
fn extract_key_id(token: &str) -> Option<Uuid> {
|
||||
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
|
||||
let mut validation = tenant_token_validation();
|
||||
validation.insecure_disable_signature_validation();
|
||||
let dummy_key = DecodingKey::from_secret(b"secret");
|
||||
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
|
||||
let token_data = decode::<Claims>(token, &dummy_key, &validation)?;
|
||||
|
||||
// get token fields without validating it.
|
||||
let Claims { api_key_uid, .. } = token_data.claims;
|
||||
Some(api_key_uid)
|
||||
Ok(api_key_uid)
|
||||
}
|
||||
|
||||
fn is_keys_action(action: u8) -> bool {
|
||||
@ -187,76 +232,102 @@ pub mod policies {
|
||||
auth: Data<AuthController>,
|
||||
token: &str,
|
||||
index: Option<&str>,
|
||||
) -> Option<AuthFilter> {
|
||||
) -> Result<AuthFilter, AuthError> {
|
||||
// authenticate if token is the master key.
|
||||
// Without a master key, all routes are accessible except the key-related routes.
|
||||
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
|
||||
return Some(AuthFilter::default());
|
||||
return Ok(AuthFilter::default());
|
||||
}
|
||||
|
||||
let (key_uuid, search_rules) =
|
||||
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
|
||||
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
|
||||
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
|
||||
(key_uuid, Some(search_rules))
|
||||
}
|
||||
TenantTokenOutcome::Expired => return None,
|
||||
TenantTokenOutcome::Invalid => return None,
|
||||
TenantTokenOutcome::NotATenantToken => {
|
||||
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
|
||||
}
|
||||
Ok(TenantTokenOutcome::NotATenantToken)
|
||||
| Err(AuthError::InvalidTenantToken) => (
|
||||
auth.get_optional_uid_from_encoded_key(token.as_bytes())
|
||||
.map_err(|_e| AuthError::InvalidApiKey)?
|
||||
.ok_or(AuthError::InvalidApiKey)?,
|
||||
None,
|
||||
),
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
// check that the indexes are allowed
|
||||
let action = Action::from_repr(A)?;
|
||||
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
|
||||
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
|
||||
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
|
||||
{
|
||||
return Some(auth_filter);
|
||||
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
|
||||
let auth_filter = auth
|
||||
.get_key_filters(key_uuid, search_rules)
|
||||
.map_err(|_e| AuthError::InvalidApiKey)?;
|
||||
|
||||
// First check if the index is authorized in the tenant token, this is a public
|
||||
// information, we can return a nice error message.
|
||||
if let Some(index) = index {
|
||||
if !auth_filter.tenant_token_is_index_authorized(index) {
|
||||
return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
|
||||
index: index.to_string(),
|
||||
allowed: auth_filter.tenant_token_list_index_authorized(),
|
||||
});
|
||||
}
|
||||
if !auth_filter.api_key_is_index_authorized(index) {
|
||||
if auth_filter.is_tenant_token() {
|
||||
// If the error comes from a tenant token we cannot share the list
|
||||
// of authorized indexes in the API key. This is not public information.
|
||||
return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
|
||||
index: index.to_string(),
|
||||
});
|
||||
} else {
|
||||
// Otherwise we can share the list
|
||||
// of authorized indexes in the API key.
|
||||
return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
|
||||
index: index.to_string(),
|
||||
allowed: auth_filter.api_key_list_index_authorized(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
|
||||
return Ok(auth_filter);
|
||||
}
|
||||
|
||||
None
|
||||
Err(AuthError::InvalidApiKey)
|
||||
}
|
||||
}
|
||||
|
||||
impl<const A: u8> ActionPolicy<A> {
|
||||
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
|
||||
fn authenticate_tenant_token(
|
||||
auth: &AuthController,
|
||||
token: &str,
|
||||
) -> Result<TenantTokenOutcome, AuthError> {
|
||||
// Only search action can be accessed by a tenant token.
|
||||
if A != actions::SEARCH {
|
||||
return TenantTokenOutcome::NotATenantToken;
|
||||
return Ok(TenantTokenOutcome::NotATenantToken);
|
||||
}
|
||||
|
||||
let uid = if let Some(uid) = extract_key_id(token) {
|
||||
uid
|
||||
} else {
|
||||
return TenantTokenOutcome::NotATenantToken;
|
||||
};
|
||||
let uid = extract_key_id(token)?;
|
||||
|
||||
// Check if tenant token is valid.
|
||||
let key = if let Some(key) = auth.generate_key(uid) {
|
||||
key
|
||||
} else {
|
||||
return TenantTokenOutcome::Invalid;
|
||||
return Err(AuthError::InvalidTenantToken);
|
||||
};
|
||||
|
||||
let data = if let Ok(data) = decode::<Claims>(
|
||||
let data = decode::<Claims>(
|
||||
token,
|
||||
&DecodingKey::from_secret(key.as_bytes()),
|
||||
&tenant_token_validation(),
|
||||
) {
|
||||
data
|
||||
} else {
|
||||
return TenantTokenOutcome::Invalid;
|
||||
};
|
||||
)?;
|
||||
|
||||
// Check if token is expired.
|
||||
if let Some(exp) = data.claims.exp {
|
||||
if OffsetDateTime::now_utc().unix_timestamp() > exp {
|
||||
return TenantTokenOutcome::Expired;
|
||||
let now = OffsetDateTime::now_utc().unix_timestamp();
|
||||
if now > exp {
|
||||
return Err(AuthError::ExpiredTenantToken { exp, now });
|
||||
}
|
||||
}
|
||||
|
||||
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
|
||||
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::DocumentId;
|
||||
use meilisearch_types::star_or::OptionStarOrList;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
@ -39,7 +40,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
};
|
||||
use crate::search::parse_filter;
|
||||
use crate::search::{parse_filter, RetrieveVectors};
|
||||
use crate::Opt;
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
@ -94,6 +95,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
pub struct GetDocument {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||
fields: OptionStarOrList<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
}
|
||||
|
||||
pub async fn get_document(
|
||||
@ -107,13 +110,20 @@ pub async fn get_document(
|
||||
debug!(parameters = ?params, "Get document");
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
|
||||
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
|
||||
|
||||
let GetDocument { fields } = params.into_inner();
|
||||
let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner();
|
||||
let attributes_to_retrieve = fields.merge_star_and_none();
|
||||
|
||||
let features = index_scheduler.features();
|
||||
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
|
||||
|
||||
analytics.get_fetch_documents(
|
||||
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
|
||||
&req,
|
||||
);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
|
||||
let document =
|
||||
retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?;
|
||||
debug!(returns = ?document, "Get document");
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
@ -153,6 +163,8 @@ pub struct BrowseQueryGet {
|
||||
limit: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||
fields: OptionStarOrList<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
||||
filter: Option<String>,
|
||||
}
|
||||
@ -166,6 +178,8 @@ pub struct BrowseQuery {
|
||||
limit: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
|
||||
fields: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||
filter: Option<Value>,
|
||||
}
|
||||
@ -185,6 +199,7 @@ pub async fn documents_by_query_post(
|
||||
with_filter: body.filter.is_some(),
|
||||
limit: body.limit,
|
||||
offset: body.offset,
|
||||
retrieve_vectors: body.retrieve_vectors,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
@ -201,7 +216,7 @@ pub async fn get_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Get documents GET");
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
|
||||
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
|
||||
|
||||
let filter = match filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
@ -215,6 +230,7 @@ pub async fn get_documents(
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
fields: fields.merge_star_and_none(),
|
||||
retrieve_vectors: retrieve_vectors.0,
|
||||
filter,
|
||||
};
|
||||
|
||||
@ -223,6 +239,7 @@ pub async fn get_documents(
|
||||
with_filter: query.filter.is_some(),
|
||||
limit: query.limit,
|
||||
offset: query.offset,
|
||||
retrieve_vectors: query.retrieve_vectors,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
@ -236,10 +253,14 @@ fn documents_by_query(
|
||||
query: BrowseQuery,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let BrowseQuery { offset, limit, fields, filter } = query;
|
||||
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
|
||||
let (total, documents) =
|
||||
retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
@ -579,13 +600,44 @@ fn some_documents<'a, 't: 'a>(
|
||||
index: &'a Index,
|
||||
rtxn: &'t RoTxn,
|
||||
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
|
||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
|
||||
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
||||
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
match retrieve_vectors {
|
||||
RetrieveVectors::Ignore => {}
|
||||
RetrieveVectors::Hide => {
|
||||
document.remove("_vectors");
|
||||
}
|
||||
RetrieveVectors::Retrieve => {
|
||||
let mut vectors = match document.remove("_vectors") {
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in index.embeddings(rtxn, key)? {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(key));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(vector.into()),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||
);
|
||||
}
|
||||
document.insert("_vectors".into(), vectors.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(document)
|
||||
})
|
||||
}))
|
||||
}
|
||||
@ -596,6 +648,7 @@ fn retrieve_documents<S: AsRef<str>>(
|
||||
limit: usize,
|
||||
filter: Option<Value>,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let filter = &filter;
|
||||
@ -620,53 +673,57 @@ fn retrieve_documents<S: AsRef<str>>(
|
||||
let (it, number_of_documents) = {
|
||||
let number_of_documents = candidates.len();
|
||||
(
|
||||
some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
|
||||
some_documents(
|
||||
index,
|
||||
&rtxn,
|
||||
candidates.into_iter().skip(offset).take(limit),
|
||||
retrieve_vectors,
|
||||
)?,
|
||||
number_of_documents,
|
||||
)
|
||||
};
|
||||
|
||||
let documents: Result<Vec<_>, ResponseError> = it
|
||||
let documents: Vec<_> = it
|
||||
.map(|document| {
|
||||
Ok(match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document?,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()).chain(
|
||||
(retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
|
||||
),
|
||||
),
|
||||
None => document?,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
.collect::<Result<_, ResponseError>>()?;
|
||||
|
||||
Ok((number_of_documents, documents?))
|
||||
Ok((number_of_documents, documents))
|
||||
}
|
||||
|
||||
fn retrieve_document<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
doc_id: &str,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<Document, ResponseError> {
|
||||
let txn = index.read_txn()?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let internal_id = index
|
||||
.external_documents_ids()
|
||||
.get(&txn, doc_id)?
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = index
|
||||
.documents(&txn, std::iter::once(internal_id))?
|
||||
.into_iter()
|
||||
let document = some_documents(index, &txn, Some(internal_id), retrieve_vectors)?
|
||||
.next()
|
||||
.map(|(_, d)| d)
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))??;
|
||||
|
||||
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
attributes_to_retrieve
|
||||
.iter()
|
||||
.map(|s| s.as_ref())
|
||||
.chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
|
||||
),
|
||||
None => document,
|
||||
};
|
||||
|
@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
@ -46,6 +46,8 @@ pub struct FacetSearchQuery {
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
|
||||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
}
|
||||
|
||||
pub async fn search(
|
||||
@ -103,6 +105,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
} = value;
|
||||
|
||||
SearchQuery {
|
||||
@ -112,6 +115,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
page: None,
|
||||
hits_per_page: None,
|
||||
attributes_to_retrieve: None,
|
||||
retrieve_vectors: false,
|
||||
attributes_to_crop: None,
|
||||
crop_length: DEFAULT_CROP_LENGTH(),
|
||||
attributes_to_highlight: None,
|
||||
@ -120,6 +124,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
show_ranking_score_details: false,
|
||||
filter,
|
||||
sort: None,
|
||||
distinct: None,
|
||||
facets: None,
|
||||
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
|
||||
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||
@ -128,6 +133,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
vector,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
|
||||
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@ -50,6 +51,8 @@ pub struct SearchQueryGet {
|
||||
hits_per_page: Option<Param<usize>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
|
||||
attributes_to_retrieve: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
|
||||
attributes_to_crop: Option<CS<String>>,
|
||||
#[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
|
||||
@ -60,6 +63,8 @@ pub struct SearchQueryGet {
|
||||
filter: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
|
||||
sort: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
|
||||
distinct: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
||||
show_matches_position: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
|
||||
@ -82,6 +87,21 @@ pub struct SearchQueryGet {
|
||||
pub hybrid_embedder: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
|
||||
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
|
||||
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
|
||||
|
||||
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
|
||||
type Error = InvalidSearchRankingScoreThreshold;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
|
||||
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
|
||||
@ -137,11 +157,13 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
page: other.page.as_deref().copied(),
|
||||
hits_per_page: other.hits_per_page.as_deref().copied(),
|
||||
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
||||
retrieve_vectors: other.retrieve_vectors.0,
|
||||
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
|
||||
crop_length: other.crop_length.0,
|
||||
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
|
||||
filter,
|
||||
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
|
||||
distinct: other.distinct,
|
||||
show_matches_position: other.show_matches_position.0,
|
||||
show_ranking_score: other.show_ranking_score.0,
|
||||
show_ranking_score_details: other.show_ranking_score_details.0,
|
||||
@ -152,6 +174,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
matching_strategy: other.matching_strategy,
|
||||
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||
hybrid,
|
||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -205,10 +228,12 @@ pub async fn search_with_url_query(
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
||||
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
let _permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector)
|
||||
})
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
@ -245,10 +270,13 @@ pub async fn search_with_post(
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
|
||||
let _permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vectors)
|
||||
})
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
if search_result.degraded {
|
||||
@ -270,11 +298,10 @@ pub fn search_kind(
|
||||
features: RoFeatures,
|
||||
) -> Result<SearchKind, ResponseError> {
|
||||
if query.vector.is_some() {
|
||||
features.check_vector("Passing `vector` as a query parameter")?;
|
||||
features.check_vector("Passing `vector` as a parameter")?;
|
||||
}
|
||||
|
||||
if query.hybrid.is_some() {
|
||||
features.check_vector("Passing `hybrid` as a query parameter")?;
|
||||
features.check_vector("Passing `hybrid` as a parameter")?;
|
||||
}
|
||||
|
||||
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
|
||||
|
@ -4,11 +4,7 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
|
||||
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
|
||||
InvalidSimilarShowRankingScoreDetails,
|
||||
};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{ErrorCode as _, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
@ -21,8 +17,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
|
||||
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@ -42,9 +38,7 @@ pub async fn similar_get(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
|
||||
ResponseError::from_msg(code.to_string(), code.error_code())
|
||||
})?;
|
||||
let query = params.0.try_into()?;
|
||||
|
||||
let mut aggregate = SimilarAggregator::from_query(&query, &req);
|
||||
|
||||
@ -99,6 +93,8 @@ async fn similar(
|
||||
|
||||
features.check_vector("Using the similar API")?;
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut query.filter, search_rules);
|
||||
@ -109,8 +105,10 @@ async fn similar(
|
||||
let (embedder_name, embedder) =
|
||||
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
|
||||
|
||||
tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder))
|
||||
.await?
|
||||
tokio::task::spawn_blocking(move || {
|
||||
perform_similar(&index, query, embedder_name, embedder, retrieve_vectors)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr)]
|
||||
@ -124,18 +122,35 @@ pub struct SimilarQueryGet {
|
||||
limit: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
|
||||
attributes_to_retrieve: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
|
||||
filter: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
|
||||
show_ranking_score: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
|
||||
show_ranking_score_details: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub embedder: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
|
||||
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
|
||||
|
||||
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
|
||||
type Error = InvalidSimilarRankingScoreThreshold;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
|
||||
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
type Error = InvalidSimilarId;
|
||||
type Error = ResponseError;
|
||||
|
||||
fn try_from(
|
||||
SimilarQueryGet {
|
||||
@ -143,10 +158,12 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
offset,
|
||||
limit,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
filter,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
}: SimilarQueryGet,
|
||||
) -> Result<Self, Self::Error> {
|
||||
let filter = match filter {
|
||||
@ -158,14 +175,18 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
};
|
||||
|
||||
Ok(SimilarQuery {
|
||||
id: id.0.try_into()?,
|
||||
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
|
||||
ResponseError::from_msg(code.to_string(), code.error_code())
|
||||
})?,
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
filter,
|
||||
embedder,
|
||||
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
||||
retrieve_vectors: retrieve_vectors.0,
|
||||
show_ranking_score: show_ranking_score.0,
|
||||
show_ranking_score_details: show_ranking_score_details.0,
|
||||
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@ -83,11 +83,14 @@ pub async fn multi_search_with_post(
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
|
||||
.with_index(query_index)?;
|
||||
let retrieve_vector =
|
||||
RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind))
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
|
@ -15,6 +15,7 @@ use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::vector::Embedder;
|
||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
@ -59,6 +60,8 @@ pub struct SearchQuery {
|
||||
pub hits_per_page: Option<usize>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
|
||||
pub retrieve_vectors: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
|
||||
pub attributes_to_crop: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
|
||||
@ -75,6 +78,8 @@ pub struct SearchQuery {
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
||||
pub sort: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
|
||||
pub distinct: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
|
||||
pub facets: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
|
||||
@ -87,6 +92,44 @@ pub struct SearchQuery {
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
|
||||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
||||
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
|
||||
pub struct RankingScoreThreshold(f64);
|
||||
|
||||
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
|
||||
type Error = InvalidSearchRankingScoreThreshold;
|
||||
|
||||
fn try_from(f: f64) -> Result<Self, Self::Error> {
|
||||
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
|
||||
#[allow(clippy::manual_range_contains)]
|
||||
if f > 1.0 || f < 0.0 {
|
||||
Err(InvalidSearchRankingScoreThreshold)
|
||||
} else {
|
||||
Ok(RankingScoreThreshold(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
||||
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
|
||||
pub struct RankingScoreThresholdSimilar(f64);
|
||||
|
||||
impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
|
||||
type Error = InvalidSimilarRankingScoreThreshold;
|
||||
|
||||
fn try_from(f: f64) -> Result<Self, Self::Error> {
|
||||
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
|
||||
#[allow(clippy::manual_range_contains)]
|
||||
if f > 1.0 || f < 0.0 {
|
||||
Err(InvalidSimilarRankingScoreThreshold)
|
||||
} else {
|
||||
Ok(Self(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
|
||||
@ -103,6 +146,7 @@ impl fmt::Debug for SearchQuery {
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
attributes_to_highlight,
|
||||
@ -111,12 +155,14 @@ impl fmt::Debug for SearchQuery {
|
||||
show_ranking_score_details,
|
||||
filter,
|
||||
sort,
|
||||
distinct,
|
||||
facets,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchQuery");
|
||||
@ -134,6 +180,9 @@ impl fmt::Debug for SearchQuery {
|
||||
if let Some(q) = q {
|
||||
debug.field("q", &q);
|
||||
}
|
||||
if *retrieve_vectors {
|
||||
debug.field("retrieve_vectors", &retrieve_vectors);
|
||||
}
|
||||
if let Some(v) = vector {
|
||||
if v.len() < 10 {
|
||||
debug.field("vector", &v);
|
||||
@ -156,6 +205,9 @@ impl fmt::Debug for SearchQuery {
|
||||
if let Some(sort) = sort {
|
||||
debug.field("sort", &sort);
|
||||
}
|
||||
if let Some(distinct) = distinct {
|
||||
debug.field("distinct", &distinct);
|
||||
}
|
||||
if let Some(facets) = facets {
|
||||
debug.field("facets", &facets);
|
||||
}
|
||||
@ -188,6 +240,9 @@ impl fmt::Debug for SearchQuery {
|
||||
debug.field("highlight_pre_tag", &highlight_pre_tag);
|
||||
debug.field("highlight_post_tag", &highlight_post_tag);
|
||||
debug.field("crop_marker", &crop_marker);
|
||||
if let Some(ranking_score_threshold) = ranking_score_threshold {
|
||||
debug.field("ranking_score_threshold", &ranking_score_threshold);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
@ -328,6 +383,8 @@ pub struct SearchQueryWithIndex {
|
||||
pub hits_per_page: Option<usize>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
|
||||
pub retrieve_vectors: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
|
||||
pub attributes_to_crop: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
|
||||
@ -344,6 +401,8 @@ pub struct SearchQueryWithIndex {
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
||||
pub sort: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
|
||||
pub distinct: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
|
||||
pub facets: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
|
||||
@ -356,6 +415,8 @@ pub struct SearchQueryWithIndex {
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
|
||||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
}
|
||||
|
||||
impl SearchQueryWithIndex {
|
||||
@ -369,6 +430,7 @@ impl SearchQueryWithIndex {
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
attributes_to_highlight,
|
||||
@ -377,6 +439,7 @@ impl SearchQueryWithIndex {
|
||||
show_matches_position,
|
||||
filter,
|
||||
sort,
|
||||
distinct,
|
||||
facets,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
@ -384,6 +447,7 @@ impl SearchQueryWithIndex {
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
@ -395,6 +459,7 @@ impl SearchQueryWithIndex {
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
attributes_to_highlight,
|
||||
@ -403,6 +468,7 @@ impl SearchQueryWithIndex {
|
||||
show_matches_position,
|
||||
filter,
|
||||
sort,
|
||||
distinct,
|
||||
facets,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
@ -410,6 +476,7 @@ impl SearchQueryWithIndex {
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
@ -432,10 +499,14 @@ pub struct SimilarQuery {
|
||||
pub embedder: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
|
||||
pub retrieve_vectors: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)]
|
||||
pub show_ranking_score: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
|
||||
pub show_ranking_score_details: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Deserr)]
|
||||
@ -664,6 +735,13 @@ fn prepare_search<'t>(
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||
let mut search = index.search(rtxn);
|
||||
search.time_budget(time_budget);
|
||||
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
|
||||
search.ranking_score_threshold(ranking_score_threshold.0);
|
||||
}
|
||||
|
||||
if let Some(distinct) = &query.distinct {
|
||||
search.distinct(distinct.clone());
|
||||
}
|
||||
|
||||
match search_kind {
|
||||
SearchKind::KeywordOnly => {
|
||||
@ -674,10 +752,15 @@ fn prepare_search<'t>(
|
||||
SearchKind::SemanticOnly { embedder_name, embedder } => {
|
||||
let vector = match query.vector.clone() {
|
||||
Some(vector) => vector,
|
||||
None => embedder
|
||||
.embed_one(query.q.clone().unwrap())
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?,
|
||||
None => {
|
||||
let span = tracing::trace_span!(target: "search::vector", "embed_one");
|
||||
let _entered = span.enter();
|
||||
|
||||
embedder
|
||||
.embed_one(query.q.clone().unwrap())
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
}
|
||||
};
|
||||
|
||||
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
|
||||
@ -705,11 +788,16 @@ fn prepare_search<'t>(
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
search.exhaustive_number_hits(is_finite_pagination);
|
||||
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
|
||||
ScoringStrategy::Detailed
|
||||
} else {
|
||||
ScoringStrategy::Skip
|
||||
});
|
||||
search.scoring_strategy(
|
||||
if query.show_ranking_score
|
||||
|| query.show_ranking_score_details
|
||||
|| query.ranking_score_threshold.is_some()
|
||||
{
|
||||
ScoringStrategy::Detailed
|
||||
} else {
|
||||
ScoringStrategy::Skip
|
||||
},
|
||||
);
|
||||
|
||||
// compute the offset on the limit depending on the pagination mode.
|
||||
let (offset, limit) = if is_finite_pagination {
|
||||
@ -754,6 +842,7 @@ pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
search_kind: SearchKind,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
@ -787,32 +876,37 @@ pub fn perform_search(
|
||||
|
||||
let SearchQuery {
|
||||
q,
|
||||
vector: _,
|
||||
hybrid: _,
|
||||
// already computed from prepare_search
|
||||
offset: _,
|
||||
limit,
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve,
|
||||
// use the enum passed as parameter
|
||||
retrieve_vectors: _,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
attributes_to_highlight,
|
||||
show_matches_position,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
filter: _,
|
||||
sort,
|
||||
facets,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
crop_marker,
|
||||
// already used in prepare_search
|
||||
vector: _,
|
||||
hybrid: _,
|
||||
offset: _,
|
||||
ranking_score_threshold: _,
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
filter: _,
|
||||
distinct: _,
|
||||
} = query;
|
||||
|
||||
let format = AttributesFormat {
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_highlight,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
@ -896,6 +990,7 @@ pub fn perform_search(
|
||||
|
||||
struct AttributesFormat {
|
||||
attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
attributes_to_highlight: Option<HashSet<String>>,
|
||||
attributes_to_crop: Option<Vec<String>>,
|
||||
crop_length: usize,
|
||||
@ -908,6 +1003,36 @@ struct AttributesFormat {
|
||||
show_ranking_score_details: bool,
|
||||
}
|
||||
|
||||
/// How the `_vectors` field of a document is treated when building a hit.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RetrieveVectors {
    /// Leave the `_vectors` field exactly as it is.
    ///
    /// Used when the vectorStore feature is disabled.
    Ignore,
    /// Strip the `_vectors` field from the document.
    ///
    /// Used when the vectorStore feature is enabled and `retrieveVectors` is `false`.
    Hide,
    /// Fetch the vectors from the DB and merge them into the `_vectors` field.
    ///
    /// Used when the vectorStore feature is enabled and `retrieveVectors` is `true`.
    Retrieve,
}
|
||||
|
||||
impl RetrieveVectors {
|
||||
pub fn new(
|
||||
retrieve_vector: bool,
|
||||
features: index_scheduler::RoFeatures,
|
||||
) -> Result<Self, index_scheduler::Error> {
|
||||
match (retrieve_vector, features.check_vector("Passing `retrieveVectors` as a parameter")) {
|
||||
(true, Ok(())) => Ok(Self::Retrieve),
|
||||
(true, Err(error)) => Err(error),
|
||||
(false, Ok(())) => Ok(Self::Hide),
|
||||
(false, Err(_)) => Ok(Self::Ignore),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn make_hits(
|
||||
index: &Index,
|
||||
rtxn: &RoTxn<'_>,
|
||||
@ -917,10 +1042,32 @@ fn make_hits(
|
||||
document_scores: Vec<Vec<ScoreDetails>>,
|
||||
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
|
||||
let displayed_ids = index
|
||||
.displayed_fields_ids(rtxn)?
|
||||
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
|
||||
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||
let displayed_ids =
|
||||
index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
|
||||
|
||||
let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
|
||||
|
||||
// Whether the index's displayed attributes exclude the `_vectors` field.
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
    // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
    (None, _) => false,
    // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
    (Some(_), None) => true,
    // displayed_ids is a finite list, so hide if `_vectors` is not part of it
    (Some(map), Some(vectors_fid)) => !map.contains(&vectors_fid),
};
|
||||
|
||||
let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
|
||||
if vectors_is_hidden {
|
||||
RetrieveVectors::Hide
|
||||
} else {
|
||||
RetrieveVectors::Retrieve
|
||||
}
|
||||
} else {
|
||||
format.retrieve_vectors
|
||||
};
|
||||
|
||||
let displayed_ids =
|
||||
displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||
let fids = |attrs: &BTreeSet<String>| {
|
||||
let mut ids = BTreeSet::new();
|
||||
for attr in attrs {
|
||||
@ -943,6 +1090,7 @@ fn make_hits(
|
||||
.intersection(&displayed_ids)
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
|
||||
let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
|
||||
let formatted_options = compute_formatted_options(
|
||||
@ -976,18 +1124,48 @@ fn make_hits(
|
||||
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
||||
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
||||
let mut documents = Vec::new();
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
let documents_iter = index.documents(rtxn, documents_ids)?;
|
||||
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
||||
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
||||
// First generate a document with all the displayed fields
|
||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
||||
|
||||
let add_vectors_fid =
|
||||
vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);
|
||||
|
||||
// select the attributes to retrieve
|
||||
let attributes_to_retrieve = to_retrieve_ids
|
||||
.iter()
|
||||
// skip the vectors_fid if RetrieveVectors::Hide
|
||||
.filter(|fid| match vectors_fid {
|
||||
Some(vectors_fid) => {
|
||||
!(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
|
||||
}
|
||||
None => true,
|
||||
})
|
||||
// need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
|
||||
.chain(add_vectors_fid.iter())
|
||||
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
||||
let mut document =
|
||||
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
||||
|
||||
if retrieve_vectors == RetrieveVectors::Retrieve {
|
||||
let mut vectors = match document.remove("_vectors") {
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in index.embeddings(rtxn, id)? {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
|
||||
vectors.insert(name, serde_json::to_value(embeddings)?);
|
||||
}
|
||||
document.insert("_vectors".into(), vectors.into());
|
||||
}
|
||||
|
||||
let (matches_position, formatted) = format_fields(
|
||||
&displayed_document,
|
||||
&fields_ids_map,
|
||||
@ -1057,6 +1235,7 @@ pub fn perform_similar(
|
||||
query: SimilarQuery,
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<SimilarResult, ResponseError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
@ -1068,8 +1247,10 @@ pub fn perform_similar(
|
||||
filter: _,
|
||||
embedder: _,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors: _,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
ranking_score_threshold,
|
||||
} = query;
|
||||
|
||||
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
|
||||
@ -1093,6 +1274,10 @@ pub fn perform_similar(
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ranking_score_threshold) = ranking_score_threshold {
|
||||
similar.ranking_score_threshold(ranking_score_threshold.0);
|
||||
}
|
||||
|
||||
let milli::SearchResult {
|
||||
documents_ids,
|
||||
matching_words: _,
|
||||
@ -1109,6 +1294,7 @@ pub fn perform_similar(
|
||||
|
||||
let format = AttributesFormat {
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_highlight: None,
|
||||
attributes_to_crop: None,
|
||||
crop_length: DEFAULT_CROP_LENGTH(),
|
||||
@ -1150,13 +1336,23 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||
// TODO: TAMO: milli encountered an internal error, what do we want to do?
|
||||
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
|
||||
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
|
||||
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
|
||||
if let Some((lat, lng)) =
|
||||
extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"]))
|
||||
{
|
||||
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
|
||||
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_geo_value(value: &Value) -> Option<f64> {
|
||||
match value {
|
||||
Value::Number(n) => n.as_f64(),
|
||||
Value::String(s) => s.parse().ok(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_formatted_options(
|
||||
attr_to_highlight: &HashSet<String>,
|
||||
attr_to_crop: &[String],
|
||||
@ -1530,4 +1726,54 @@ mod test {
|
||||
insert_geo_distance(sorters, &mut document);
|
||||
assert_eq!(document.get("_geoDistance"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_geo_distance_with_coords_as_string() {
|
||||
let value: Document = serde_json::from_str(
|
||||
r#"{
|
||||
"_geo": {
|
||||
"lat": "50",
|
||||
"lng": 3
|
||||
}
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let sorters = &["_geoPoint(50,3):desc".to_string()];
|
||||
let mut document = value.clone();
|
||||
insert_geo_distance(sorters, &mut document);
|
||||
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
|
||||
|
||||
let value: Document = serde_json::from_str(
|
||||
r#"{
|
||||
"_geo": {
|
||||
"lat": "50",
|
||||
"lng": "3"
|
||||
},
|
||||
"id": "1"
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let sorters = &["_geoPoint(50,3):desc".to_string()];
|
||||
let mut document = value.clone();
|
||||
insert_geo_distance(sorters, &mut document);
|
||||
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
|
||||
|
||||
let value: Document = serde_json::from_str(
|
||||
r#"{
|
||||
"_geo": {
|
||||
"lat": 50,
|
||||
"lng": "3"
|
||||
},
|
||||
"id": "1"
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let sorters = &["_geoPoint(50,3):desc".to_string()];
|
||||
let mut document = value.clone();
|
||||
insert_geo_distance(sorters, &mut document);
|
||||
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
|
||||
}
|
||||
}
|
||||
|
@ -40,8 +40,9 @@ pub struct Permit {
|
||||
|
||||
impl Drop for Permit {
|
||||
fn drop(&mut self) {
|
||||
let sender = self.sender.clone();
|
||||
// if the channel is closed then the whole instance is down
|
||||
let _ = futures::executor::block_on(self.sender.send(()));
|
||||
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
|
||||
});
|
||||
|
||||
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
|
||||
json!({"message": "The provided API key is invalid.",
|
||||
json!({"message": null,
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
@ -119,7 +119,8 @@ async fn error_access_expired_key() {
|
||||
thread::sleep(time::Duration::new(1, 0));
|
||||
|
||||
for (method, route) in AUTHORIZATIONS.keys() {
|
||||
let (response, code) = server.dummy_request(method, route).await;
|
||||
let (mut response, code) = server.dummy_request(method, route).await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
|
||||
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
|
||||
assert_eq!(403, code, "{:?}", &response);
|
||||
@ -149,7 +150,8 @@ async fn error_access_unauthorized_index() {
|
||||
// filter `products` index routes
|
||||
.filter(|(_, route)| route.starts_with("/indexes/products"))
|
||||
{
|
||||
let (response, code) = server.dummy_request(method, route).await;
|
||||
let (mut response, code) = server.dummy_request(method, route).await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
|
||||
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
|
||||
assert_eq!(403, code, "{:?}", &response);
|
||||
@ -176,7 +178,8 @@ async fn error_access_unauthorized_action() {
|
||||
|
||||
let key = response["key"].as_str().unwrap();
|
||||
server.use_api_key(key);
|
||||
let (response, code) = server.dummy_request(method, route).await;
|
||||
let (mut response, code) = server.dummy_request(method, route).await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
|
||||
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
|
||||
assert_eq!(403, code, "{:?}", &response);
|
||||
@ -280,7 +283,7 @@ async fn access_authorized_no_index_restriction() {
|
||||
route,
|
||||
action
|
||||
);
|
||||
assert_ne!(code, 403);
|
||||
assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use actix_web::test;
|
||||
use http::StatusCode;
|
||||
use jsonwebtoken::{EncodingKey, Header};
|
||||
use meili_snap::*;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -436,3 +439,262 @@ async fn patch_api_keys_unknown_field() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
async fn send_request_with_custom_auth(
|
||||
app: impl actix_web::dev::Service<
|
||||
actix_http::Request,
|
||||
Response = actix_web::dev::ServiceResponse<impl actix_web::body::MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
>,
|
||||
url: &str,
|
||||
auth: &str,
|
||||
) -> (Value, StatusCode) {
|
||||
let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
|
||||
(response, status_code)
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn invalid_auth_format() {
|
||||
let server = Server::new_auth().await;
|
||||
let app = server.init_web_app().await;
|
||||
|
||||
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
snapshot!(status_code, @"401 Unauthorized");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
"###);
|
||||
|
||||
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
snapshot!(status_code, @"401 Unauthorized");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn invalid_api_key() {
|
||||
let server = Server::new_auth().await;
|
||||
let app = server.init_web_app().await;
|
||||
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
let uuid = Uuid::nil();
|
||||
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
|
||||
let req = test::TestRequest::post()
|
||||
.uri("/keys")
|
||||
.insert_header(("Authorization", "Bearer MASTER_KEY"))
|
||||
.set_json(&key)
|
||||
.to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"name": null,
|
||||
"description": null,
|
||||
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
|
||||
"uid": "00000000-0000-0000-0000-000000000000",
|
||||
"actions": [
|
||||
"search"
|
||||
],
|
||||
"indexes": [
|
||||
"dog"
|
||||
],
|
||||
"expiresAt": null,
|
||||
"createdAt": "[date]",
|
||||
"updatedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
let key = response["key"].as_str().unwrap();
|
||||
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}"))
|
||||
.await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn invalid_tenant_token() {
|
||||
let server = Server::new_auth().await;
|
||||
let app = server.init_web_app().await;
|
||||
|
||||
// The tenant token won't be recognized at all if we're not on a search route
|
||||
let claims = json!({ "tamo": "kefir" });
|
||||
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
|
||||
.unwrap();
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}"))
|
||||
.await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
let claims = json!({ "tamo": "kefir" });
|
||||
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
|
||||
.unwrap();
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
// The error messages are not ideal but that's expected since we cannot _yet_ use deserr
|
||||
let claims = json!({ "searchRules": "kefir" });
|
||||
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
|
||||
.unwrap();
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
let uuid = Uuid::nil();
|
||||
let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() });
|
||||
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
|
||||
.unwrap();
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Could not decode tenant token, InvalidSignature.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
// ~~ For the next tests we first need a valid API key
|
||||
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
|
||||
let req = test::TestRequest::post()
|
||||
.uri("/keys")
|
||||
.insert_header(("Authorization", "Bearer MASTER_KEY"))
|
||||
.set_json(&key)
|
||||
.to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"name": null,
|
||||
"description": null,
|
||||
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
|
||||
"uid": "00000000-0000-0000-0000-000000000000",
|
||||
"actions": [
|
||||
"search"
|
||||
],
|
||||
"indexes": [
|
||||
"dog"
|
||||
],
|
||||
"expiresAt": null,
|
||||
"createdAt": "[date]",
|
||||
"updatedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
let key = response["key"].as_str().unwrap();
|
||||
|
||||
let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() });
|
||||
let jwt = jsonwebtoken::encode(
|
||||
&Header::default(),
|
||||
&claims,
|
||||
&EncodingKey::from_secret(key.as_bytes()),
|
||||
)
|
||||
.unwrap();
|
||||
// Try to access an index that is not authorized by the tenant token
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Try to access an index that *is* authorized by the tenant token but not by the API key used to generate the tenant token
|
||||
let (response, status_code) =
|
||||
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}"))
|
||||
.await;
|
||||
snapshot!(status_code, @"403 Forbidden");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The API key used to generate this tenant token cannot acces the index `doggo`.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -53,7 +53,8 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
});
|
||||
|
||||
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
|
||||
json!({"message": "The provided API key is invalid.",
|
||||
json!({
|
||||
"message": null,
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
@ -191,7 +192,9 @@ macro_rules! compute_forbidden_search {
|
||||
server.use_api_key(&web_token);
|
||||
let index = server.index("sales");
|
||||
index
|
||||
.search(json!({}), |response, code| {
|
||||
.search(json!({}), |mut response, code| {
|
||||
// We don't assert anything on the message since it may change between cases
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_eq!(
|
||||
response,
|
||||
INVALID_RESPONSE.clone(),
|
||||
@ -495,7 +498,8 @@ async fn error_access_forbidden_routes() {
|
||||
|
||||
for ((method, route), actions) in AUTHORIZATIONS.iter() {
|
||||
if !actions.contains("search") {
|
||||
let (response, code) = server.dummy_request(method, route).await;
|
||||
let (mut response, code) = server.dummy_request(method, route).await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_eq!(response, INVALID_RESPONSE.clone());
|
||||
assert_eq!(code, 403);
|
||||
}
|
||||
@ -529,14 +533,16 @@ async fn error_access_expired_parent_key() {
|
||||
server.use_api_key(&web_token);
|
||||
|
||||
// test search request while parent_key is not expired
|
||||
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
|
||||
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_ne!(response, INVALID_RESPONSE.clone());
|
||||
assert_ne!(code, 403);
|
||||
|
||||
// wait until the key is expired.
|
||||
thread::sleep(time::Duration::new(1, 0));
|
||||
|
||||
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
|
||||
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_eq!(response, INVALID_RESPONSE.clone());
|
||||
assert_eq!(code, 403);
|
||||
}
|
||||
@ -585,7 +591,8 @@ async fn error_access_modified_token() {
|
||||
.join(".");
|
||||
|
||||
server.use_api_key(&altered_token);
|
||||
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
|
||||
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_eq!(response, INVALID_RESPONSE.clone());
|
||||
assert_eq!(code, 403);
|
||||
}
|
||||
|
@ -109,9 +109,11 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
|
||||
fn invalid_response(query_index: Option<usize>) -> Value {
|
||||
let message = if let Some(query_index) = query_index {
|
||||
format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
|
||||
json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid."))
|
||||
} else {
|
||||
"The provided API key is invalid.".to_string()
|
||||
// if it's anything else we simply return null and will test all the
|
||||
// error messages somewhere else
|
||||
json!(null)
|
||||
};
|
||||
json!({"message": message,
|
||||
"code": "invalid_api_key",
|
||||
@ -414,7 +416,10 @@ macro_rules! compute_forbidden_single_search {
|
||||
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
|
||||
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
|
||||
server.use_api_key(&web_token);
|
||||
let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
|
||||
let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
|
||||
if failed_query_index.is_none() && !response["message"].is_null() {
|
||||
response["message"] = serde_json::json!(null);
|
||||
}
|
||||
assert_eq!(
|
||||
response,
|
||||
invalid_response(failed_query_index),
|
||||
@ -469,10 +474,13 @@ macro_rules! compute_forbidden_multiple_search {
|
||||
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
|
||||
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
|
||||
server.use_api_key(&web_token);
|
||||
let (response, code) = server.multi_search(json!({"queries" : [
|
||||
let (mut response, code) = server.multi_search(json!({"queries" : [
|
||||
{"indexUid": "sales"},
|
||||
{"indexUid": "products"},
|
||||
]})).await;
|
||||
if failed_query_index.is_none() && !response["message"].is_null() {
|
||||
response["message"] = serde_json::json!(null);
|
||||
}
|
||||
assert_eq!(
|
||||
response,
|
||||
invalid_response(failed_query_index),
|
||||
@ -1073,18 +1081,20 @@ async fn error_access_expired_parent_key() {
|
||||
server.use_api_key(&web_token);
|
||||
|
||||
// test search request while parent_key is not expired
|
||||
let (response, code) = server
|
||||
let (mut response, code) = server
|
||||
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
|
||||
.await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_ne!(response, invalid_response(None));
|
||||
assert_ne!(code, 403);
|
||||
|
||||
// wait until the key is expired.
|
||||
thread::sleep(time::Duration::new(1, 0));
|
||||
|
||||
let (response, code) = server
|
||||
let (mut response, code) = server
|
||||
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
|
||||
.await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_eq!(response, invalid_response(None));
|
||||
assert_eq!(code, 403);
|
||||
}
|
||||
@ -1134,8 +1144,9 @@ async fn error_access_modified_token() {
|
||||
.join(".");
|
||||
|
||||
server.use_api_key(&altered_token);
|
||||
let (response, code) =
|
||||
let (mut response, code) =
|
||||
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
|
||||
response["message"] = serde_json::json!(null);
|
||||
assert_eq!(response, invalid_response(None));
|
||||
assert_eq!(code, 403);
|
||||
}
|
||||
|
@ -182,14 +182,10 @@ impl Index<'_> {
|
||||
self.service.get(url).await
|
||||
}
|
||||
|
||||
pub async fn get_document(
|
||||
&self,
|
||||
id: u64,
|
||||
options: Option<GetDocumentOptions>,
|
||||
) -> (Value, StatusCode) {
|
||||
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
|
||||
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
|
||||
if let Some(fields) = options.and_then(|o| o.fields) {
|
||||
let _ = write!(url, "?fields={}", fields.join(","));
|
||||
if let Some(options) = options {
|
||||
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
|
||||
}
|
||||
self.service.get(url).await
|
||||
}
|
||||
@ -205,18 +201,11 @@ impl Index<'_> {
|
||||
}
|
||||
|
||||
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
|
||||
let mut url = format!("/indexes/{}/documents?", urlencode(self.uid.as_ref()));
|
||||
if let Some(limit) = options.limit {
|
||||
let _ = write!(url, "limit={}&", limit);
|
||||
}
|
||||
|
||||
if let Some(offset) = options.offset {
|
||||
let _ = write!(url, "offset={}&", offset);
|
||||
}
|
||||
|
||||
if let Some(attributes_to_retrieve) = options.attributes_to_retrieve {
|
||||
let _ = write!(url, "fields={}&", attributes_to_retrieve.join(","));
|
||||
}
|
||||
let url = format!(
|
||||
"/indexes/{}/documents{}",
|
||||
urlencode(self.uid.as_ref()),
|
||||
yaup::to_string(&options).unwrap()
|
||||
);
|
||||
|
||||
self.service.get(url).await
|
||||
}
|
||||
@ -435,13 +424,14 @@ impl Index<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GetDocumentOptions {
|
||||
pub fields: Option<Vec<&'static str>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug, Default, serde::Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GetAllDocumentsOptions {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub limit: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub offset: Option<usize>,
|
||||
pub attributes_to_retrieve: Option<Vec<&'static str>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub fields: Option<Vec<&'static str>>,
|
||||
pub retrieve_vectors: bool,
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ pub mod service;
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
#[allow(unused)]
|
||||
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
|
||||
pub use index::GetAllDocumentsOptions;
|
||||
use meili_snap::json_string;
|
||||
use serde::{Deserialize, Serialize};
|
||||
#[allow(unused)]
|
||||
@ -42,6 +42,12 @@ impl std::ops::Deref for Value {
|
||||
}
|
||||
}
|
||||
|
||||
// Mutable counterpart of the `Deref` impl on the test `Value` wrapper: hands
// out a mutable reference to the wrapped inner value (`self.0`) so a test can
// edit a response in place before comparing it, e.g.
// `response["message"] = serde_json::json!(null);`.
impl std::ops::DerefMut for Value {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<serde_json::Value> for Value {
|
||||
fn eq(&self, other: &serde_json::Value) -> bool {
|
||||
&self.0 == other
|
||||
@ -65,7 +71,7 @@ impl Display for Value {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
|
||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -183,6 +183,58 @@ async fn add_single_document_gzip_encoded() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
// Sends a plain (uncompressed) JSON payload while announcing
// `content-encoding: gzip`, first through POST and then through PUT, and
// checks that each request is answered with `400 Bad Request` and the
// "payload is incomplete" error.
#[actix_rt::test]
|
||||
async fn add_single_document_gzip_encoded_with_incomplete_error() {
|
||||
let document = json!("kefir");
|
||||
|
||||
// the payload below is sent as-is (not gzip-compressed) even though the
// request headers claim gzip, so both requests are expected to fail
|
||||
let server = Server::new().await;
|
||||
let app = server.init_web_app().await;
|
||||
// post
|
||||
let document = serde_json::to_string(&document).unwrap();
|
||||
let req = test::TestRequest::post()
|
||||
.uri("/indexes/dog/documents")
|
||||
.set_payload(document.to_string())
|
||||
.insert_header(("content-type", "application/json"))
|
||||
.insert_header(("content-encoding", "gzip"))
|
||||
.to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
snapshot!(status_code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response),
|
||||
@r###"
|
||||
{
|
||||
"message": "The provided payload is incomplete and cannot be parsed",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// put
|
||||
let req = test::TestRequest::put()
|
||||
.uri("/indexes/dog/documents")
|
||||
.set_payload(document.to_string())
|
||||
.insert_header(("content-type", "application/json"))
|
||||
.insert_header(("content-encoding", "gzip"))
|
||||
.to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
snapshot!(status_code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response),
|
||||
@r###"
|
||||
{
|
||||
"message": "The provided payload is incomplete and cannot be parsed",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
/// Here we try document request with every encoding
|
||||
#[actix_rt::test]
|
||||
@ -1040,6 +1092,52 @@ async fn document_addition_with_primary_key() {
|
||||
"###);
|
||||
}
|
||||
|
||||
// Adds a document whose primary key is an unsigned integer above `i64::MAX`
// (14630868576586246730), checks that the indexing task succeeds, then
// fetches the document back by that same id and checks the key is returned
// unchanged.
#[actix_rt::test]
|
||||
async fn document_addition_with_huge_int_primary_key() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
"primary": 14630868576586246730u64,
|
||||
"content": "foo",
|
||||
}
|
||||
]);
|
||||
let (response, code) = index.add_documents(documents, Some("primary")).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document(14630868576586246730u64, None).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response),
|
||||
@r###"
|
||||
{
|
||||
"primary": 14630868576586246730,
|
||||
"content": "foo"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn replace_document() {
|
||||
let server = Server::new().await;
|
||||
|
@ -719,7 +719,7 @@ async fn fetch_document_by_filter() {
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!(null)).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type: expected an object, but found null",
|
||||
"code": "bad_request",
|
||||
@ -730,7 +730,7 @@ async fn fetch_document_by_filter() {
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_document_offset",
|
||||
@ -741,7 +741,7 @@ async fn fetch_document_by_filter() {
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_document_limit",
|
||||
@ -752,7 +752,7 @@ async fn fetch_document_by_filter() {
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_document_fields",
|
||||
@ -763,7 +763,7 @@ async fn fetch_document_by_filter() {
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
|
||||
"code": "invalid_document_filter",
|
||||
@ -774,7 +774,7 @@ async fn fetch_document_by_filter() {
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
|
||||
"code": "invalid_document_filter",
|
||||
@ -786,7 +786,7 @@ async fn fetch_document_by_filter() {
|
||||
let (response, code) =
|
||||
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
|
||||
"code": "invalid_document_filter",
|
||||
@ -795,3 +795,70 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
// Error cases of the `retrieveVectors` parameter on the three document
// retrieval calls exercised below (GET all documents, fetch by POST, GET a
// single document): a non-boolean value is reported as
// `invalid_document_retrieve_vectors`, and `true` is reported as
// `feature_not_enabled` since this test never enables the `vector store`
// experimental feature. Only the response bodies are asserted; the status
// codes are ignored (`_code`).
#[actix_rt::test]
|
||||
async fn retrieve_vectors() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
// GET ALL DOCUMENTS BY QUERY
|
||||
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
|
||||
"code": "invalid_document_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
// FETCH ALL DOCUMENTS BY POST
|
||||
let (response, _code) =
|
||||
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
|
||||
"code": "invalid_document_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
// GET A SINGLE DOCUMENT
|
||||
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
|
||||
"code": "invalid_document_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use meili_snap::*;
|
||||
use urlencoding::encode as urlencode;
|
||||
|
||||
use crate::common::encoder::Encoder;
|
||||
use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server, Value};
|
||||
use crate::common::{GetAllDocumentsOptions, Server, Value};
|
||||
use crate::json;
|
||||
|
||||
// TODO: partial test since we are testing error, and error is not yet fully implemented in
|
||||
@ -59,8 +59,7 @@ async fn get_document() {
|
||||
})
|
||||
);
|
||||
|
||||
let (response, code) =
|
||||
index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["id"]) })).await;
|
||||
let (response, code) = index.get_document(0, Some(json!({ "fields": ["id"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
response,
|
||||
@ -69,9 +68,8 @@ async fn get_document() {
|
||||
})
|
||||
);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["nested.content"]) }))
|
||||
.await;
|
||||
let (response, code) =
|
||||
index.get_document(0, Some(json!({ "fields": ["nested.content"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
response,
|
||||
@ -211,7 +209,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
attributes_to_retrieve: Some(vec!["name"]),
|
||||
fields: Some(vec!["name"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@ -225,9 +223,19 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
assert_eq!(response["limit"], json!(20));
|
||||
assert_eq!(response["total"], json!(77));
|
||||
|
||||
let (response, code) = index.get_all_documents_raw("?fields=").await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 20);
|
||||
for results in response["results"].as_array().unwrap() {
|
||||
assert_eq!(results.as_object().unwrap().keys().count(), 0);
|
||||
}
|
||||
assert_eq!(response["offset"], json!(0));
|
||||
assert_eq!(response["limit"], json!(20));
|
||||
assert_eq!(response["total"], json!(77));
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
attributes_to_retrieve: Some(vec![]),
|
||||
fields: Some(vec!["wrong"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@ -242,22 +250,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
attributes_to_retrieve: Some(vec!["wrong"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 20);
|
||||
for results in response["results"].as_array().unwrap() {
|
||||
assert_eq!(results.as_object().unwrap().keys().count(), 0);
|
||||
}
|
||||
assert_eq!(response["offset"], json!(0));
|
||||
assert_eq!(response["limit"], json!(20));
|
||||
assert_eq!(response["total"], json!(77));
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
attributes_to_retrieve: Some(vec!["name", "tags"]),
|
||||
fields: Some(vec!["name", "tags"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@ -270,10 +263,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
}
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
attributes_to_retrieve: Some(vec!["*"]),
|
||||
..Default::default()
|
||||
})
|
||||
.get_all_documents(GetAllDocumentsOptions { fields: Some(vec!["*"]), ..Default::default() })
|
||||
.await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 20);
|
||||
@ -283,7 +273,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
attributes_to_retrieve: Some(vec!["*", "wrong"]),
|
||||
fields: Some(vec!["*", "wrong"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@ -316,12 +306,10 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
||||
assert_eq!(code, 202);
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) =
|
||||
index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await;
|
||||
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response, json!({}));
|
||||
let (response, code) =
|
||||
index.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await;
|
||||
let (response, code) = index.get_document(1, Some(json!({ "fields": ["content"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
response,
|
||||
@ -333,9 +321,7 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
||||
})
|
||||
);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) }))
|
||||
.await;
|
||||
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content.truc"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
response,
|
||||
@ -343,9 +329,7 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
||||
"content.truc": "foobar",
|
||||
})
|
||||
);
|
||||
let (response, code) = index
|
||||
.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) }))
|
||||
.await;
|
||||
let (response, code) = index.get_document(1, Some(json!({ "fields": ["content.truc"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
response,
|
||||
@ -540,3 +524,207 @@ async fn get_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
// With the `vectorStore` feature enabled and a `userProvided` embedder named
// `manual` configured, checks when `_vectors` shows up in retrieved documents
// (both "get all documents" and "get one document"): hidden by default, still
// hidden when only listed in `fields`, and returned when `retrieveVectors` is
// true, even if `fields` does not list it.
#[actix_rt::test]
|
||||
async fn get_document_with_vectors() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": null }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
// by default you shouldn't see the `_vectors` object
|
||||
let (documents, _code) = index.get_all_documents(Default::default()).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
let (documents, _code) = index.get_document(0, None).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir"
|
||||
}
|
||||
"###);
|
||||
|
||||
// if we try to retrieve the vectors with the `fields` parameter they
|
||||
// still shouldn't be displayed
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
fields: Some(vec!["name", "_vectors"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"name": "kefir"
|
||||
},
|
||||
{
|
||||
"name": "echo"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
let (documents, _code) =
|
||||
index.get_document(0, Some(json!({"fields": ["name", "_vectors"]}))).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"name": "kefir"
|
||||
}
|
||||
"###);
|
||||
|
||||
// If we specify the retrieve vectors boolean and nothing else we should get the vectors
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
let (documents, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// If we specify the retrieve vectors boolean and exclude vectors from the `fields` we should still get the vectors
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
retrieve_vectors: true,
|
||||
fields: Some(vec!["name"]),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "echo",
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
let (documents, _code) =
|
||||
index.get_document(0, Some(json!({"retrieveVectors": true, "fields": ["name"]}))).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -1938,3 +1938,210 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
// In this test we must generate the dump ourselves to ensure the
|
||||
// `user provided` vectors are correctly set
|
||||
#[actix_rt::test]
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
async fn generate_and_import_dump_containing_vectors() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let mut opt = default_settings(temp.path());
|
||||
let server = Server::new_with_options(opt.clone()).await.unwrap();
|
||||
let (code, _) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
let index = server.index("pets");
|
||||
let (response, code) = index
|
||||
.update_settings(json!(
|
||||
{
|
||||
"embedders": {
|
||||
"doggo_embedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"documentTemplate": "{{doc.doggo}}",
|
||||
}
|
||||
}
|
||||
}
|
||||
))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(response);
|
||||
let (response, code) = index
|
||||
.add_documents(
|
||||
json!([
|
||||
{"id": 0, "doggo": "kefir", "_vectors": { "doggo_embedder": vec![0; 384] }},
|
||||
{"id": 1, "doggo": "echo", "_vectors": { "doggo_embedder": { "regenerate": false, "embeddings": vec![1; 384] }}},
|
||||
{"id": 2, "doggo": "intel", "_vectors": { "doggo_embedder": { "regenerate": true, "embeddings": vec![2; 384] }}},
|
||||
{"id": 3, "doggo": "bill", "_vectors": { "doggo_embedder": { "regenerate": true }}},
|
||||
{"id": 4, "doggo": "max" },
|
||||
]),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(response);
|
||||
|
||||
let (response, code) = server.create_dump().await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(response["status"], @r###""succeeded""###);
|
||||
|
||||
// ========= We made a dump, now we should clear the DB and try to import our dump
|
||||
drop(server);
|
||||
tokio::fs::remove_dir_all(&opt.db_path).await.unwrap();
|
||||
let dump_name = format!("{}.dump", response["details"]["dumpUid"].as_str().unwrap());
|
||||
let dump_path = opt.dump_dir.join(dump_name);
|
||||
assert!(dump_path.exists(), "path: `{}`", dump_path.display());
|
||||
|
||||
opt.import_dump = Some(dump_path);
|
||||
// NOTE: We shouldn't have to change the database path but I lost one hour
|
||||
// because of a « bad path » error and that fixed it.
|
||||
opt.db_path = temp.path().join("data.ms");
|
||||
|
||||
let mut server = Server::new_auth_with_options(opt, temp).await;
|
||||
server.use_api_key("MASTER_KEY");
|
||||
|
||||
let (indexes, code) = server.list_indexes(None, None).await;
|
||||
assert_eq!(code, 200, "{indexes}");
|
||||
|
||||
snapshot!(indexes["results"].as_array().unwrap().len(), @"1");
|
||||
snapshot!(indexes["results"][0]["uid"], @r###""pets""###);
|
||||
snapshot!(indexes["results"][0]["primaryKey"], @r###""id""###);
|
||||
|
||||
let (response, code) = server.get_features().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let index = server.index("pets");
|
||||
|
||||
let (response, code) = index.settings().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"sort",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"nonSeparatorTokens": [],
|
||||
"separatorTokens": [],
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
"oneTypo": 5,
|
||||
"twoTypos": 9
|
||||
},
|
||||
"disableOnWords": [],
|
||||
"disableOnAttributes": []
|
||||
},
|
||||
"faceting": {
|
||||
"maxValuesPerFacet": 100,
|
||||
"sortFacetValuesBy": {
|
||||
"*": "alpha"
|
||||
}
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
},
|
||||
"embedders": {
|
||||
"doggo_embedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"documentTemplate": "{{doc.doggo}}"
|
||||
}
|
||||
},
|
||||
"searchCutoffMs": null
|
||||
}
|
||||
"###);
|
||||
|
||||
index
|
||||
.search(json!({"retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"], { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###"
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
"_vectors": {
|
||||
"doggo_embedder": {
|
||||
"embeddings": "[vector]",
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "echo",
|
||||
"_vectors": {
|
||||
"doggo_embedder": {
|
||||
"embeddings": "[vector]",
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"doggo_embedder": {
|
||||
"embeddings": "[vector]",
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bill",
|
||||
"_vectors": {
|
||||
"doggo_embedder": {
|
||||
"embeddings": "[vector]",
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "max",
|
||||
"_vectors": {
|
||||
"doggo_embedder": {
|
||||
"embeddings": "[vector]",
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
@ -0,0 +1,25 @@
|
||||
---
|
||||
source: meilisearch/tests/dumps/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "pets",
|
||||
"status": "succeeded",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"embedders": {
|
||||
"doggo_embedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"documentTemplate": "{{doc.doggo}}"
|
||||
}
|
||||
}
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
---
|
||||
source: meilisearch/tests/dumps/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "pets",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 5,
|
||||
"indexedDocuments": 5
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -13,6 +13,7 @@ mod snapshot;
|
||||
mod stats;
|
||||
mod swap_indexes;
|
||||
mod tasks;
|
||||
mod vector;
|
||||
|
||||
// Tests are isolated by features in different modules to allow better readability, test
|
||||
// targetability, and improved incremental compilation times.
|
||||
|
@ -107,6 +107,39 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
])
|
||||
});
|
||||
|
||||
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": { "main": "Brown", "pattern": "stripped" },
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": { "main": "Black", "pattern": "stripped" },
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": { "main": "Blue", "pattern": "used" },
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": { "main": "Blue", "pattern": "stripped" },
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
static DOCUMENT_PRIMARY_KEY: &str = "id";
|
||||
static DOCUMENT_DISTINCT_KEY: &str = "product_id";
|
||||
|
||||
@ -239,3 +272,35 @@ async fn distinct_search_with_pagination_no_ranking() {
|
||||
snapshot!(response["totalPages"], @"2");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn distinct_at_search_time() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("tamo");
|
||||
|
||||
let documents = NESTED_DOCUMENTS.clone();
|
||||
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||
let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await;
|
||||
let task = index.wait_task(task.uid()).await;
|
||||
snapshot!(task, name: "succeed");
|
||||
|
||||
fn get_hits(response: &Value) -> Vec<String> {
|
||||
let hits_array = response["hits"]
|
||||
.as_array()
|
||||
.unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap()));
|
||||
hits_array
|
||||
.iter()
|
||||
.map(|h| h[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"3");
|
||||
snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###);
|
||||
snapshot!(response["page"], @"1");
|
||||
snapshot!(response["totalPages"], @"1");
|
||||
snapshot!(response["totalHits"], @"3");
|
||||
}
|
||||
|
@ -167,6 +167,74 @@ async fn search_bad_hits_per_page() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_bad_attributes_to_retrieve() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.search_post(json!({"attributesToRetrieve": "doggo"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.attributesToRetrieve`: expected an array, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_search_attributes_to_retrieve",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_retrieve"
|
||||
}
|
||||
"###);
|
||||
// Can't make the `attributes_to_retrieve` fail with a get search since it'll accept anything as an array of strings.
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_bad_retrieve_vectors() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.search_post(json!({"retrieveVectors": "doggo"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_search_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.search_post(json!({"retrieveVectors": [true]})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`",
|
||||
"code": "invalid_search_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.search_get("?retrieveVectors=").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`",
|
||||
"code": "invalid_search_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.search_get("?retrieveVectors=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`",
|
||||
"code": "invalid_search_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_bad_attributes_to_crop() {
|
||||
let server = Server::new().await;
|
||||
@ -321,6 +389,40 @@ async fn search_bad_facets() {
|
||||
// Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_bad_threshold() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_search_ranking_score_threshold",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_invalid_threshold() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_ranking_score_threshold",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_non_filterable_facets() {
|
||||
let server = Server::new().await;
|
||||
@ -1038,3 +1140,66 @@ async fn search_on_unknown_field_plus_joker() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn distinct_at_search_time() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("tamo");
|
||||
let (task, _) = index.create(None).await;
|
||||
let task = index.wait_task(task.uid()).await;
|
||||
snapshot!(task, name: "task-succeed");
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
|
||||
index.wait_task(task.uid()).await;
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
|
||||
index.wait_task(task.uid()).await;
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -150,7 +150,8 @@ async fn bug_4640() {
|
||||
"_geo": {
|
||||
"lat": "45.4777599",
|
||||
"lng": "9.1967508"
|
||||
}
|
||||
},
|
||||
"_geoDistance": 0
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
|
@ -124,32 +124,61 @@ async fn simple_search() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}}}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"0");
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"2");
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"3");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn limit_offset() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"0");
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"1");
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn simple_search_hf() {
|
||||
let server = Server::new().await;
|
||||
@ -204,10 +233,10 @@ async fn distribution_shift() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||
|
||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
|
||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
|
||||
let (response, code) = index.search_post(search.clone()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
@ -228,7 +257,7 @@ async fn distribution_shift() {
|
||||
|
||||
let (response, code) = index.search_post(search).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.1920928955078125e-7}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7}]"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -239,20 +268,23 @@ async fn highlighter() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.2},
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
"retrieveVectors": true,
|
||||
"attributesToHighlight": [
|
||||
"desc",
|
||||
"_vectors",
|
||||
],
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**"
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**",
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"0");
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.8},
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true,
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
@ -262,13 +294,14 @@ async fn highlighter() {
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"3");
|
||||
|
||||
// no highlighting on full semantic
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 1.0},
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true,
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
@ -278,7 +311,7 @@ async fn highlighter() {
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"3");
|
||||
}
|
||||
|
||||
@ -361,12 +394,12 @@ async fn single_document() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0}"###);
|
||||
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0}"###);
|
||||
snapshot!(response["semanticHitCount"], @"1");
|
||||
}
|
||||
|
||||
@ -377,25 +410,25 @@ async fn query_combination() {
|
||||
|
||||
// search without query and vector, but with hybrid => still placeholder
|
||||
let (response, code) = index
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"null");
|
||||
|
||||
// same with a different semantic ratio
|
||||
let (response, code) = index
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true}))
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"null");
|
||||
|
||||
// wrong vector dimensions
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@ -410,34 +443,34 @@ async fn query_combination() {
|
||||
|
||||
// full vector
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.6581138968467712}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.6581138968467712}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"3");
|
||||
|
||||
// full keyword, without a query
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"null");
|
||||
|
||||
// query + vector, full keyword => keyword
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"null");
|
||||
|
||||
// query + vector, no hybrid keyword =>
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true}))
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@ -453,7 +486,7 @@ async fn query_combination() {
|
||||
// full vector, without a vector => error
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@ -470,11 +503,93 @@ async fn query_combination() {
|
||||
// hybrid without a vector => full keyword
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}),
|
||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"0");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn retrieve_vectors() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Planet",
|
||||
"desc": "He's not part of the Marvel Cinematic Universe",
|
||||
"id": "2",
|
||||
"_vectors": {
|
||||
"default": {
|
||||
"embeddings": "[vectors]",
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"desc": "a Shazam ersatz",
|
||||
"id": "3",
|
||||
"_vectors": {
|
||||
"default": {
|
||||
"embeddings": "[vectors]",
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"desc": "a Captain Marvel ersatz",
|
||||
"id": "1",
|
||||
"_vectors": {
|
||||
"default": {
|
||||
"embeddings": "[vectors]",
|
||||
"regenerate": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
||||
// remove `_vectors` from displayed attributes
|
||||
let (response, code) =
|
||||
index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Planet",
|
||||
"desc": "He's not part of the Marvel Cinematic Universe",
|
||||
"id": "2"
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"desc": "a Shazam ersatz",
|
||||
"id": "3"
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"desc": "a Captain Marvel ersatz",
|
||||
"id": "1"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
@ -48,6 +48,31 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
])
|
||||
});
|
||||
|
||||
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 1",
|
||||
"id": "A",
|
||||
},
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 2",
|
||||
"id": "B",
|
||||
},
|
||||
{
|
||||
"title": "Batman Returns",
|
||||
"id": "C",
|
||||
},
|
||||
{
|
||||
"title": "Batman",
|
||||
"id": "D",
|
||||
},
|
||||
{
|
||||
"title": "Badman",
|
||||
"id": "E",
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
@ -276,7 +301,7 @@ async fn negative_special_cases_search() {
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
index.update_settings(json!({"synonyms": { "escape": ["glass"] }})).await;
|
||||
index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass
|
||||
@ -960,6 +985,213 @@ async fn test_score_details() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_score() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = SCORE_DOCUMENTS.clone();
|
||||
|
||||
let res = index.add_documents(json!(documents), None).await;
|
||||
index.wait_task(res.0.uid()).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": "Badman the dark knight returns 1",
|
||||
"showRankingScore": true,
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 1",
|
||||
"id": "A",
|
||||
"_rankingScore": 0.9746605609456898
|
||||
},
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 2",
|
||||
"id": "B",
|
||||
"_rankingScore": 0.8055252965383685
|
||||
},
|
||||
{
|
||||
"title": "Badman",
|
||||
"id": "E",
|
||||
"_rankingScore": 0.16666666666666666
|
||||
},
|
||||
{
|
||||
"title": "Batman Returns",
|
||||
"id": "C",
|
||||
"_rankingScore": 0.07702020202020202
|
||||
},
|
||||
{
|
||||
"title": "Batman",
|
||||
"id": "D",
|
||||
"_rankingScore": 0.07702020202020202
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_score_threshold() {
|
||||
let query = "Badman dark returns 1";
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = SCORE_DOCUMENTS.clone();
|
||||
|
||||
let res = index.add_documents(json!(documents), None).await;
|
||||
index.wait_task(res.0.uid()).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": query,
|
||||
"showRankingScore": true,
|
||||
"rankingScoreThreshold": 0.0
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 1",
|
||||
"id": "A",
|
||||
"_rankingScore": 0.93430081300813
|
||||
},
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 2",
|
||||
"id": "B",
|
||||
"_rankingScore": 0.6685627880184332
|
||||
},
|
||||
{
|
||||
"title": "Badman",
|
||||
"id": "E",
|
||||
"_rankingScore": 0.25
|
||||
},
|
||||
{
|
||||
"title": "Batman Returns",
|
||||
"id": "C",
|
||||
"_rankingScore": 0.11553030303030302
|
||||
},
|
||||
{
|
||||
"title": "Batman",
|
||||
"id": "D",
|
||||
"_rankingScore": 0.11553030303030302
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": query,
|
||||
"showRankingScore": true,
|
||||
"rankingScoreThreshold": 0.2
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 1",
|
||||
"id": "A",
|
||||
"_rankingScore": 0.93430081300813
|
||||
},
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 2",
|
||||
"id": "B",
|
||||
"_rankingScore": 0.6685627880184332
|
||||
},
|
||||
{
|
||||
"title": "Badman",
|
||||
"id": "E",
|
||||
"_rankingScore": 0.25
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": query,
|
||||
"showRankingScore": true,
|
||||
"rankingScoreThreshold": 0.5
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 1",
|
||||
"id": "A",
|
||||
"_rankingScore": 0.93430081300813
|
||||
},
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 2",
|
||||
"id": "B",
|
||||
"_rankingScore": 0.6685627880184332
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": query,
|
||||
"showRankingScore": true,
|
||||
"rankingScoreThreshold": 0.8
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Batman the dark knight returns: Part 1",
|
||||
"id": "A",
|
||||
"_rankingScore": 0.93430081300813
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": query,
|
||||
"showRankingScore": true,
|
||||
"rankingScoreThreshold": 1.0
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
|
||||
// nobody is perfect
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_degraded_score_details() {
|
||||
let server = Server::new().await;
|
||||
@ -1058,21 +1290,38 @@ async fn experimental_feature_vector_store() {
|
||||
index.add_documents(json!(documents), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
index
|
||||
.search(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"showRankingScore": true
|
||||
}))
|
||||
}), |response, code|{
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
index
|
||||
.search(json!({
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true
|
||||
}), |response, code|{
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Passing `vector` as a query parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
@ -1105,6 +1354,7 @@ async fn experimental_feature_vector_store() {
|
||||
.search_post(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"showRankingScore": true,
|
||||
"retrieveVectors": true,
|
||||
}))
|
||||
.await;
|
||||
|
||||
@ -1116,11 +1366,16 @@ async fn experimental_feature_vector_store() {
|
||||
"title": "Shazam!",
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1.0,
|
||||
2.0,
|
||||
3.0
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
1.0,
|
||||
2.0,
|
||||
3.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 1.0
|
||||
},
|
||||
@ -1128,11 +1383,16 @@ async fn experimental_feature_vector_store() {
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1.0,
|
||||
2.0,
|
||||
54.0
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
1.0,
|
||||
2.0,
|
||||
54.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.9129111766815186
|
||||
},
|
||||
@ -1140,11 +1400,16 @@ async fn experimental_feature_vector_store() {
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100.0,
|
||||
340.0,
|
||||
90.0
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-100.0,
|
||||
340.0,
|
||||
90.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.8106412887573242
|
||||
},
|
||||
@ -1152,11 +1417,16 @@ async fn experimental_feature_vector_store() {
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100.0,
|
||||
231.0,
|
||||
32.0
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-100.0,
|
||||
231.0,
|
||||
32.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.7412010431289673
|
||||
},
|
||||
@ -1164,11 +1434,16 @@ async fn experimental_feature_vector_store() {
|
||||
"title": "Escape Room",
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
10.0,
|
||||
-23.0,
|
||||
32.0
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
10.0,
|
||||
-23.0,
|
||||
32.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.6972063183784485
|
||||
}
|
||||
|
@ -0,0 +1,20 @@
|
||||
---
|
||||
source: meilisearch/tests/search/distinct.rs
|
||||
---
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "tamo",
|
||||
"status": "succeeded",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"filterableAttributes": [
|
||||
"color.main"
|
||||
]
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
---
|
||||
source: meilisearch/tests/search/errors.rs
|
||||
---
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "tamo",
|
||||
"status": "succeeded",
|
||||
"type": "indexCreation",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"primaryKey": null
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -87,6 +87,68 @@ async fn similar_bad_id() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_bad_ranking_score_threshold() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
|
||||
"code": "invalid_similar_ranking_score_threshold",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_invalid_ranking_score_threshold() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_similar_ranking_score_threshold",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_invalid_id() {
|
||||
let server = Server::new().await;
|
||||
@ -694,3 +756,54 @@ async fn filter_reserved_geo_point_string() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_bad_retrieve_vectors() {
|
||||
let server = Server::new().await;
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_similar_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`",
|
||||
"code": "invalid_similar_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_get("?retrieveVectors=").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`",
|
||||
"code": "invalid_similar_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_get("?retrieveVectors=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`",
|
||||
"code": "invalid_similar_retrieve_vectors",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ async fn basic() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143}), |response, code| {
|
||||
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
@ -87,11 +87,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.1,
|
||||
0.6,
|
||||
0.8
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -99,11 +104,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6,
|
||||
0.8,
|
||||
-0.2
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -111,11 +121,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.7,
|
||||
0.7,
|
||||
-0.4
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -123,11 +138,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.8,
|
||||
0.4,
|
||||
-0.5
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -136,7 +156,7 @@ async fn basic() {
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": "299537"}), |response, code| {
|
||||
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
@ -145,11 +165,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.7,
|
||||
0.7,
|
||||
-0.4
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -157,11 +182,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.8,
|
||||
0.4,
|
||||
-0.5
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -169,11 +199,16 @@ async fn basic() {
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.1,
|
||||
0.6,
|
||||
0.8
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -181,11 +216,16 @@ async fn basic() {
|
||||
"release_year": 1930,
|
||||
"id": "143",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-0.5,
|
||||
0.3,
|
||||
0.85
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-0.5,
|
||||
0.30000001192092896,
|
||||
0.8500000238418579
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -194,6 +234,285 @@ async fn basic() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn ranking_score_threshold() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.890957772731781
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.39060014486312866
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.2819308042526245
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.1662663221359253
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.890957772731781
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.39060014486312866
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.2819308042526245
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.890957772731781
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.39060014486312866
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
},
|
||||
"_rankingScore": 0.890957772731781
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter() {
|
||||
let server = Server::new().await;
|
||||
@ -227,71 +546,97 @@ async fn filter() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6,
|
||||
0.8,
|
||||
-0.2
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.7,
|
||||
0.7,
|
||||
-0.4
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.8,
|
||||
0.4,
|
||||
-0.5
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "All Quiet on the Western Front",
|
||||
"release_year": 1930,
|
||||
"id": "143",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-0.5,
|
||||
0.3,
|
||||
0.85
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "All Quiet on the Western Front",
|
||||
"release_year": 1930,
|
||||
"id": "143",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-0.5,
|
||||
0.30000001192092896,
|
||||
0.8500000238418579
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@ -328,7 +673,7 @@ async fn limit_and_offset() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143, "limit": 1}), |response, code| {
|
||||
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
@ -337,11 +682,16 @@ async fn limit_and_offset() {
|
||||
"release_year": 2019,
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.1,
|
||||
0.6,
|
||||
0.8
|
||||
]
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -350,24 +700,32 @@ async fn limit_and_offset() {
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6,
|
||||
0.8,
|
||||
-0.2
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ mod errors;
|
||||
mod webhook;
|
||||
|
||||
use meili_snap::insta::assert_json_snapshot;
|
||||
use meili_snap::snapshot;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@ -738,11 +739,9 @@ async fn test_summarized_index_creation() {
|
||||
async fn test_summarized_index_deletion() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.delete().await;
|
||||
index.wait_task(0).await;
|
||||
let (task, _) = index.get_task(0).await;
|
||||
assert_json_snapshot!(task,
|
||||
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
|
||||
let (ret, _code) = index.delete().await;
|
||||
let task = index.wait_task(ret.uid()).await;
|
||||
snapshot!(task,
|
||||
@r###"
|
||||
{
|
||||
"uid": 0,
|
||||
@ -767,12 +766,34 @@ async fn test_summarized_index_deletion() {
|
||||
"###);
|
||||
|
||||
// is the details correctly set when documents are actually deleted.
|
||||
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
|
||||
index.delete().await;
|
||||
index.wait_task(2).await;
|
||||
let (task, _) = index.get_task(2).await;
|
||||
assert_json_snapshot!(task,
|
||||
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
|
||||
// /!\ We need to wait for the document addition to be processed otherwise, if the test runs too slow,
|
||||
// both tasks may get autobatched and the deleted documents count will be wrong.
|
||||
let (ret, _code) =
|
||||
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
|
||||
let task = index.wait_task(ret.uid()).await;
|
||||
snapshot!(task,
|
||||
@r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (ret, _code) = index.delete().await;
|
||||
let task = index.wait_task(ret.uid()).await;
|
||||
snapshot!(task,
|
||||
@r###"
|
||||
{
|
||||
"uid": 2,
|
||||
@ -792,22 +813,25 @@ async fn test_summarized_index_deletion() {
|
||||
"###);
|
||||
|
||||
// What happens when you delete an index that doesn't exists.
|
||||
index.delete().await;
|
||||
index.wait_task(2).await;
|
||||
let (task, _) = index.get_task(2).await;
|
||||
assert_json_snapshot!(task,
|
||||
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
|
||||
let (ret, _code) = index.delete().await;
|
||||
let task = index.wait_task(ret.uid()).await;
|
||||
snapshot!(task,
|
||||
@r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"uid": 3,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"status": "failed",
|
||||
"type": "indexDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"deletedDocuments": 1
|
||||
"deletedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
|
588
meilisearch/tests/vector/mod.rs
Normal file
588
meilisearch/tests/vector/mod.rs
Normal file
@ -0,0 +1,588 @@
|
||||
mod settings;
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use crate::common::index::Index;
|
||||
use crate::common::{GetAllDocumentsOptions, Server};
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_remove_user_provided() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
1.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [10, 10, 10] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": null }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
10.0,
|
||||
10.0,
|
||||
10.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
|
||||
let (value, code) = index.delete_document(0).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
async fn generate_default_user_provided_documents(server: &Server) -> Index {
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
|
||||
{"id": 2, "name": "billou", "_vectors": { "manual": [[2, 2, 2], [2, 2, 3]] }},
|
||||
{"id": 3, "name": "intel", "_vectors": { "manual": { "regenerate": false, "embeddings": [3, 3, 3] }}},
|
||||
{"id": 4, "name": "max", "_vectors": { "manual": { "regenerate": false, "embeddings": [[4, 4, 4], [4, 4, 5]] }}},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn user_provided_embeddings_error() {
|
||||
let server = Server::new().await;
|
||||
let index = generate_default_user_provided_documents(&server).await;
|
||||
|
||||
// First case, we forget to specify the `regenerate`
|
||||
let documents =
|
||||
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Second case, we don't specify anything
|
||||
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 3,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Third case, we specify something wrong in place of regenerate
|
||||
let documents =
|
||||
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 4,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents =
|
||||
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 5,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents =
|
||||
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 6,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents =
|
||||
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 7,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task["status"], @r###""succeeded""###);
|
||||
|
||||
let documents =
|
||||
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task["status"], @r###""succeeded""###);
|
||||
|
||||
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 10,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 11,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": 12,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn clear_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = generate_default_user_provided_documents(&server).await;
|
||||
|
||||
let (value, _code) = index.clear_all_documents().await;
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
// Make sure the documents DB has been cleared
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 0
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [],
|
||||
"query": "",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 0,
|
||||
"semanticHitCount": 0
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_remove_one_vector_4588() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/4588
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = server.wait_task(response.uid()).await;
|
||||
snapshot!(task, name: "settings-processed");
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, name: "document-added");
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": null }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, name: "document-deleted");
|
||||
|
||||
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir"
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"semanticHitCount": 1
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
}
|
228
meilisearch/tests/vector/settings.rs
Normal file
228
meilisearch/tests/vector/settings.rs
Normal file
@ -0,0 +1,228 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use crate::common::{GetAllDocumentsOptions, Server};
|
||||
use crate::json;
|
||||
use crate::vector::generate_default_user_provided_documents;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn update_embedder() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": { "manual": {}},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 2,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let ret = server.wait_task(response.uid()).await;
|
||||
snapshot!(ret, @r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 2
|
||||
}
|
||||
}
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn reset_embedder_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = generate_default_user_provided_documents(&server).await;
|
||||
|
||||
let (response, code) = index.delete_settings().await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
// Make sure the documents are still present
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
limit: None,
|
||||
offset: None,
|
||||
retrieve_vectors: false,
|
||||
fields: None,
|
||||
})
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "billou"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "intel"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "max"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 5
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure we are still able to retrieve their vectors
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
1.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "billou",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
2.0
|
||||
],
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
3.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "intel",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
3.0,
|
||||
3.0,
|
||||
3.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "max",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
4.0,
|
||||
4.0,
|
||||
4.0
|
||||
],
|
||||
[
|
||||
4.0,
|
||||
4.0,
|
||||
5.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 5
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"message": "Cannot find embedder with name `default`.",
|
||||
"code": "invalid_embedder",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_embedder"
|
||||
}
|
||||
"###);
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
---
|
||||
source: meilisearch/tests/vector/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
---
|
||||
source: meilisearch/tests/vector/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
---
|
||||
source: meilisearch/tests/vector/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3
|
||||
}
|
||||
}
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
@ -17,7 +17,7 @@ bincode = "1.3.3"
|
||||
bstr = "1.9.0"
|
||||
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.8.10", default-features = false }
|
||||
charabia = { version = "0.8.11", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.11"
|
||||
deserr = "0.6.1"
|
||||
@ -44,7 +44,7 @@ once_cell = "1.19.0"
|
||||
ordered-float = "4.2.0"
|
||||
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
||||
rayon = "1.8.0"
|
||||
roaring = "0.10.2"
|
||||
roaring = { version = "0.10.2", features = ["serde"] }
|
||||
rstar = { version = "0.11.0", features = ["serde"] }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
@ -71,15 +71,15 @@ csv = "1.3.0"
|
||||
candle-core = { version = "0.4.1" }
|
||||
candle-transformers = { version = "0.4.1" }
|
||||
candle-nn = { version = "0.4.1" }
|
||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default_features = false, features = [
|
||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
|
||||
"onig",
|
||||
] }
|
||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
|
||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
|
||||
"online",
|
||||
] }
|
||||
tiktoken-rs = "0.5.8"
|
||||
liquid = "0.26.4"
|
||||
arroy = "0.3.1"
|
||||
arroy = { git = "https://github.com/meilisearch/arroy", branch = "binary-quantization" }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.40"
|
||||
ureq = { version = "2.9.7", features = ["json"] }
|
||||
|
@ -59,6 +59,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
false,
|
||||
universe,
|
||||
&None,
|
||||
&None,
|
||||
GeoSortStrategy::default(),
|
||||
0,
|
||||
20,
|
||||
@ -66,6 +67,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
&mut DefaultSearchLogger,
|
||||
logger,
|
||||
TimeBudget::max(),
|
||||
None,
|
||||
)?;
|
||||
if let Some((logger, dir)) = detailed_logger {
|
||||
logger.finish(&mut ctx, Path::new(dir))?;
|
||||
|
@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
|
||||
Some(s) => Ok(s.to_string()),
|
||||
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
|
||||
},
|
||||
Value::Number(number) if number.is_i64() => Ok(number.to_string()),
|
||||
Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
|
||||
content => Err(UserError::InvalidDocumentId { document_id: content }),
|
||||
}
|
||||
}
|
||||
|
@ -119,6 +119,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
InvalidVectorDimensions { expected: usize, found: usize },
|
||||
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
|
||||
InvalidVectorsMapType { document_id: String, value: Value },
|
||||
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
|
||||
InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
|
||||
#[error("{0}")]
|
||||
InvalidFilter(String),
|
||||
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
|
||||
@ -134,6 +136,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
}
|
||||
)]
|
||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
|
||||
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
true => "This index does not have configured filterable attributes.".to_string(),
|
||||
false => format!("Available filterable attributes are: `{}{}`.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||
),
|
||||
}
|
||||
)]
|
||||
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
|
||||
#[error("Attribute `{}` is not facet-searchable. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
@ -270,8 +283,9 @@ impl From<arroy::Error> for Error {
|
||||
arroy::Error::DatabaseFull
|
||||
| arroy::Error::InvalidItemAppend
|
||||
| arroy::Error::UnmatchingDistance { .. }
|
||||
| arroy::Error::MissingNode
|
||||
| arroy::Error::MissingMetadata => {
|
||||
| arroy::Error::NeedBuild(_)
|
||||
| arroy::Error::MissingKey { .. }
|
||||
| arroy::Error::MissingMetadata(_) => {
|
||||
Error::InternalError(InternalError::ArroyError(value))
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::{FieldId, FieldsIdsMap, Weight};
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
@ -23,7 +24,13 @@ impl FieldidsWeightsMap {
|
||||
/// Should only be called in the case there are NO searchable attributes.
|
||||
/// All the fields will be inserted in the order of the fields ids map with a weight of 0.
|
||||
pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
|
||||
FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() }
|
||||
FieldidsWeightsMap {
|
||||
map: fid_map
|
||||
.iter()
|
||||
.filter(|(_fid, name)| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
|
||||
.map(|(fid, _name)| (fid, 0))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes a field id from the map, returning the associated weight previously in the map.
|
||||
|
@ -41,6 +41,16 @@ impl FieldsIdsMap {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the ids of a field and all its nested fields based on its name.
|
||||
pub fn nested_ids(&self, name: &str) -> Vec<FieldId> {
|
||||
self.names_ids
|
||||
.range(name.to_string()..)
|
||||
.take_while(|(key, _)| key.starts_with(name))
|
||||
.filter(|(key, _)| crate::is_faceted_by(key, name))
|
||||
.map(|(_name, id)| *id)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get the id of a field based on its name.
|
||||
pub fn id(&self, name: &str) -> Option<FieldId> {
|
||||
self.names_ids.get(name).copied()
|
||||
@ -126,4 +136,32 @@ mod tests {
|
||||
assert_eq!(iter.next(), Some((3, "title")));
|
||||
assert_eq!(iter.next(), None);
|
||||
}
|
||||
|
||||
/// `nested_ids` must return a field and its nested fields, and must not
/// return fields that merely share a textual prefix (`doggolution`).
#[test]
fn nested_fields() {
    let mut fields = FieldsIdsMap::new();

    // Ids are handed out sequentially, in insertion order.
    let names = ["id", "doggo", "doggo.name", "doggolution", "doggo.breed.name", "description"];
    for (expected_id, name) in (0..).zip(names) {
        assert_eq!(fields.insert(name), Some(expected_id));
    }

    insta::assert_debug_snapshot!(fields.nested_ids("doggo"), @r###"
    [
        1,
        4,
        2,
    ]
    "###);

    insta::assert_debug_snapshot!(fields.nested_ids("doggo.breed"), @r###"
    [
        4,
    ]
    "###);

    insta::assert_debug_snapshot!(fields.nested_ids("_vector"), @"[]");
}
|
||||
}
|
||||
|
@ -47,6 +47,12 @@ pub struct FacetGroupValue {
|
||||
pub bitmap: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// A facet group value whose bitmap is kept as borrowed, still-serialized
/// bytes instead of being decoded.
#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
    /// The size stored alongside the bitmap of the group.
    pub size: u8,
    /// The raw bytes of the bitmap, borrowed from the decoded slice.
    pub bitmap_bytes: &'b [u8],
}
|
||||
|
||||
pub struct FacetGroupKeyCodec<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
@ -69,6 +75,7 @@ where
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||
where
|
||||
T: BytesDecode<'a>,
|
||||
@ -84,6 +91,7 @@ where
|
||||
}
|
||||
|
||||
pub struct FacetGroupValueCodec;
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
||||
Ok(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FacetGroupLazyValueCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
|
||||
type DItem = FacetGroupLazyValue<'a>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::io;
|
||||
use std::io::{self, Cursor};
|
||||
use std::mem::size_of;
|
||||
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn intersection_with_serialized(
|
||||
mut bytes: &[u8],
|
||||
other: &RoaringBitmap,
|
||||
) -> io::Result<RoaringBitmap> {
|
||||
// See above `deserialize_from` method for implementation details.
|
||||
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||
if other.contains(integer) {
|
||||
bitmap.insert(integer);
|
||||
}
|
||||
}
|
||||
Ok(bitmap)
|
||||
} else {
|
||||
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge serialized CboRoaringBitmaps in a buffer.
|
||||
///
|
||||
/// if the merged values length is under the threshold, values are directly
|
||||
|
@ -9,6 +9,7 @@ use heed::types::*;
|
||||
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::documents::PrimaryKey;
|
||||
@ -23,6 +24,7 @@ use crate::heed_codec::{
|
||||
};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::vector::{Embedding, EmbeddingConfig};
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
@ -167,7 +169,7 @@ pub struct Index {
|
||||
/// Maps an embedder name to its id in the arroy store.
|
||||
pub embedder_category_id: Database<Str, U8>,
|
||||
/// Vector store based on arroy™.
|
||||
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
|
||||
pub vector_arroy: arroy::Database<arroy::distances::BinaryQuantizedEuclidean>,
|
||||
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||
@ -644,6 +646,7 @@ impl Index {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
user_fields: &[&str],
|
||||
non_searchable_fields_ids: &[FieldId],
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
) -> Result<()> {
|
||||
// We can write the user defined searchable fields as-is.
|
||||
@ -662,6 +665,7 @@ impl Index {
|
||||
for (weight, user_field) in user_fields.iter().enumerate() {
|
||||
if crate::is_faceted_by(field_from_map, user_field)
|
||||
&& !real_fields.contains(&field_from_map)
|
||||
&& !non_searchable_fields_ids.contains(&id)
|
||||
{
|
||||
real_fields.push(field_from_map);
|
||||
|
||||
@ -708,6 +712,7 @@ impl Index {
|
||||
Ok(self
|
||||
.fields_ids_map(rtxn)?
|
||||
.names()
|
||||
.filter(|name| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
|
||||
.map(|field| Cow::Owned(field.to_string()))
|
||||
.collect())
|
||||
})
|
||||
@ -1568,12 +1573,16 @@ impl Index {
|
||||
Ok(script_language)
|
||||
}
|
||||
|
||||
/// Put the embedding configs:
|
||||
/// 1. The name of the embedder
|
||||
/// 2. The configuration option for this embedder
|
||||
/// 3. The list of documents with a user provided embedding
|
||||
pub(crate) fn put_embedding_configs(
|
||||
&self,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
configs: Vec<(String, EmbeddingConfig)>,
|
||||
configs: Vec<IndexEmbeddingConfig>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>().put(
|
||||
self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put(
|
||||
wtxn,
|
||||
main_key::EMBEDDING_CONFIGS,
|
||||
&configs,
|
||||
@ -1584,13 +1593,10 @@ impl Index {
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
|
||||
}
|
||||
|
||||
pub fn embedding_configs(
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
) -> Result<Vec<(String, crate::vector::EmbeddingConfig)>> {
|
||||
pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result<Vec<IndexEmbeddingConfig>> {
|
||||
Ok(self
|
||||
.main
|
||||
.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>()
|
||||
.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>()
|
||||
.get(rtxn, main_key::EMBEDDING_CONFIGS)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
@ -1599,12 +1605,13 @@ impl Index {
|
||||
&'a self,
|
||||
rtxn: &'a RoTxn<'a>,
|
||||
embedder_id: u8,
|
||||
) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::Angular>>> + 'a {
|
||||
) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::BinaryQuantizedEuclidean>>> + 'a
|
||||
{
|
||||
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
|
||||
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata => Ok(None),
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e.into()),
|
||||
})
|
||||
.transpose()
|
||||
@ -1637,7 +1644,7 @@ impl Index {
|
||||
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata => Ok(None),
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e),
|
||||
})
|
||||
.transpose();
|
||||
@ -1662,6 +1669,13 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct IndexEmbeddingConfig {
|
||||
pub name: String,
|
||||
pub config: EmbeddingConfig,
|
||||
pub user_provided: RoaringBitmap,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use std::collections::HashSet;
|
||||
@ -1669,15 +1683,17 @@ pub(crate) mod tests {
|
||||
|
||||
use big_s::S;
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use maplit::hashset;
|
||||
use maplit::{btreemap, hashset};
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::documents::DocumentsBatchReader;
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::{
|
||||
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
|
||||
Settings,
|
||||
};
|
||||
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
|
||||
use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult};
|
||||
|
||||
pub(crate) struct TempIndex {
|
||||
@ -2783,4 +2799,95 @@ pub(crate) mod tests {
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
/// The content of `_vectors` must never be reachable through keyword search
/// or filters, whether the settings are left untouched, explicitly list the
/// `_vectors` fields as searchable/filterable, or declare a matching embedder.
#[test]
fn vectors_are_never_indexed_as_searchable_or_filterable() {
    let index = TempIndex::new();

    index
        .add_documents(documents!([
            { "id": 0, "_vectors": { "doggo": [2345] } },
            { "id": 1, "_vectors": { "doggo": [6789] } },
        ]))
        .unwrap();

    // Step 1: default settings.
    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 _vectors |
    2 _vectors.doggo |
    "###);
    db_snap!(index, searchable_fields, @r###"["id"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    0 0 |
    "###);

    let rtxn = index.read_txn().unwrap();
    let mut keyword_search = index.search(&rtxn);
    let found = keyword_search.query("2345").execute().unwrap();
    assert!(found.candidates.is_empty());
    drop(rtxn);

    // Step 2: explicitly ask for the `_vectors` fields to be searchable and
    // filterable.
    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
            settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
        })
        .unwrap();

    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 _vectors |
    2 _vectors.doggo |
    "###);
    db_snap!(index, searchable_fields, @"[]");
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    "###);

    let rtxn = index.read_txn().unwrap();
    let mut keyword_search = index.search(&rtxn);
    let found = keyword_search.query("2345").execute().unwrap();
    assert!(found.candidates.is_empty());

    let mut filtered_search = index.search(&rtxn);
    let found = filtered_search
        .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
        .execute()
        .unwrap();
    assert!(found.candidates.is_empty());

    // Step 3: declare a user-provided embedder matching the `_vectors` key.
    index
        .update_settings(|settings| {
            settings.set_embedder_settings(btreemap! {
                S("doggo") => Setting::Set(EmbeddingSettings {
                    dimensions: Setting::Set(1),
                    source: Setting::Set(EmbedderSource::UserProvided),
                    ..EmbeddingSettings::default()}),
            });
        })
        .unwrap();

    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 _vectors |
    2 _vectors.doggo |
    "###);
    db_snap!(index, searchable_fields, @"[]");
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    "###);

    let rtxn = index.read_txn().unwrap();
    let mut keyword_search = index.search(&rtxn);
    let found = keyword_search.query("2345").execute().unwrap();
    assert!(found.candidates.is_empty());

    let mut filtered_search = index.search(&rtxn);
    let found = filtered_search
        .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
        .execute()
        .unwrap();
    assert!(found.candidates.is_empty());
}
|
||||
}
|
||||
|
@ -6,9 +6,11 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::DocumentId;
|
||||
use crate::{CboRoaringBitmapCodec, DocumentId};
|
||||
|
||||
/// Call the given closure on the facet distribution of the candidate documents.
|
||||
///
|
||||
@ -31,14 +33,11 @@ pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
|
||||
where
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
|
||||
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
|
||||
let highest_level = get_highest_level(
|
||||
rtxn,
|
||||
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
|
||||
field_id,
|
||||
)?;
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||
Ok(())
|
||||
} else {
|
||||
@ -75,13 +74,10 @@ where
|
||||
|
||||
// Represents the list of keys that we must explore.
|
||||
let mut heap = BinaryHeap::new();
|
||||
let highest_level = get_highest_level(
|
||||
rtxn,
|
||||
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
|
||||
field_id,
|
||||
)?;
|
||||
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
// We first fill the heap with values from the highest level
|
||||
let starting_key =
|
||||
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
@ -92,7 +88,10 @@ where
|
||||
if key.field_id != field_id {
|
||||
break;
|
||||
}
|
||||
let intersection = value.bitmap & candidates;
|
||||
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
let count = intersection.len();
|
||||
if count != 0 {
|
||||
heap.push(LevelEntry {
|
||||
@ -121,7 +120,10 @@ where
|
||||
if key.field_id != field_id {
|
||||
break;
|
||||
}
|
||||
let intersection = value.bitmap & candidates;
|
||||
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
let count = intersection.len();
|
||||
if count != 0 {
|
||||
heap.push(LevelEntry {
|
||||
@ -146,7 +148,7 @@ where
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
|
||||
field_id: u16,
|
||||
callback: CB,
|
||||
}
|
||||
@ -171,7 +173,10 @@ where
|
||||
if key.field_id != self.field_id {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
let docids_in_common = value.bitmap & candidates;
|
||||
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
if !docids_in_common.is_empty() {
|
||||
let any_docid_in_common = docids_in_common.min().unwrap();
|
||||
match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)?
|
||||
@ -205,7 +210,10 @@ where
|
||||
if key.field_id != self.field_id {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
let docids_in_common = value.bitmap & candidates;
|
||||
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
candidates,
|
||||
)?;
|
||||
if !docids_in_common.is_empty() {
|
||||
let cf = self.iterate(
|
||||
&docids_in_common,
|
||||
|
@ -4,9 +4,11 @@ use heed::BytesEncode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::Result;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
/// Find all the document ids for which the given field contains a value contained within
|
||||
/// the two bounds.
|
||||
@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||
field_id: u16,
|
||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
docids: &mut RoaringBitmap,
|
||||
) -> Result<()>
|
||||
where
|
||||
@ -46,13 +49,15 @@ where
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
||||
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(starting_left_bound) =
|
||||
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
|
||||
{
|
||||
let rightmost_bound =
|
||||
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||
let group_size = usize::MAX;
|
||||
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
||||
Ok(())
|
||||
@ -64,12 +69,16 @@ where
|
||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
|
||||
field_id: u16,
|
||||
left: Bound<&'b [u8]>,
|
||||
right: Bound<&'b [u8]>,
|
||||
/// The subset of documents ids that are useful for this search.
|
||||
/// Great performance optimizations can be achieved by only fetching values matching this subset.
|
||||
universe: Option<&'bitmap RoaringBitmap>,
|
||||
docids: &'bitmap mut RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
||||
let left_key =
|
||||
@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
}
|
||||
|
||||
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
|
||||
*self.docids |= value.bitmap;
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
|
||||
};
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
left_condition && right_condition
|
||||
};
|
||||
if should_take_whole_group {
|
||||
*self.docids |= &previous_value.bitmap;
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
previous_value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
};
|
||||
previous_key = next_key;
|
||||
previous_value = next_value;
|
||||
continue;
|
||||
@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
left_condition && right_condition
|
||||
};
|
||||
if should_take_whole_group {
|
||||
*self.docids |= &previous_value.bitmap;
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
previous_value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
};
|
||||
} else {
|
||||
let level = level - 1;
|
||||
let starting_left_bound = previous_key.left_bound;
|
||||
@ -365,6 +392,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -384,6 +412,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -418,6 +447,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -439,6 +469,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -474,6 +505,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -499,6 +531,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -537,6 +570,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -556,6 +590,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -571,6 +606,7 @@ mod tests {
|
||||
0,
|
||||
&Bound::Unbounded,
|
||||
&Bound::Unbounded,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -586,6 +622,7 @@ mod tests {
|
||||
1,
|
||||
&Bound::Unbounded,
|
||||
&Bound::Unbounded,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -621,6 +658,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -634,6 +672,7 @@ mod tests {
|
||||
1,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||
|
||||
|
@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
|
||||
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||
Ok(itertools::Either::Left(DescendingFacetSort {
|
||||
|
@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included};
|
||||
|
||||
use either::Either;
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use serde_json::Value;
|
||||
|
||||
use super::facet_range_search;
|
||||
@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields, None)
|
||||
}
|
||||
|
||||
fn evaluate_operator(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
field_id: FieldId,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
operator: &Condition<'a>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let numbers_db = index.facet_id_f64_docids;
|
||||
@ -291,14 +291,22 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
Condition::NotEqual(val) => {
|
||||
let operator = Condition::Equal(val.clone());
|
||||
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
|
||||
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
|
||||
let all_ids = index.documents_ids(rtxn)?;
|
||||
return Ok(all_ids - docids);
|
||||
}
|
||||
};
|
||||
|
||||
let mut output = RoaringBitmap::new();
|
||||
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
numbers_db,
|
||||
field_id,
|
||||
left,
|
||||
right,
|
||||
universe,
|
||||
&mut output,
|
||||
)?;
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
@ -310,6 +318,7 @@ impl<'a> Filter<'a> {
|
||||
field_id: FieldId,
|
||||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
match (left, right) {
|
||||
@ -321,7 +330,7 @@ impl<'a> Filter<'a> {
|
||||
(_, _) => (),
|
||||
}
|
||||
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
rtxn, db, field_id, &left, &right, output,
|
||||
rtxn, db, field_id, &left, &right, universe, output,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
@ -332,31 +341,37 @@ impl<'a> Filter<'a> {
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
filterable_fields: &HashSet<String>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
if universe.map_or(false, |u| u.is_empty()) {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
|
||||
match &self.condition {
|
||||
FilterCondition::Not(f) => {
|
||||
let all_ids = index.documents_ids(rtxn)?;
|
||||
let selected = Self::inner_evaluate(
|
||||
&(f.as_ref().clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
Ok(all_ids - selected)
|
||||
match universe {
|
||||
Some(universe) => Ok(universe - selected),
|
||||
None => {
|
||||
let all_ids = index.documents_ids(rtxn)?;
|
||||
Ok(all_ids - selected)
|
||||
}
|
||||
}
|
||||
}
|
||||
FilterCondition::In { fid, els } => {
|
||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
|
||||
for el in els {
|
||||
let op = Condition::Equal(el.clone());
|
||||
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
|
||||
bitmap |= el_bitmap;
|
||||
}
|
||||
Ok(bitmap)
|
||||
els.iter()
|
||||
.map(|el| Condition::Equal(el.clone()))
|
||||
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
|
||||
.union()
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
@ -371,7 +386,7 @@ impl<'a> Filter<'a> {
|
||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
Self::evaluate_operator(rtxn, index, fid, op)
|
||||
Self::evaluate_operator(rtxn, index, fid, universe, op)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
@ -382,14 +397,11 @@ impl<'a> Filter<'a> {
|
||||
}))?
|
||||
}
|
||||
}
|
||||
FilterCondition::Or(subfilters) => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
for f in subfilters {
|
||||
bitmap |=
|
||||
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
|
||||
}
|
||||
Ok(bitmap)
|
||||
}
|
||||
FilterCondition::Or(subfilters) => subfilters
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
|
||||
.union(),
|
||||
FilterCondition::And(subfilters) => {
|
||||
let mut subfilters_iter = subfilters.iter();
|
||||
if let Some(first_subfilter) = subfilters_iter.next() {
|
||||
@ -398,16 +410,21 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
for f in subfilters_iter {
|
||||
if bitmap.is_empty() {
|
||||
return Ok(bitmap);
|
||||
}
|
||||
// TODO We are doing the intersections two times,
|
||||
// it could be more efficient
|
||||
// Can't I just replace this `&=` by an `=`?
|
||||
bitmap &= Self::inner_evaluate(
|
||||
&(f.clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
Some(&bitmap),
|
||||
)?;
|
||||
}
|
||||
Ok(bitmap)
|
||||
@ -507,6 +524,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
|
||||
let geo_lng_token = Token::new(
|
||||
@ -539,6 +557,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
|
||||
let condition_right = FilterCondition::Condition {
|
||||
@ -552,6 +571,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
|
||||
left | right
|
||||
@ -567,6 +587,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?
|
||||
};
|
||||
|
||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
||||
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::{BadGeoError, Filter};
|
||||
pub use self::search::{FacetValueHit, SearchForFacetValues};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{Index, Result};
|
||||
|
||||
@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
|
||||
}
|
||||
|
||||
/// Get the first facet value in the facet database
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -78,9 +78,9 @@ where
|
||||
}
|
||||
|
||||
/// Get the last facet value in the facet database
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -102,9 +102,9 @@ where
|
||||
}
|
||||
|
||||
/// Get the height of the highest level in the facet database
|
||||
pub(crate) fn get_highest_level<'t>(
|
||||
pub(crate) fn get_highest_level<'t, DC>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
|
||||
|
@ -17,6 +17,7 @@ struct ScoreWithRatioResult {
|
||||
|
||||
type ScoreWithRatio = (Vec<ScoreDetails>, f32);
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
|
||||
fn compare_scores(
|
||||
&(ref left_scores, left_ratio): &ScoreWithRatio,
|
||||
&(ref right_scores, right_ratio): &ScoreWithRatio,
|
||||
@ -84,6 +85,7 @@ impl ScoreWithRatioResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
|
||||
fn merge(
|
||||
vector_results: Self,
|
||||
keyword_results: Self,
|
||||
@ -150,6 +152,7 @@ impl ScoreWithRatioResult {
|
||||
}
|
||||
|
||||
impl<'a> Search<'a> {
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
|
||||
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
|
||||
// TODO: find classier way to achieve that than to reset vector and query params
|
||||
// create separate keyword and semantic searches
|
||||
@ -159,6 +162,7 @@ impl<'a> Search<'a> {
|
||||
offset: 0,
|
||||
limit: self.limit + self.offset,
|
||||
sort_criteria: self.sort_criteria.clone(),
|
||||
distinct: self.distinct.clone(),
|
||||
searchable_attributes: self.searchable_attributes,
|
||||
geo_strategy: self.geo_strategy,
|
||||
terms_matching_strategy: self.terms_matching_strategy,
|
||||
@ -169,6 +173,7 @@ impl<'a> Search<'a> {
|
||||
index: self.index,
|
||||
semantic: self.semantic.clone(),
|
||||
time_budget: self.time_budget.clone(),
|
||||
ranking_score_threshold: self.ranking_score_threshold,
|
||||
};
|
||||
|
||||
let semantic = search.semantic.take();
|
||||
@ -176,22 +181,25 @@ impl<'a> Search<'a> {
|
||||
|
||||
// completely skip semantic search if the results of the keyword search are good enough
|
||||
if self.results_good_enough(&keyword_results, semantic_ratio) {
|
||||
return Ok((keyword_results, Some(0)));
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
}
|
||||
|
||||
// no vector search against placeholder search
|
||||
let Some(query) = search.query.take() else {
|
||||
return Ok((keyword_results, Some(0)));
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
};
|
||||
// no embedder, no semantic search
|
||||
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
||||
return Ok((keyword_results, Some(0)));
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
};
|
||||
|
||||
let vector_query = match vector {
|
||||
Some(vector_query) => vector_query,
|
||||
None => {
|
||||
// attempt to embed the vector
|
||||
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
|
||||
let _entered = span.enter();
|
||||
|
||||
match embedder.embed_one(query) {
|
||||
Ok(embedding) => embedding,
|
||||
Err(error) => {
|
||||
@ -237,3 +245,44 @@ impl<'a> Search<'a> {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
fn return_keyword_results(
|
||||
limit: usize,
|
||||
offset: usize,
|
||||
SearchResult {
|
||||
matching_words,
|
||||
candidates,
|
||||
mut documents_ids,
|
||||
mut document_scores,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
}: SearchResult,
|
||||
) -> (SearchResult, Option<u32>) {
|
||||
let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
|
||||
// technically redudant because documents_ids.len() == document_scores.len(),
|
||||
// defensive programming
|
||||
offset >= document_scores.len()
|
||||
{
|
||||
(vec![], vec![])
|
||||
} else {
|
||||
// PANICS: offset < len
|
||||
documents_ids.rotate_left(offset);
|
||||
documents_ids.truncate(limit);
|
||||
|
||||
// PANICS: offset < len
|
||||
document_scores.rotate_left(offset);
|
||||
document_scores.truncate(limit);
|
||||
(documents_ids, document_scores)
|
||||
};
|
||||
(
|
||||
SearchResult {
|
||||
matching_words,
|
||||
candidates,
|
||||
documents_ids,
|
||||
document_scores,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
},
|
||||
Some(0),
|
||||
)
|
||||
}
|
||||
|
@ -11,8 +11,8 @@ use self::new::{execute_vector_search, PartialSearchResult};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::vector::Embedder;
|
||||
use crate::{
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
||||
SearchContext, TimeBudget,
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
|
||||
Result, SearchContext, TimeBudget, UserError,
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
@ -40,6 +40,7 @@ pub struct Search<'a> {
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
sort_criteria: Option<Vec<AscDesc>>,
|
||||
distinct: Option<String>,
|
||||
searchable_attributes: Option<&'a [String]>,
|
||||
geo_strategy: new::GeoSortStrategy,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
@ -50,6 +51,7 @@ pub struct Search<'a> {
|
||||
index: &'a Index,
|
||||
semantic: Option<SemanticSearch>,
|
||||
time_budget: TimeBudget,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
}
|
||||
|
||||
impl<'a> Search<'a> {
|
||||
@ -60,6 +62,7 @@ impl<'a> Search<'a> {
|
||||
offset: 0,
|
||||
limit: 20,
|
||||
sort_criteria: None,
|
||||
distinct: None,
|
||||
searchable_attributes: None,
|
||||
geo_strategy: new::GeoSortStrategy::default(),
|
||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||
@ -70,6 +73,7 @@ impl<'a> Search<'a> {
|
||||
index,
|
||||
semantic: None,
|
||||
time_budget: TimeBudget::max(),
|
||||
ranking_score_threshold: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,6 +107,11 @@ impl<'a> Search<'a> {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the attribute to use as the distinct field for this search.
///
/// When set, this value is handed to the search execution, where it takes
/// precedence over the distinct field stored in the index settings.
/// Returns `self` so that calls can be chained.
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
    let requested = Some(distinct);
    self.distinct = requested;
    self
}
|
||||
|
||||
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
|
||||
self.searchable_attributes = Some(searchable);
|
||||
self
|
||||
@ -146,6 +155,11 @@ impl<'a> Search<'a> {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the minimum global ranking score a document must reach to be returned.
///
/// During bucket sort, candidates whose current global score is below this
/// threshold are removed from the universe without being added to the results.
/// Returns `self` so that calls can be chained.
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> {
    let threshold = Some(ranking_score_threshold);
    self.ranking_score_threshold = threshold;
    self
}
|
||||
|
||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||
if has_vector_search {
|
||||
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||
@ -162,6 +176,19 @@ impl<'a> Search<'a> {
|
||||
ctx.attributes_to_search_on(searchable_attributes)?;
|
||||
}
|
||||
|
||||
if let Some(distinct) = &self.distinct {
|
||||
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
|
||||
if !crate::is_faceted(distinct, &filterable_fields) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
|
||||
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
|
||||
field: distinct.clone(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
|
||||
let PartialSearchResult {
|
||||
located_query_terms,
|
||||
@ -178,12 +205,14 @@ impl<'a> Search<'a> {
|
||||
self.scoring_strategy,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.offset,
|
||||
self.limit,
|
||||
embedder_name,
|
||||
embedder,
|
||||
self.time_budget.clone(),
|
||||
self.ranking_score_threshold,
|
||||
)?
|
||||
}
|
||||
_ => execute_search(
|
||||
@ -194,6 +223,7 @@ impl<'a> Search<'a> {
|
||||
self.exhaustive_number_hits,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.offset,
|
||||
self.limit,
|
||||
@ -201,6 +231,7 @@ impl<'a> Search<'a> {
|
||||
&mut DefaultSearchLogger,
|
||||
&mut DefaultSearchLogger,
|
||||
self.time_budget.clone(),
|
||||
self.ranking_score_threshold,
|
||||
)?,
|
||||
};
|
||||
|
||||
@ -229,6 +260,7 @@ impl fmt::Debug for Search<'_> {
|
||||
offset,
|
||||
limit,
|
||||
sort_criteria,
|
||||
distinct,
|
||||
searchable_attributes,
|
||||
geo_strategy: _,
|
||||
terms_matching_strategy,
|
||||
@ -239,6 +271,7 @@ impl fmt::Debug for Search<'_> {
|
||||
index: _,
|
||||
semantic,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
} = self;
|
||||
f.debug_struct("Search")
|
||||
.field("query", query)
|
||||
@ -247,6 +280,7 @@ impl fmt::Debug for Search<'_> {
|
||||
.field("offset", offset)
|
||||
.field("limit", limit)
|
||||
.field("sort_criteria", sort_criteria)
|
||||
.field("distinct", distinct)
|
||||
.field("searchable_attributes", searchable_attributes)
|
||||
.field("terms_matching_strategy", terms_matching_strategy)
|
||||
.field("scoring_strategy", scoring_strategy)
|
||||
@ -257,6 +291,7 @@ impl fmt::Debug for Search<'_> {
|
||||
&semantic.as_ref().map(|semantic| &semantic.embedder_name),
|
||||
)
|
||||
.field("time_budget", time_budget)
|
||||
.field("ranking_score_threshold", ranking_score_threshold)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@ -336,4 +371,28 @@ mod test {
|
||||
|
||||
assert_eq!(documents_ids, vec![1]);
|
||||
}
|
||||
|
||||
/// Checks that a Hangul query only matches the Korean document when the index
/// also contains Latin-script and Hebrew documents.
#[cfg(feature = "korean")]
#[test]
fn test_hangul_language_detection() {
    use crate::index::tests::TempIndex;

    let index = TempIndex::new();

    // One document per script; only document 1 contains Hangul.
    let docs = documents!([
        { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
        { "id": 1, "title": "김밥먹을래。" },
        { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
    ]);
    index.add_documents(docs).unwrap();

    let txn = index.write_txn().unwrap();
    let mut search = Search::new(&txn, &index);
    search.query("김밥");

    let result = search.execute().unwrap();

    assert_eq!(result.documents_ids, vec![1]);
}
|
||||
}
|
||||
|
@ -22,18 +22,25 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
|
||||
query: &Q,
|
||||
distinct: Option<&str>,
|
||||
universe: &RoaringBitmap,
|
||||
from: usize,
|
||||
length: usize,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
logger: &mut dyn SearchLogger<Q>,
|
||||
time_budget: TimeBudget,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
) -> Result<BucketSortOutput> {
|
||||
logger.initial_query(query);
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
logger.initial_universe(universe);
|
||||
|
||||
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? {
|
||||
let distinct_field = match distinct {
|
||||
Some(distinct) => Some(distinct),
|
||||
None => ctx.index.distinct_field(ctx.txn)?,
|
||||
};
|
||||
|
||||
let distinct_fid = if let Some(field) = distinct_field {
|
||||
ctx.index.fields_ids_map(ctx.txn)?.id(field)
|
||||
} else {
|
||||
None
|
||||
@ -164,7 +171,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
loop {
|
||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
ranking_rule_scores.push(ScoreDetails::Skipped);
|
||||
|
||||
// remove candidates from the universe without adding them to result if their score is below the threshold
|
||||
if let Some(ranking_score_threshold) = ranking_score_threshold {
|
||||
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
if current_score < ranking_score_threshold {
|
||||
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
|
||||
back!();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
maybe_add_to_results!(bucket);
|
||||
|
||||
ranking_rule_scores.pop();
|
||||
|
||||
if cur_ranking_rule_index == 0 {
|
||||
@ -194,9 +213,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
continue;
|
||||
}
|
||||
|
||||
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
|
||||
let entered = span.enter();
|
||||
|
||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
|
||||
ctx,
|
||||
logger,
|
||||
@ -206,7 +222,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
back!();
|
||||
continue;
|
||||
};
|
||||
drop(entered);
|
||||
|
||||
ranking_rule_scores.push(next_bucket.score);
|
||||
|
||||
@ -220,6 +235,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
debug_assert!(
|
||||
ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
|
||||
);
|
||||
|
||||
// remove candidates from the universe without adding them to result if their score is below the threshold
|
||||
if let Some(ranking_score_threshold) = ranking_score_threshold {
|
||||
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
if current_score < ranking_score_threshold {
|
||||
all_candidates -=
|
||||
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
|
||||
back!();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
|
||||
|
||||
if cur_ranking_rule_index == ranking_rules_len - 1
|
||||
|
@ -27,6 +27,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
|
||||
"exact_attribute".to_owned()
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
|
||||
fn start_iteration(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
@ -38,6 +39,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
|
||||
fn next_bucket(
|
||||
&mut self,
|
||||
_ctx: &mut SearchContext<'ctx>,
|
||||
@ -51,6 +53,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
|
||||
fn end_iteration(
|
||||
&mut self,
|
||||
_ctx: &mut SearchContext<'ctx>,
|
||||
|
@ -209,6 +209,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
"geo_sort".to_owned()
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
|
||||
fn start_iteration(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
@ -234,6 +235,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
|
||||
#[allow(clippy::only_used_in_recursion)]
|
||||
fn next_bucket(
|
||||
&mut self,
|
||||
@ -285,6 +287,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
self.next_bucket(ctx, logger, universe)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
|
||||
fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger<Q>) {
|
||||
// we do not reset the rtree here, it could be used in a next iteration
|
||||
self.query = None;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user