Compare commits


1 Commit

Author: Louis Dureuil
SHA1: 40215bfec3
Message: TMP: check windows free disk space
Date: 2024-05-29 11:27:27 +02:00
192 changed files with 2256 additions and 9256 deletions

View File

@@ -18,9 +18,11 @@ jobs:
  timeout-minutes: 180 # 3h
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
  run: |

View File

@@ -35,9 +35,11 @@ jobs:
  fetch-depth: 0 # fetch full history to be able to get main commit sha
  ref: ${{ steps.comment-branch.outputs.head_ref }}
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  - name: Run benchmarks on PR ${{ github.event.issue.id }}
  run: |

View File

@@ -12,9 +12,11 @@ jobs:
  timeout-minutes: 180 # 3h
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Run benchmarks
  - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -18,9 +18,11 @@ jobs:
  timeout-minutes: 4320 # 72h
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Set variables
  - name: Set current branch name

View File

@@ -13,9 +13,11 @@ jobs:
  runs-on: benchmarks
  timeout-minutes: 4320 # 72h
  steps:
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  - name: Check for Command
  id: command

View File

@@ -16,9 +16,11 @@ jobs:
  timeout-minutes: 4320 # 72h
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Set variables
  - name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
  runs-on: benchmarks
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Set variables
  - name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
  runs-on: benchmarks
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Set variables
  - name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
  runs-on: benchmarks
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Set variables
  - name: Set current branch name

View File

@@ -1,6 +1,4 @@
  name: Look for flaky tests
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  on:
  workflow_dispatch:
  schedule:
@@ -18,7 +16,10 @@ jobs:
  run: |
  apt-get update && apt-get install -y curl
  apt-get install build-essential -y
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Install cargo-flaky
  run: cargo install cargo-flaky
  - name: Run cargo flaky in the dumps

View File

@@ -1,6 +1,5 @@
  name: Run the indexing fuzzer
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  on:
  push:
  branches:
@@ -13,9 +12,11 @@ jobs:
  timeout-minutes: 4320 # 72h
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  # Run benchmarks
  - name: Run the fuzzer

View File

@@ -15,8 +15,6 @@ jobs:
  debian:
  name: Publish debian packagge
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  runs-on: ubuntu-latest
  needs: check-version
  container:
@@ -27,7 +25,10 @@ jobs:
  run: |
  apt-get update && apt-get install -y curl
  apt-get install build-essential -y
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Install cargo-deb
  run: cargo install cargo-deb
  - uses: actions/checkout@v3

View File

@@ -35,8 +35,6 @@ jobs:
  publish-linux:
  name: Publish binary for Linux
  runs-on: ubuntu-latest
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  needs: check-version
  container:
  # Use ubuntu-18.04 to compile with glibc 2.27
@@ -47,7 +45,10 @@ jobs:
  run: |
  apt-get update && apt-get install -y curl
  apt-get install build-essential -y
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Build
  run: cargo build --release --locked
  # No need to upload binaries for dry run (cron)
@@ -77,7 +78,10 @@ jobs:
  asset_name: meilisearch-windows-amd64.exe
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Build
  run: cargo build --release --locked
  # No need to upload binaries for dry run (cron)
@@ -103,10 +107,12 @@ jobs:
  - name: Checkout repository
  uses: actions/checkout@v3
  - name: Installing Rust toolchain
- uses: helix-editor/rust-toolchain@v1
+ uses: actions-rs/toolchain@v1
  with:
+ toolchain: stable
  profile: minimal
  target: ${{ matrix.target }}
+ override: true
  - name: Cargo build
  uses: actions-rs/cargo@v1
  with:
@@ -126,8 +132,6 @@ jobs:
  name: Publish binary for aarch64
  runs-on: ubuntu-latest
  needs: check-version
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  container:
  # Use ubuntu-18.04 to compile with glibc 2.27
  image: ubuntu:18.04
@@ -150,10 +154,12 @@ jobs:
  add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
  apt-get update -y && apt-get install -y docker-ce
  - name: Installing Rust toolchain
- uses: helix-editor/rust-toolchain@v1
+ uses: actions-rs/toolchain@v1
  with:
+ toolchain: stable
  profile: minimal
  target: ${{ matrix.target }}
+ override: true
  - name: Configure target aarch64 GNU
  ## Environment variable is not passed using env:
  ## LD gold won't work with MUSL

View File

@@ -80,11 +80,10 @@ jobs:
  type=ref,event=tag
  type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
  type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
- type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
  type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
  - name: Build and push
- uses: docker/build-push-action@v6
+ uses: docker/build-push-action@v5
  with:
  push: true
  platforms: linux/amd64,linux/arm64
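For context, tag rules like the ones above are supplied through the `tags` input of a docker/metadata-action step, whose output then feeds the Build and push step. The sketch below only illustrates where those rules sit; the step name, action version, and image name are assumptions, not values taken from this diff.

  - name: Docker meta                       # hypothetical step name
    id: meta
    uses: docker/metadata-action@v5         # assumed action version
    with:
      images: getmeili/meilisearch          # assumed image name
      tags: |
        type=ref,event=tag
        type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
        type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}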

View File

@@ -21,8 +21,6 @@ jobs:
  test-linux:
  name: Tests on ubuntu-18.04
  runs-on: ubuntu-latest
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  container:
  # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
  image: ubuntu:18.04
@@ -33,7 +31,10 @@ jobs:
  apt-get update && apt-get install -y curl
  apt-get install build-essential -y
  - name: Setup test with Rust stable
- uses: helix-editor/rust-toolchain@v1
+ uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Cache dependencies
  uses: Swatinem/rust-cache@v2.7.1
  - name: Run cargo check without any default features
@@ -55,15 +56,36 @@ jobs:
  matrix:
  os: [macos-12, windows-2022]
  steps:
+ - name: Check free disk space on C
+ run: |
+ fsutil volume diskfree c:
+ - name: Check free disk space on D
+ run: |
+ fsutil volume diskfree d:
  - uses: actions/checkout@v3
  - name: Cache dependencies
  uses: Swatinem/rust-cache@v2.7.1
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
+ - name: Check free disk space on C
+ run: |
+ fsutil volume diskfree c:
+ - name: Check free disk space on D
+ run: |
+ fsutil volume diskfree d:
  - name: Run cargo check without any default features
  uses: actions-rs/cargo@v1
  with:
  command: build
  args: --locked --release --no-default-features --all
+ - name: Check free disk space on C
+ run: |
+ fsutil volume diskfree c:
+ - name: Check free disk space on D
+ run: |
+ fsutil volume diskfree d:
  - name: Run cargo test
  uses: actions-rs/cargo@v1
  with:
@@ -73,8 +95,6 @@ jobs:
  test-all-features:
  name: Tests almost all features
  runs-on: ubuntu-latest
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  container:
  # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
  image: ubuntu:18.04
@@ -85,7 +105,10 @@ jobs:
  run: |
  apt-get update
  apt-get install --assume-yes build-essential curl
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Run cargo build with almost all features
  run: |
  cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@@ -95,8 +118,6 @@ jobs:
  test-disabled-tokenization:
  name: Test disabled tokenization
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  runs-on: ubuntu-latest
  container:
  image: ubuntu:18.04
@@ -107,10 +128,13 @@ jobs:
  run: |
  apt-get update
  apt-get install --assume-yes build-essential curl
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Run cargo tree without default features and check lindera is not present
  run: |
- if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
+ if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then
  echo "lindera has been found in the sources and it shouldn't"
  exit 1
  fi
@@ -121,8 +145,6 @@ jobs:
  # We run tests in debug also, to make sure that the debug_assertions are hit
  test-debug:
  name: Run tests in debug
- env:
- ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  runs-on: ubuntu-latest
  container:
  # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
@@ -133,7 +155,10 @@ jobs:
  run: |
  apt-get update && apt-get install -y curl
  apt-get install build-essential -y
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ override: true
  - name: Cache dependencies
  uses: Swatinem/rust-cache@v2.7.1
  - name: Run tests in debug
@@ -147,9 +172,11 @@ jobs:
  runs-on: ubuntu-latest
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: 1.75.0
+ override: true
  components: clippy
  - name: Cache dependencies
  uses: Swatinem/rust-cache@v2.7.1
@@ -164,10 +191,10 @@ jobs:
  runs-on: ubuntu-latest
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
- toolchain: nightly-2024-06-25
+ toolchain: nightly
  override: true
  components: rustfmt
  - name: Cache dependencies

View File

@@ -18,9 +18,11 @@ jobs:
  runs-on: ubuntu-latest
  steps:
  - uses: actions/checkout@v3
- - uses: helix-editor/rust-toolchain@v1
+ - uses: actions-rs/toolchain@v1
  with:
  profile: minimal
+ toolchain: stable
+ override: true
  - name: Install sd
  run: cargo install sd
  - name: Update Cargo.toml file

View File

@@ -109,12 +109,6 @@ They are JSON files with the following structure (comments are not actually supp
"run_count": 3, "run_count": 3,
// List of arguments to add to the Meilisearch command line. // List of arguments to add to the Meilisearch command line.
"extra_cli_args": ["--max-indexing-threads=1"], "extra_cli_args": ["--max-indexing-threads=1"],
// An expression that can be parsed as a comma-separated list of targets and levels
// as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
// The expression is used to filter the spans that are measured for profiling purposes.
// Optional, defaults to "indexing::=trace" (for indexing workloads), common other values is
// "search::=trace"
"target": "indexing::=trace",
// List of named assets that can be used in the commands. // List of named assets that can be used in the commands.
"assets": { "assets": {
// name of the asset. // name of the asset.
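Read together, the keys documented in this hunk suggest a workload file of roughly the following shape. This is only a sketch assembled from the fields visible above: the asset name is illustrative, the contents of an asset entry are not shown in this diff and are left empty, and the "target" key exists only on the removed (left-hand) side of the comparison.

  {
    "run_count": 3,
    "extra_cli_args": ["--max-indexing-threads=1"],
    "target": "indexing::=trace",
    "assets": {
      "movies.json": {}
    }
  }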

Cargo.lock (generated)
View File

@@ -36,9 +36,9 @@ dependencies = [
[[package]] [[package]]
name = "actix-http" name = "actix-http"
version = "3.7.0" version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d" checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
dependencies = [ dependencies = [
"actix-codec", "actix-codec",
"actix-rt", "actix-rt",
@@ -46,7 +46,7 @@ dependencies = [
"actix-tls", "actix-tls",
"actix-utils", "actix-utils",
"ahash", "ahash",
"base64 0.22.1", "base64 0.21.7",
"bitflags 2.5.0", "bitflags 2.5.0",
"brotli", "brotli",
"bytes", "bytes",
@@ -85,15 +85,13 @@ dependencies = [
[[package]] [[package]]
name = "actix-router" name = "actix-router"
version = "0.5.3" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8" checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799"
dependencies = [ dependencies = [
"bytestring", "bytestring",
"cfg-if",
"http 0.2.11", "http 0.2.11",
"regex", "regex",
"regex-lite",
"serde", "serde",
"tracing", "tracing",
] ]
@@ -140,9 +138,9 @@ dependencies = [
[[package]] [[package]]
name = "actix-tls" name = "actix-tls"
version = "3.4.0" version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389" checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
dependencies = [ dependencies = [
"actix-rt", "actix-rt",
"actix-service", "actix-service",
@@ -169,9 +167,9 @@ dependencies = [
[[package]] [[package]]
name = "actix-web" name = "actix-web"
version = "4.6.0" version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32" checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
dependencies = [ dependencies = [
"actix-codec", "actix-codec",
"actix-http", "actix-http",
@@ -198,7 +196,7 @@ dependencies = [
"mime", "mime",
"once_cell", "once_cell",
"pin-project-lite", "pin-project-lite",
"regex-lite", "regex",
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
@@ -222,9 +220,8 @@ dependencies = [
[[package]] [[package]]
name = "actix-web-static-files" name = "actix-web-static-files"
version = "4.0.1" version = "3.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df"
checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"derive_more", "derive_more",
@@ -381,9 +378,9 @@ dependencies = [
[[package]] [[package]]
name = "arroy" name = "arroy"
version = "0.4.0" version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
@@ -503,7 +500,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
@@ -616,9 +613,9 @@ dependencies = [
[[package]] [[package]]
name = "brotli" name = "brotli"
version = "6.0.0" version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
dependencies = [ dependencies = [
"alloc-no-stdlib", "alloc-no-stdlib",
"alloc-stdlib", "alloc-stdlib",
@@ -627,9 +624,9 @@ dependencies = [
[[package]] [[package]]
name = "brotli-decompressor" name = "brotli-decompressor"
version = "4.0.1" version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
dependencies = [ dependencies = [
"alloc-no-stdlib", "alloc-no-stdlib",
"alloc-stdlib", "alloc-stdlib",
@@ -648,7 +645,7 @@ dependencies = [
[[package]] [[package]]
name = "build-info" name = "build-info"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"time", "time",
@@ -679,9 +676,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.16.1" version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
dependencies = [ dependencies = [
"bytemuck_derive", "bytemuck_derive",
] ]
@@ -898,9 +895,9 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.8.11" version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6" checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"cow-utils", "cow-utils",
@@ -989,7 +986,7 @@ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
"clap_lex", "clap_lex",
"strsim 0.10.0", "strsim",
] ]
[[package]] [[package]]
@@ -1280,12 +1277,12 @@ dependencies = [
[[package]] [[package]]
name = "darling" name = "darling"
version = "0.20.9" version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
dependencies = [ dependencies = [
"darling_core 0.20.9", "darling_core 0.20.3",
"darling_macro 0.20.9", "darling_macro 0.20.3",
] ]
[[package]] [[package]]
@@ -1298,21 +1295,21 @@ dependencies = [
"ident_case", "ident_case",
"proc-macro2", "proc-macro2",
"quote", "quote",
"strsim 0.10.0", "strsim",
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]] [[package]]
name = "darling_core" name = "darling_core"
version = "0.20.9" version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
dependencies = [ dependencies = [
"fnv", "fnv",
"ident_case", "ident_case",
"proc-macro2", "proc-macro2",
"quote", "quote",
"strsim 0.11.1", "strsim",
"syn 2.0.60", "syn 2.0.60",
] ]
@@ -1329,11 +1326,11 @@ dependencies = [
[[package]] [[package]]
name = "darling_macro" name = "darling_macro"
version = "0.20.9" version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
dependencies = [ dependencies = [
"darling_core 0.20.9", "darling_core 0.20.3",
"quote", "quote",
"syn 2.0.60", "syn 2.0.60",
] ]
@@ -1386,15 +1383,6 @@ dependencies = [
"derive_builder_macro 0.13.1", "derive_builder_macro 0.13.1",
] ]
[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
"derive_builder_macro 0.20.0",
]
[[package]] [[package]]
name = "derive_builder_core" name = "derive_builder_core"
version = "0.12.0" version = "0.12.0"
@@ -1419,18 +1407,6 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
"darling 0.20.9",
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]] [[package]]
name = "derive_builder_macro" name = "derive_builder_macro"
version = "0.12.0" version = "0.12.0"
@@ -1451,16 +1427,6 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
"derive_builder_core 0.20.0",
"syn 2.0.60",
]
[[package]] [[package]]
name = "derive_more" name = "derive_more"
version = "0.99.17" version = "0.99.17"
@@ -1488,7 +1454,7 @@ dependencies = [
"serde-cs", "serde-cs",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
"strsim 0.10.0", "strsim",
] ]
[[package]] [[package]]
@@ -1579,7 +1545,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@@ -1741,6 +1707,29 @@ dependencies = [
"syn 2.0.60", "syn 2.0.60",
] ]
[[package]]
name = "env_filter"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"humantime",
"log",
]
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.1" version = "1.0.1"
@@ -1795,7 +1784,7 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5" checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
dependencies = [ dependencies = [
"darling 0.20.9", "darling 0.20.3",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.60", "syn 2.0.60",
@@ -1804,7 +1793,7 @@ dependencies = [
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"faux", "faux",
"tempfile", "tempfile",
@@ -1827,7 +1816,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"insta", "insta",
"nom", "nom",
@@ -1847,7 +1836,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -1965,7 +1954,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"clap", "clap",
@@ -2273,9 +2262,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]] [[package]]
name = "heed" name = "heed"
version = "0.20.2" version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8" checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
dependencies = [ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"byteorder", "byteorder",
@@ -2390,6 +2379,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]] [[package]]
name = "hyper" name = "hyper"
version = "0.14.27" version = "0.14.27"
@@ -2452,10 +2447,9 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy",
"big_s", "big_s",
"bincode", "bincode",
"crossbeam", "crossbeam",
@@ -2466,7 +2460,6 @@ dependencies = [
"file-store", "file-store",
"flate2", "flate2",
"insta", "insta",
"maplit",
"meili-snap", "meili-snap",
"meilisearch-auth", "meilisearch-auth",
"meilisearch-types", "meilisearch-types",
@@ -2649,7 +2642,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -2785,9 +2778,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera" name = "lindera"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f" checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88"
dependencies = [ dependencies = [
"lindera-analyzer", "lindera-analyzer",
"lindera-core", "lindera-core",
@@ -2798,9 +2791,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-analyzer" name = "lindera-analyzer"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133" checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -2828,9 +2821,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-cc-cedict" name = "lindera-cc-cedict"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1" checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -2842,21 +2835,29 @@ dependencies = [
[[package]] [[package]]
name = "lindera-cc-cedict-builder" name = "lindera-cc-cedict-builder"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820" checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"lindera-dictionary-builder", "log",
"yada",
] ]
[[package]] [[package]]
name = "lindera-compress" name = "lindera-compress"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c" checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2", "flate2",
@@ -2865,9 +2866,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-core" name = "lindera-core"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15" checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -2882,9 +2883,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-decompress" name = "lindera-decompress"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12" checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2", "flate2",
@@ -2893,9 +2894,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-dictionary" name = "lindera-dictionary"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e" checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -2916,33 +2917,11 @@ dependencies = [
"strum_macros", "strum_macros",
] ]
[[package]]
name = "lindera-dictionary-builder"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"derive_builder 0.20.0",
"encoding",
"encoding_rs",
"encoding_rs_io",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]] [[package]]
name = "lindera-filter" name = "lindera-filter"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a" checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"csv", "csv",
@@ -2965,9 +2944,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic" name = "lindera-ipadic"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664" checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -2979,21 +2958,31 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-builder" name = "lindera-ipadic-builder"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d" checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"lindera-dictionary-builder", "log",
"serde",
"yada",
] ]
[[package]] [[package]]
name = "lindera-ipadic-neologd" name = "lindera-ipadic-neologd"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1" checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3005,21 +2994,31 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-neologd-builder" name = "lindera-ipadic-neologd-builder"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d" checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"lindera-dictionary-builder", "log",
"serde",
"yada",
] ]
[[package]] [[package]]
name = "lindera-ko-dic" name = "lindera-ko-dic"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db" checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3035,21 +3034,29 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ko-dic-builder" name = "lindera-ko-dic-builder"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51" checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"lindera-dictionary-builder", "log",
"yada",
] ]
[[package]] [[package]]
name = "lindera-tokenizer" name = "lindera-tokenizer"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14" checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d"
dependencies = [ dependencies = [
"bincode", "bincode",
"lindera-core", "lindera-core",
@@ -3061,9 +3068,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic" name = "lindera-unidic"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597" checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3079,14 +3086,22 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic-builder" name = "lindera-unidic-builder"
version = "0.31.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974" checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"lindera-dictionary-builder", "log",
"yada",
] ]
[[package]] [[package]]
@@ -3172,9 +3187,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
[[package]] [[package]]
name = "lmdb-master-sys" name = "lmdb-master-sys"
version = "0.2.1" version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3" checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
dependencies = [ dependencies = [
"cc", "cc",
"doxygen-rs", "doxygen-rs",
@@ -3257,7 +3272,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"insta", "insta",
"md5", "md5",
@@ -3266,7 +3281,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@@ -3358,7 +3373,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"base64 0.21.7", "base64 0.21.7",
"enum-iterator", "enum-iterator",
@@ -3377,7 +3392,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@@ -3407,7 +3422,7 @@ dependencies = [
[[package]] [[package]]
name = "meilitool" name = "meilitool"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
@@ -3446,7 +3461,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"arroy", "arroy",
"big_s", "big_s",
@@ -3886,7 +3901,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",
@@ -4325,12 +4340,6 @@ dependencies = [
"regex-syntax", "regex-syntax",
] ]
[[package]]
name = "regex-lite"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.2" version = "0.8.2"
@@ -4379,6 +4388,12 @@ dependencies = [
"winreg", "winreg",
] ]
[[package]]
name = "retain_mut"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.8" version = "0.17.8"
@@ -4396,12 +4411,13 @@ dependencies = [
[[package]] [[package]]
name = "roaring" name = "roaring"
version = "0.10.5" version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8" checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
"retain_mut",
"serde", "serde",
] ]
@@ -4884,12 +4900,6 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]] [[package]]
name = "strum" name = "strum"
version = "0.26.2" version = "0.26.2"
@@ -5053,18 +5063,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.61" version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.61" version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -5303,9 +5313,9 @@ dependencies = [
[[package]] [[package]]
name = "tracing-actix-web" name = "tracing-actix-web"
version = "0.7.11" version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee9e39a66d9b615644893ffc1704d2a89b5b315b7fd0228ad3182ca9a306b19" checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"mutually_exclusive_features", "mutually_exclusive_features",
@@ -6042,7 +6052,7 @@ dependencies = [
[[package]] [[package]]
name = "xtask" name = "xtask"
version = "1.9.0" version = "1.8.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"build-info", "build-info",
@@ -6080,13 +6090,12 @@ dependencies = [
[[package]] [[package]]
name = "yaup" name = "yaup"
version = "0.3.1" version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6" checksum = "a59e7d27bed43f7c37c25df5192ea9d435a8092a902e02203359ac9ce3e429d9"
dependencies = [ dependencies = [
"form_urlencoded",
"serde", "serde",
"thiserror", "url",
] ]
[[package]] [[package]]

View File

@@ -22,7 +22,7 @@ members = [
  ]
  [workspace.package]
- version = "1.9.0"
+ version = "1.8.0"
  authors = [
  "Quentin de Quelen <quentin@dequelen.me>",
  "Clément Renault <clement@meilisearch.com>",

View File

@@ -1,6 +1,9 @@
<p align="center"> <p align="center">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo" target="_blank"> <a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-kawaii.png"> <img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a> </a>
</p> </p>
@@ -22,7 +25,7 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p> <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow. Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo"> <p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank"> <a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -33,18 +36,11 @@
</a> </a>
</p> </p>
## 🖥 Examples 🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).
## ✨ Features ## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience - **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -59,15 +55,15 @@ See the list of all our example apps in our [demos repository](https://github.co
  ## 📖 Documentation
- You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
+ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
  ## 🚀 Getting started
- For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
+ For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
- ## 🌍 Supercharge your Meilisearch experience
+ ## Supercharge your Meilisearch experience
- Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.
+ Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
  ## 🧰 SDKs & integration tools
@@ -87,15 +83,15 @@ Finally, for more in-depth information, refer to our articles explaining fundame
## 📊 Telemetry ## 📊 Telemetry
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want. Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data. To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation. If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
## 📫 Get in touch! ## 📫 Get in touch!
Meilisearch is a search engine created by [Meili]([https://www.welcometothejungle.com/en/companies/meilisearch](https://www.meilisearch.com/careers)), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact) Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months. 🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
@@ -109,11 +105,11 @@ Thank you for your support!
## 👩‍💻 Contributing ## 👩‍💻 Contributing
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md). Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
## 📦 Versioning ## 📦 Versioning
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases). Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md). The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).

Binary file not shown.


View File

@@ -780,7 +780,7 @@ expression: document
1.3484878540039063 1.3484878540039063
] ]
], ],
"regenerate": true "userProvided": false
} }
} }
} }

View File

@@ -779,7 +779,7 @@ expression: document
1.04031240940094 1.04031240940094
] ]
], ],
"regenerate": true "userProvided": false
} }
} }
} }

View File

@@ -152,7 +152,6 @@ impl Settings<Unchecked> {
} }
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[cfg_attr(test, derive(serde::Serialize))] #[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]

View File

@@ -182,7 +182,6 @@ impl Settings<Unchecked> {
} }
} }
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))] #[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]

View File

@@ -200,7 +200,6 @@ impl std::ops::Deref for IndexUid {
} }
} }
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[derive(Debug)] #[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))] #[cfg_attr(test, derive(serde::Serialize))]
#[cfg_attr(test, serde(rename_all = "camelCase"))] #[cfg_attr(test, serde(rename_all = "camelCase"))]

View File

@@ -40,9 +40,7 @@ ureq = "2.9.7"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
arroy = "0.4.0"
big_s = "1.0.2" big_s = "1.0.2"
crossbeam = "0.8.4" crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] } insta = { version = "1.34.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }

View File

@@ -909,7 +909,6 @@ impl IndexScheduler {
let fields_ids_map = index.fields_ids_map(&rtxn)?; let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(&rtxn)?;
// 3.1. Dump the documents // 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? { for ret in index.all_documents(&rtxn)? {
@@ -952,21 +951,16 @@ impl IndexScheduler {
}; };
for (embedder_name, embeddings) in embeddings { for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs // don't change the entry if it already exists, because it was user-provided
.iter() vectors.entry(embedder_name).or_insert_with(|| {
.find(|conf| conf.name == embedder_name) let embeddings = ExplicitVectors {
.is_some_and(|conf| conf.user_provided.contains(id)); embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
let embeddings = ExplicitVectors { ),
embeddings: Some( user_provided: false,
VectorOrArrayOfVectors::from_array_of_vectors(embeddings), };
), serde_json::to_value(embeddings).unwrap()
regenerate: !user_provided, });
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
} }
} }
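The hunk above swaps the per-embedder `_vectors` entry written during a dump between two shapes: one derives a `regenerate` flag from the embedder's user-provided bitmap (`regenerate = !user_provided`), the other stores an explicit `userProvided` flag. A minimal sketch of both JSON shapes, built with `serde_json` and made-up embedding values:

```rust
use serde_json::json;

fn main() {
    // Whether the current document id is in the embedder's user-provided bitmap.
    let user_provided = true;

    // Shape A (one side of the hunk): a `regenerate` flag derived from the bitmap.
    let with_regenerate = json!({
        "embeddings": [[0.1, 0.2, 0.3]],
        "regenerate": !user_provided
    });

    // Shape B (the other side of the hunk): an explicit `userProvided` flag.
    let with_user_provided = json!({
        "embeddings": [[0.1, 0.2, 0.3]],
        "userProvided": user_provided
    });

    println!("{with_regenerate}\n{with_user_provided}");
}
```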

View File

@@ -53,7 +53,6 @@ use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
@@ -1460,39 +1459,33 @@ impl IndexScheduler {
// TODO: consider using a type alias or a struct embedder/template // TODO: consider using a type alias or a struct embedder/template
pub fn embedders( pub fn embedders(
&self, &self,
embedding_configs: Vec<IndexEmbeddingConfig>, embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
) -> Result<EmbeddingConfigs> { ) -> Result<EmbeddingConfigs> {
let res: Result<_> = embedding_configs let res: Result<_> = embedding_configs
.into_iter() .into_iter()
.map( .map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
|IndexEmbeddingConfig { let prompt =
name, Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
config: milli::vector::EmbeddingConfig { embedder_options, prompt }, // optimistically return existing embedder
.. {
}| { let embedders = self.embedders.read().unwrap();
let prompt = if let Some(embedder) = embedders.get(&embedder_options) {
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?); return Ok((name, (embedder.clone(), prompt)));
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((name, (embedder.clone(), prompt)));
}
} }
}
// add missing embedder // add missing embedder
let embedder = Arc::new( let embedder = Arc::new(
Embedder::new(embedder_options.clone()) Embedder::new(embedder_options.clone())
.map_err(meilisearch_types::milli::vector::Error::from) .map_err(meilisearch_types::milli::vector::Error::from)
.map_err(meilisearch_types::milli::Error::from)?, .map_err(meilisearch_types::milli::Error::from)?,
); );
{ {
let mut embedders = self.embedders.write().unwrap(); let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone()); embedders.insert(embedder_options, embedder.clone());
} }
Ok((name, (embedder, prompt))) Ok((name, (embedder, prompt)))
}, })
)
.collect(); .collect();
res.map(EmbeddingConfigs::new) res.map(EmbeddingConfigs::new)
} }
@@ -1755,9 +1748,6 @@ mod tests {
use meilisearch_types::milli::update::IndexDocumentsMethod::{ use meilisearch_types::milli::update::IndexDocumentsMethod::{
ReplaceDocuments, UpdateDocuments, ReplaceDocuments, UpdateDocuments,
}; };
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::IndexSwap; use meilisearch_types::tasks::IndexSwap;
use meilisearch_types::VERSION_FILE_NAME; use meilisearch_types::VERSION_FILE_NAME;
use tempfile::{NamedTempFile, TempDir}; use tempfile::{NamedTempFile, TempDir};
@@ -1811,7 +1801,7 @@ mod tests {
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false, enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5, index_count: 5,
indexer_config, indexer_config,
autobatching_enabled: true, autobatching_enabled: true,
@@ -1836,7 +1826,6 @@ mod tests {
assert_eq!(breakpoint, (Init, false)); assert_eq!(breakpoint, (Init, false));
let index_scheduler_handle = IndexSchedulerHandle { let index_scheduler_handle = IndexSchedulerHandle {
_tempdir: tempdir, _tempdir: tempdir,
index_scheduler: index_scheduler.private_clone(),
test_breakpoint_rcv: receiver, test_breakpoint_rcv: receiver,
last_breakpoint: breakpoint.0, last_breakpoint: breakpoint.0,
}; };
@@ -1925,7 +1914,6 @@ mod tests {
pub struct IndexSchedulerHandle { pub struct IndexSchedulerHandle {
_tempdir: TempDir, _tempdir: TempDir,
index_scheduler: IndexScheduler,
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>, test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
last_breakpoint: Breakpoint, last_breakpoint: Breakpoint,
} }
@@ -1943,13 +1931,9 @@ mod tests {
{ {
Ok(b) => b, Ok(b) => b,
Err(RecvTimeoutError::Timeout) => { Err(RecvTimeoutError::Timeout) => {
let state = snapshot_index_scheduler(&self.index_scheduler); panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}")
}
Err(RecvTimeoutError::Disconnected) => {
let state = snapshot_index_scheduler(&self.index_scheduler);
panic!("The scheduler crashed.\n{state}")
} }
Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."),
}; };
// if we've already encountered a breakpoint we're supposed to be stuck on the false // if we've already encountered a breakpoint we're supposed to be stuck on the false
// and we expect the same variant with the true to come now. // and we expect the same variant with the true to come now.
@@ -1968,13 +1952,9 @@ mod tests {
{ {
Ok(b) => b, Ok(b) => b,
Err(RecvTimeoutError::Timeout) => { Err(RecvTimeoutError::Timeout) => {
let state = snapshot_index_scheduler(&self.index_scheduler); panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}")
}
Err(RecvTimeoutError::Disconnected) => {
let state = snapshot_index_scheduler(&self.index_scheduler);
panic!("The scheduler crashed.\n{state}")
} }
Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."),
}; };
assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite");
@@ -1988,10 +1968,9 @@ mod tests {
fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) { fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) {
for breakpoint in breakpoints { for breakpoint in breakpoints {
let b = self.advance(); let b = self.advance();
let state = snapshot_index_scheduler(&self.index_scheduler);
assert_eq!( assert_eq!(
b, breakpoint, b, breakpoint,
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{state}", "Was expecting the breakpoint `{:?}` but instead got `{:?}`.",
breakpoint, b breakpoint, b
); );
} }
@@ -2016,7 +1995,6 @@ mod tests {
// Wait for one successful batch. // Wait for one successful batch.
#[track_caller] #[track_caller]
fn advance_one_successful_batch(&mut self) { fn advance_one_successful_batch(&mut self) {
self.index_scheduler.assert_internally_consistent();
self.advance_till([Start, BatchCreated]); self.advance_till([Start, BatchCreated]);
loop { loop {
match self.advance() { match self.advance() {
@@ -2025,17 +2003,13 @@ mod tests {
InsideProcessBatch => (), InsideProcessBatch => (),
// the batch went successfully, we can stop the loop and go on with the next states. // the batch went successfully, we can stop the loop and go on with the next states.
ProcessBatchSucceeded => break, ProcessBatchSucceeded => break,
AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), AbortedIndexation => panic!("The batch was aborted."),
ProcessBatchFailed => { ProcessBatchFailed => panic!("The batch failed."),
while self.advance() != Start {}
panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler))
},
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
} }
} }
self.advance_till([AfterProcessing]); self.advance_till([AfterProcessing]);
self.index_scheduler.assert_internally_consistent();
} }
// Wait for one failed batch. // Wait for one failed batch.
@@ -2049,8 +2023,8 @@ mod tests {
InsideProcessBatch => (), InsideProcessBatch => (),
// the batch went failed, we can stop the loop and go on with the next states. // the batch went failed, we can stop the loop and go on with the next states.
ProcessBatchFailed => break, ProcessBatchFailed => break,
ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)), ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)"),
AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), AbortedIndexation => panic!("The batch was aborted."),
breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
} }
} }
@@ -3078,10 +3052,8 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap(); let configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); let (_, embedding_config) = configs.first().unwrap();
insta::assert_snapshot!(name, @"default"); insta::assert_json_snapshot!(embedding_config.embedder_options);
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(config.embedder_options);
} }
#[test] #[test]
@@ -5017,6 +4989,7 @@ mod tests {
false, false,
) )
.unwrap(); .unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");
@@ -5027,7 +5000,7 @@ mod tests {
insta::assert_json_snapshot!(task.details); insta::assert_json_snapshot!(task.details);
} }
handle.advance_one_successful_batch(); handle.advance_n_successful_batches(1);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
{ {
@@ -5044,17 +5017,13 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap(); let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below // for consistency with the below
#[allow(clippy::get_first)] #[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = let (name, fakerest_config) = configs.get(0).unwrap();
configs.get(0).unwrap(); insta::assert_json_snapshot!(name, @r###""A_fakerest""###);
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(fakerest_config.embedder_options); insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone(); let fakerest_name = name.clone();
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = let (name, simple_hf_config) = configs.get(1).unwrap();
configs.get(1).unwrap(); insta::assert_json_snapshot!(name, @r###""B_small_hf""###);
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(simple_hf_config.embedder_options); insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone(); let simple_hf_name = name.clone();
@@ -5069,25 +5038,25 @@ mod tests {
// add one doc, specifying vectors // add one doc, specifying vectors
let doc = serde_json::json!( let doc = serde_json::json!(
{ {
"id": 0, "id": 0,
"doggo": "Intel", "doggo": "Intel",
"breed": "beagle", "breed": "beagle",
"_vectors": { "_vectors": {
&fakerest_name: { &fakerest_name: {
// this will never trigger regeneration, which is good because we can't actually generate with // this will never trigger regeneration, which is good because we can't actually generate with
// this embedder // this embedder
"regenerate": false, "userProvided": true,
"embeddings": beagle_embed, "embeddings": beagle_embed,
}, },
&simple_hf_name: { &simple_hf_name: {
// this will be regenerated on updates // this will be regenerated on updates
"regenerate": true, "userProvided": false,
"embeddings": lab_embed, "embeddings": lab_embed,
}, },
"noise": [0.1, 0.2, 0.3] "noise": [0.1, 0.2, 0.3]
} }
} }
); );
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap(); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap();
@@ -5109,6 +5078,7 @@ mod tests {
false, false,
) )
.unwrap(); .unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");
@@ -5121,19 +5091,6 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap(); let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
// Ensure the document has been inserted into the relevant bitmap
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
let embeddings = index.embeddings(&rtxn, 0).unwrap(); let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
@@ -5183,6 +5140,7 @@ mod tests {
false, false,
) )
.unwrap(); .unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
@@ -5195,25 +5153,11 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap(); let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
// Ensure the document has been inserted into the relevant bitmap
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_provided } =
configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
let embeddings = index.embeddings(&rtxn, 0).unwrap(); let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou because set to regenerate // automatically changed to patou
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
// remained beagle // remained beagle because set to userProvided
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
@@ -5232,578 +5176,4 @@ mod tests {
} }
} }
} }
#[test]
fn import_vectors_first_and_embedder_later() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let content = serde_json::json!(
[
{
"id": 0,
"doggo": "kefir",
},
{
"id": 1,
"doggo": "intel",
"_vectors": {
"my_doggo_embedder": vec![1; 384],
"unknown embedder": vec![1, 2, 3],
}
},
{
"id": 2,
"doggo": "max",
"_vectors": {
"my_doggo_embedder": {
"regenerate": false,
"embeddings": vec![2; 384],
},
"unknown embedder": vec![4, 5],
},
},
{
"id": 3,
"doggo": "marcel",
"_vectors": {
"my_doggo_embedder": {
"regenerate": true,
"embeddings": vec![3; 384],
},
},
},
{
"id": 4,
"doggo": "sora",
"_vectors": {
"my_doggo_embedder": {
"regenerate": true,
},
},
},
]
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
let documents_count =
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
.unwrap();
snapshot!(documents_count, @"5");
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: None,
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
let setting = meilisearch_types::settings::Settings::<Unchecked> {
embedders: Setting::Set(maplit::btreemap! {
S("my_doggo_embedder") => Setting::Set(EmbeddingSettings {
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
document_template: Setting::Set(S("{{doc.doggo}}")),
..Default::default()
})
}),
..Default::default()
};
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings: Box::new(setting),
is_deletion: false,
allow_index_creation: false,
},
None,
false,
)
.unwrap();
index_scheduler.assert_internally_consistent();
handle.advance_one_successful_batch();
index_scheduler.assert_internally_consistent();
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
// all the vectors linked to the newly specified embedder have been removed
// Only the unknown embedders stay in the document DB
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
// even though we specified the vector for ID 3, it shouldn't be marked
// as user provided since we explicitly marked it as NOT user provided.
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "my_doggo_embedder",
config: EmbeddingConfig {
embedder_options: HuggingFace(
EmbedderOptions {
model: "sentence-transformers/all-MiniLM-L6-v2",
revision: Some(
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
),
distribution: None,
},
),
prompt: PromptData {
template: "{{doc.doggo}}",
},
},
user_provided: RoaringBitmap<[1, 2]>,
},
]
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
// If we update marcel it should regenerate its embedding automatically
let content = serde_json::json!(
[
{
"id": 3,
"doggo": "marvel",
},
{
"id": 4,
"doggo": "sorry",
},
]
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1_u128).unwrap();
let documents_count =
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
.unwrap();
snapshot!(documents_count, @"2");
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: None,
method: UpdateDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
// the document with the id 3 should have its original embedding updated
let rtxn = index.read_txn().unwrap();
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
snapshot!(json_string!(doc), @r###"
{
"id": 3,
"doggo": "marvel"
}
"###);
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
}
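The test above exercises the different `_vectors` shapes a document can carry for an embedder: a bare array of numbers, an object with `regenerate` and `embeddings`, and an object with `regenerate` alone (ids 1 to 4). A minimal `serde_json` sketch of those shapes, with shortened, made-up values:

```rust
use serde_json::json;

fn main() {
    // Shapes taken from the documents pushed in the test above (values shortened).
    let doc = json!({
        "id": 2,
        "doggo": "max",
        "_vectors": {
            // bare array of numbers (the "unknown embedder" entries in the test)
            "unknown embedder": [4.0, 5.0],
            // explicit object: an embedding that must never be regenerated
            "my_doggo_embedder": {
                "regenerate": false,
                "embeddings": [2.0, 2.0, 2.0]
            }
        }
    });
    // Document 4 uses `{"regenerate": true}` with no `embeddings`, asking the
    // engine to compute the embedding from the document itself.
    println!("{doc}");
}
```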
#[test]
fn delete_document_containing_vector() {
// 1. Add an embedder
// 2. Push two documents containing a simple vector
// 3. Delete the first document
// 4. The user-defined roaring bitmap shouldn't contain the id of the first document anymore
// 5. Clear the index
// 6. The user-defined roaring bitmap shouldn't contain the id of the second document
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let setting = meilisearch_types::settings::Settings::<Unchecked> {
embedders: Setting::Set(maplit::btreemap! {
S("manual") => Setting::Set(EmbeddingSettings {
source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
dimensions: Setting::Set(3),
..Default::default()
})
}),
..Default::default()
};
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings: Box::new(setting),
is_deletion: false,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
let content = serde_json::json!(
[
{
"id": 0,
"doggo": "kefir",
"_vectors": {
"manual": vec![0, 0, 0],
}
},
{
"id": 1,
"doggo": "intel",
"_vectors": {
"manual": vec![1, 1, 1],
}
},
]
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
let documents_count =
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
.unwrap();
snapshot!(documents_count, @"2");
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: None,
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: false,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
index_scheduler
.register(
KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1")],
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
},
},
user_provided: RoaringBitmap<[0]>,
},
]
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");
index_scheduler
.register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
.unwrap();
handle.advance_one_successful_batch();
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
},
},
user_provided: RoaringBitmap<[]>,
},
]
"###);
}
#[test]
fn delete_embedder_with_user_provided_vectors() {
// 1. Add two embedders
// 2. Push two documents containing a simple vector
// 3. The documents must not contain the vectors after the update as they are in the vectors db
// 4. Delete the embedders
// 5. The documents contain the vectors again
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let setting = meilisearch_types::settings::Settings::<Unchecked> {
embedders: Setting::Set(maplit::btreemap! {
S("manual") => Setting::Set(EmbeddingSettings {
source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
dimensions: Setting::Set(3),
..Default::default()
}),
S("my_doggo_embedder") => Setting::Set(EmbeddingSettings {
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
document_template: Setting::Set(S("{{doc.doggo}}")),
..Default::default()
}),
}),
..Default::default()
};
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings: Box::new(setting),
is_deletion: false,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
let content = serde_json::json!(
[
{
"id": 0,
"doggo": "kefir",
"_vectors": {
"manual": vec![0, 0, 0],
"my_doggo_embedder": vec![1; 384],
}
},
{
"id": 1,
"doggo": "intel",
"_vectors": {
"manual": vec![1, 1, 1],
}
},
]
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
let documents_count =
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
.unwrap();
snapshot!(documents_count, @"2");
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: None,
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: false,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
}
{
let setting = meilisearch_types::settings::Settings::<Unchecked> {
embedders: Setting::Set(maplit::btreemap! {
S("manual") => Setting::Reset,
}),
..Default::default()
};
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings: Box::new(setting),
is_deletion: false,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
}
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
}
{
let setting = meilisearch_types::settings::Settings::<Unchecked> {
embedders: Setting::Reset,
..Default::default()
};
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings: Box::new(setting),
is_deletion: false,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
}
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
// FIXME: redaction
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
}
}
} }

View File

@@ -6,6 +6,10 @@ expression: doc
"doggo": "kefir", "doggo": "kefir",
"breed": "patou", "breed": "patou",
"_vectors": { "_vectors": {
"A_fakerest": {
"embeddings": "[vector]",
"userProvided": true
},
"noise": [ "noise": [
0.1, 0.1,
0.2, 0.2,

View File

@@ -6,6 +6,10 @@ expression: doc
"doggo": "Intel", "doggo": "Intel",
"breed": "beagle", "breed": "beagle",
"_vectors": { "_vectors": {
"A_fakerest": {
"embeddings": "[vector]",
"userProvided": true
},
"noise": [ "noise": [
0.1, 0.1,
0.2, 0.2,

View File

@@ -188,12 +188,6 @@ impl AuthFilter {
self.allow_index_creation && self.is_index_authorized(index) self.allow_index_creation && self.is_index_authorized(index)
} }
#[inline]
/// Return true if a tenant token was used to generate the search rules.
pub fn is_tenant_token(&self) -> bool {
self.search_rules.is_some()
}
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self { pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
Self { Self {
search_rules: None, search_rules: None,
@@ -211,7 +205,6 @@ impl AuthFilter {
.unwrap_or(true) .unwrap_or(true)
} }
/// Check if the index is authorized by the API key and the tenant token.
pub fn is_index_authorized(&self, index: &str) -> bool { pub fn is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index) self.key_authorized_indexes.is_index_authorized(index)
&& self && self
@@ -221,44 +214,6 @@ impl AuthFilter {
.unwrap_or(true) .unwrap_or(true)
} }
/// Only check if the index is authorized by the API key
pub fn api_key_is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
}
/// Only check if the index is authorized by the tenant token
pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool {
self.search_rules
.as_ref()
.map(|search_rules| search_rules.is_index_authorized(index))
.unwrap_or(true)
}
/// Return the list of authorized indexes by the tenant token if any
pub fn tenant_token_list_index_authorized(&self) -> Vec<String> {
match self.search_rules {
Some(ref search_rules) => {
let mut indexes: Vec<_> = match search_rules {
SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(),
SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(),
};
indexes.sort_unstable();
indexes
}
None => Vec::new(),
}
}
/// Return the list of authorized indexes by the api key if any
pub fn api_key_list_index_authorized(&self) -> Vec<String> {
let mut indexes: Vec<_> = match self.key_authorized_indexes {
SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(),
SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(),
};
indexes.sort_unstable();
indexes
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> { pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
if !self.is_index_authorized(index) { if !self.is_index_authorized(index) {
return None; return None;

View File

@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
actix-web = { version = "4.6.0", default-features = false } actix-web = { version = "4.5.1", default-features = false }
anyhow = "1.0.79" anyhow = "1.0.79"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.3.0"
@@ -30,12 +30,7 @@ serde_json = "1.0.111"
tar = "0.4.40" tar = "0.4.40"
tempfile = "3.9.0" tempfile = "3.9.0"
thiserror = "1.0.56" thiserror = "1.0.56"
time = { version = "0.3.31", features = [ time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.35" tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
@@ -54,8 +49,6 @@ chinese-pinyin = ["milli/chinese-pinyin"]
hebrew = ["milli/hebrew"] hebrew = ["milli/hebrew"]
# japanese specialized tokenization # japanese specialized tokenization
japanese = ["milli/japanese"] japanese = ["milli/japanese"]
# korean specialized tokenization
korean = ["milli/korean"]
# thai specialized tokenization # thai specialized tokenization
thai = ["milli/thai"] thai = ["milli/thai"]
# allow greek specialized tokenization # allow greek specialized tokenization

View File

@@ -189,6 +189,4 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError); merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError); merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarId); merge_with_error_impl_take_error_message!(InvalidSimilarId);

View File

@@ -222,7 +222,6 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ; InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
@@ -241,11 +240,7 @@ InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRetrieveVectors , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSearchRetrieveVectors , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
@@ -273,14 +268,13 @@ InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
@@ -385,7 +379,6 @@ impl ErrorCode for milli::Error {
Code::IndexPrimaryKeyMultipleCandidatesFound Code::IndexPrimaryKeyMultipleCandidatesFound
} }
UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists, UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists,
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort, UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
@@ -398,8 +391,7 @@ impl ErrorCode for milli::Error {
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. } UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors, UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort, UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => { UserError::InvalidMinTypoWordLenSetting(_, _) => {
@@ -513,21 +505,6 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
} }
} }
impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
)
}
}
impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f)
}
}
#[macro_export] #[macro_export]
macro_rules! internal_error { macro_rules! internal_error {
($target:ty : $($other:path), *) => { ($target:ty : $($other:path), *) => {

View File

@@ -8,7 +8,6 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer; use fst::IntoStreamer;
use milli::index::IndexEmbeddingConfig;
use milli::proximity::ProximityPrecision; use milli::proximity::ProximityPrecision;
use milli::update::Setting; use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
@@ -673,7 +672,7 @@ pub fn settings(
let embedders: BTreeMap<_, _> = index let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)? .embedding_configs(rtxn)?
.into_iter() .into_iter()
.map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into()))) .map(|(name, config)| (name, Setting::Set(config.into())))
.collect(); .collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };

View File

@@ -14,20 +14,20 @@ default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.7.0"
actix-http = { version = "3.7.0", default-features = false, features = [ actix-http = { version = "3.6.0", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls-0_21", "rustls-0_21",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.6.0", default-features = false, features = [ actix-web = { version = "4.5.1", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"cookies", "cookies",
"rustls-0_21", "rustls-0_21",
] } ] }
actix-web-static-files = { version = "4.0.1", optional = true } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] } anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5" async-stream = "0.3.5"
async-trait = "0.1.77" async-trait = "0.1.77"
@@ -98,26 +98,27 @@ tokio-stream = "0.1.14"
toml = "0.8.8" toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0" walkdir = "2.4.0"
yaup = "0.2.1"
serde_urlencoded = "0.7.1" serde_urlencoded = "0.7.1"
termcolor = "1.4.1" termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] } url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40" tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.10" tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" } build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies] [dev-dependencies]
actix-rt = "2.9.0" actix-rt = "2.9.0"
assert-json-diff = "2.0.2" assert-json-diff = "2.0.2"
brotli = "6.0.0" brotli = "3.4.0"
insta = "1.34.0" insta = "1.34.0"
manifest-dir-macros = "0.1.18" manifest-dir-macros = "0.1.18"
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6" temp-env = "0.3.6"
urlencoding = "2.1.3" urlencoding = "2.1.3"
yaup = "0.3.1" yaup = "0.2.1"
[build-dependencies] [build-dependencies]
anyhow = { version = "1.0.79", optional = true } anyhow = { version = "1.0.79", optional = true }
@@ -150,7 +151,6 @@ chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"] chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
hebrew = ["meilisearch-types/hebrew"] hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"] japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]
thai = ["meilisearch-types/thai"] thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"] greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"] khmer = ["meilisearch-types/khmer"]
@@ -158,5 +158,5 @@ vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe" sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"

View File

@@ -74,8 +74,8 @@ pub enum DocumentDeletionKind {
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind { pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool }, PerDocumentId,
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, Normal { with_filter: bool, limit: usize, offset: usize },
} }
pub trait Analytics: Sync + Send { pub trait Analytics: Sync + Send {

View File

@@ -597,9 +597,6 @@ pub struct SearchAggregator {
// every time a request has a filter, this field must be incremented by one // every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize, sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter // filter
filter_with_geo_radius: bool, filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool, filter_with_geo_bounding_box: bool,
@@ -625,7 +622,6 @@ pub struct SearchAggregator {
// Whether a non-default embedder was specified // Whether a non-default embedder was specified
embedder: bool, embedder: bool,
hybrid: bool, hybrid: bool,
retrieve_vectors: bool,
// every time a search is done, we increment the counter linked to the used settings // every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>, matching_strategy: HashMap<String, usize>,
@@ -652,7 +648,6 @@ pub struct SearchAggregator {
// scoring // scoring
show_ranking_score: bool, show_ranking_score: bool,
show_ranking_score_details: bool, show_ranking_score_details: bool,
ranking_score_threshold: bool,
} }
impl SearchAggregator { impl SearchAggregator {
@@ -666,7 +661,6 @@ impl SearchAggregator {
page, page,
hits_per_page, hits_per_page,
attributes_to_retrieve: _, attributes_to_retrieve: _,
retrieve_vectors,
attributes_to_crop: _, attributes_to_crop: _,
crop_length, crop_length,
attributes_to_highlight: _, attributes_to_highlight: _,
@@ -675,7 +669,6 @@ impl SearchAggregator {
show_ranking_score_details, show_ranking_score_details,
filter, filter,
sort, sort,
distinct,
facets: _, facets: _,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@@ -683,7 +676,6 @@ impl SearchAggregator {
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid, hybrid,
ranking_score_threshold,
} = query; } = query;
let mut ret = Self::default(); let mut ret = Self::default();
@@ -698,8 +690,6 @@ impl SearchAggregator {
ret.sort_sum_of_criteria_terms = sort.len(); ret.sort_sum_of_criteria_terms = sort.len();
} }
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter { if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap()); static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1; ret.filter_total_number_of_criteria = 1;
@@ -736,7 +726,6 @@ impl SearchAggregator {
if let Some(ref vector) = vector { if let Some(ref vector) = vector {
ret.max_vector_size = vector.len(); ret.max_vector_size = vector.len();
} }
ret.retrieve_vectors |= retrieve_vectors;
if query.is_finite_pagination() { if query.is_finite_pagination() {
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
@@ -759,7 +748,6 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score; ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details; ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid { if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@@ -804,7 +792,6 @@ impl SearchAggregator {
sort_with_geo_point, sort_with_geo_point,
sort_sum_of_criteria_terms, sort_sum_of_criteria_terms,
sort_total_number_of_criteria, sort_total_number_of_criteria,
distinct,
filter_with_geo_radius, filter_with_geo_radius,
filter_with_geo_bounding_box, filter_with_geo_bounding_box,
filter_sum_of_criteria_terms, filter_sum_of_criteria_terms,
@@ -813,7 +800,6 @@ impl SearchAggregator {
attributes_to_search_on_total_number_of_uses, attributes_to_search_on_total_number_of_uses,
max_terms_number, max_terms_number,
max_vector_size, max_vector_size,
retrieve_vectors,
matching_strategy, matching_strategy,
max_limit, max_limit,
max_offset, max_offset,
@@ -835,7 +821,6 @@ impl SearchAggregator {
hybrid, hybrid,
total_degraded, total_degraded,
total_used_negative_operator, total_used_negative_operator,
ranking_score_threshold,
} = other; } = other;
if self.timestamp.is_none() { if self.timestamp.is_none() {
@@ -862,9 +847,6 @@ impl SearchAggregator {
self.sort_total_number_of_criteria = self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter // filter
self.filter_with_geo_radius |= filter_with_geo_radius; self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
@@ -887,7 +869,6 @@ impl SearchAggregator {
// vector // vector
self.max_vector_size = self.max_vector_size.max(max_vector_size); self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio; self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid; self.hybrid |= hybrid;
self.embedder |= embedder; self.embedder |= embedder;
@@ -923,7 +904,6 @@ impl SearchAggregator {
// scoring // scoring
self.show_ranking_score |= show_ranking_score; self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details; self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
} }
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -936,7 +916,6 @@ impl SearchAggregator {
sort_with_geo_point, sort_with_geo_point,
sort_sum_of_criteria_terms, sort_sum_of_criteria_terms,
sort_total_number_of_criteria, sort_total_number_of_criteria,
distinct,
filter_with_geo_radius, filter_with_geo_radius,
filter_with_geo_bounding_box, filter_with_geo_bounding_box,
filter_sum_of_criteria_terms, filter_sum_of_criteria_terms,
@@ -945,7 +924,6 @@ impl SearchAggregator {
attributes_to_search_on_total_number_of_uses, attributes_to_search_on_total_number_of_uses,
max_terms_number, max_terms_number,
max_vector_size, max_vector_size,
retrieve_vectors,
matching_strategy, matching_strategy,
max_limit, max_limit,
max_offset, max_offset,
@@ -967,7 +945,6 @@ impl SearchAggregator {
hybrid, hybrid,
total_degraded, total_degraded,
total_used_negative_operator, total_used_negative_operator,
ranking_score_threshold,
} = self; } = self;
if total_received == 0 { if total_received == 0 {
@@ -994,7 +971,6 @@ impl SearchAggregator {
"with_geoPoint": sort_with_geo_point, "with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
}, },
"distinct": distinct,
"filter": { "filter": {
"with_geoRadius": filter_with_geo_radius, "with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box, "with_geoBoundingBox": filter_with_geo_bounding_box,
@@ -1009,7 +985,6 @@ impl SearchAggregator {
}, },
"vector": { "vector": {
"max_vector_size": max_vector_size, "max_vector_size": max_vector_size,
"retrieve_vectors": retrieve_vectors,
}, },
"hybrid": { "hybrid": {
"enabled": hybrid, "enabled": hybrid,
@@ -1040,7 +1015,6 @@ impl SearchAggregator {
"scoring": { "scoring": {
"show_ranking_score": show_ranking_score, "show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details, "show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
}, },
}); });
@@ -1098,7 +1072,6 @@ impl MultiSearchAggregator {
page: _, page: _,
hits_per_page: _, hits_per_page: _,
attributes_to_retrieve: _, attributes_to_retrieve: _,
retrieve_vectors: _,
attributes_to_crop: _, attributes_to_crop: _,
crop_length: _, crop_length: _,
attributes_to_highlight: _, attributes_to_highlight: _,
@@ -1107,7 +1080,6 @@ impl MultiSearchAggregator {
show_matches_position: _, show_matches_position: _,
filter: _, filter: _,
sort: _, sort: _,
distinct: _,
facets: _, facets: _,
highlight_pre_tag: _, highlight_pre_tag: _,
highlight_post_tag: _, highlight_post_tag: _,
@@ -1115,7 +1087,6 @@ impl MultiSearchAggregator {
matching_strategy: _, matching_strategy: _,
attributes_to_search_on: _, attributes_to_search_on: _,
hybrid: _, hybrid: _,
ranking_score_threshold: _,
} = query; } = query;
index_uid.as_str() index_uid.as_str()
@@ -1263,7 +1234,6 @@ impl FacetSearchAggregator {
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid, hybrid,
ranking_score_threshold,
} = query; } = query;
let mut ret = Self::default(); let mut ret = Self::default();
@@ -1278,8 +1248,7 @@ impl FacetSearchAggregator {
|| filter.is_some() || filter.is_some()
|| *matching_strategy != MatchingStrategy::default() || *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some() || attributes_to_search_on.is_some()
|| hybrid.is_some() || hybrid.is_some();
|| ranking_score_threshold.is_some();
ret ret
} }
@@ -1555,9 +1524,6 @@ pub struct DocumentsFetchAggregator {
// if a filter was used // if a filter was used
per_filter: bool, per_filter: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
// pagination // pagination
#[serde(rename = "pagination.max_limit")] #[serde(rename = "pagination.max_limit")]
max_limit: usize, max_limit: usize,
@@ -1567,21 +1533,18 @@ pub struct DocumentsFetchAggregator {
impl DocumentsFetchAggregator { impl DocumentsFetchAggregator {
pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
let (limit, offset, retrieve_vectors) = match query { let (limit, offset) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), DocumentFetchKind::PerDocumentId => (1, 0),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
(*limit, *offset, *retrieve_vectors)
}
}; };
Self { Self {
timestamp: Some(OffsetDateTime::now_utc()), timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(), user_agents: extract_user_agents(request).into_iter().collect(),
total_received: 1, total_received: 1,
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit, max_limit: limit,
max_offset: offset, max_offset: offset,
retrieve_vectors,
} }
} }
@@ -1595,7 +1558,6 @@ impl DocumentsFetchAggregator {
per_filter, per_filter,
max_limit, max_limit,
max_offset, max_offset,
retrieve_vectors,
} = other; } = other;
if self.timestamp.is_none() { if self.timestamp.is_none() {
@@ -1611,8 +1573,6 @@ impl DocumentsFetchAggregator {
self.max_limit = self.max_limit.max(max_limit); self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset); self.max_offset = self.max_offset.max(max_offset);
self.retrieve_vectors |= retrieve_vectors;
} }
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -1653,7 +1613,6 @@ pub struct SimilarAggregator {
// Whether a non-default embedder was specified // Whether a non-default embedder was specified
embedder: bool, embedder: bool,
retrieve_vectors: bool,
// pagination // pagination
max_limit: usize, max_limit: usize,
@@ -1665,7 +1624,6 @@ pub struct SimilarAggregator {
// scoring // scoring
show_ranking_score: bool, show_ranking_score: bool,
show_ranking_score_details: bool, show_ranking_score_details: bool,
ranking_score_threshold: bool,
} }
impl SimilarAggregator { impl SimilarAggregator {
@@ -1677,11 +1635,9 @@ impl SimilarAggregator {
offset, offset,
limit, limit,
attributes_to_retrieve: _, attributes_to_retrieve: _,
retrieve_vectors,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
filter, filter,
ranking_score_threshold,
} = query; } = query;
let mut ret = Self::default(); let mut ret = Self::default();
@@ -1719,10 +1675,8 @@ impl SimilarAggregator {
ret.show_ranking_score = *show_ranking_score; ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details; ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some(); ret.embedder = embedder.is_some();
ret.retrieve_vectors = *retrieve_vectors;
ret ret
} }
@@ -1754,8 +1708,6 @@ impl SimilarAggregator {
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
embedder, embedder,
ranking_score_threshold,
retrieve_vectors,
} = other; } = other;
if self.timestamp.is_none() { if self.timestamp.is_none() {
@@ -1785,7 +1737,6 @@ impl SimilarAggregator {
} }
self.embedder |= embedder; self.embedder |= embedder;
self.retrieve_vectors |= retrieve_vectors;
// pagination // pagination
self.max_limit = self.max_limit.max(max_limit); self.max_limit = self.max_limit.max(max_limit);
@@ -1798,7 +1749,6 @@ impl SimilarAggregator {
// scoring // scoring
self.show_ranking_score |= show_ranking_score; self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details; self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
} }
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -1819,8 +1769,6 @@ impl SimilarAggregator {
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
embedder, embedder,
ranking_score_threshold,
retrieve_vectors,
} = self; } = self;
if total_received == 0 { if total_received == 0 {
@@ -1847,9 +1795,6 @@ impl SimilarAggregator {
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
}, },
"vector": {
"retrieve_vectors": retrieve_vectors,
},
"hybrid": { "hybrid": {
"embedder": embedder, "embedder": embedder,
}, },
@@ -1863,7 +1808,6 @@ impl SimilarAggregator {
"scoring": { "scoring": {
"show_ranking_score": show_ranking_score, "show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details, "show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
}, },
}); });
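
The aggregator hunks above all follow the same merge discipline: request counters are added with saturating_add, feature flags are OR-ed together, and size fields keep the maximum value seen. A minimal, self-contained sketch of that pattern; FilterAggregator and its fields are hypothetical names, not Meilisearch types:

// Illustrative aggregate-and-merge pattern; field names are made up.
#[derive(Default)]
struct FilterAggregator {
    total_received: usize,
    used_geo_radius: bool,
    max_limit: usize,
}

impl FilterAggregator {
    fn from_request(uses_geo_radius: bool, limit: usize) -> Self {
        FilterAggregator { total_received: 1, used_geo_radius: uses_geo_radius, max_limit: limit }
    }

    // Merging two aggregates: counters add, flags OR, maxima take the max.
    fn aggregate(&mut self, other: FilterAggregator) {
        let FilterAggregator { total_received, used_geo_radius, max_limit } = other;
        self.total_received = self.total_received.saturating_add(total_received);
        self.used_geo_radius |= used_geo_radius;
        self.max_limit = self.max_limit.max(max_limit);
    }
}

fn main() {
    let mut acc = FilterAggregator::default();
    acc.aggregate(FilterAggregator::from_request(true, 20));
    acc.aggregate(FilterAggregator::from_request(false, 100));
    assert_eq!((acc.total_received, acc.used_geo_radius, acc.max_limit), (2, true, 100));
}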

View File

@@ -98,29 +98,14 @@ impl From<MeilisearchHttpError> for aweb::Error {
impl From<aweb::error::PayloadError> for MeilisearchHttpError { impl From<aweb::error::PayloadError> for MeilisearchHttpError {
fn from(error: aweb::error::PayloadError) -> Self { fn from(error: aweb::error::PayloadError) -> Self {
match error { MeilisearchHttpError::Payload(PayloadError::Payload(error))
aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
PayloadError::Payload(ActixPayloadError::IncompleteError),
),
_ => MeilisearchHttpError::Payload(PayloadError::Payload(
ActixPayloadError::OtherError(error),
)),
}
} }
} }
#[derive(Debug, thiserror::Error)]
pub enum ActixPayloadError {
#[error("The provided payload is incomplete and cannot be parsed")]
IncompleteError,
#[error(transparent)]
OtherError(aweb::error::PayloadError),
}
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum PayloadError { pub enum PayloadError {
#[error(transparent)] #[error(transparent)]
Payload(ActixPayloadError), Payload(aweb::error::PayloadError),
#[error(transparent)] #[error(transparent)]
Json(JsonPayloadError), Json(JsonPayloadError),
#[error(transparent)] #[error(transparent)]
@@ -137,15 +122,13 @@ impl ErrorCode for PayloadError {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
PayloadError::Payload(e) => match e { PayloadError::Payload(e) => match e {
ActixPayloadError::IncompleteError => Code::BadRequest, aweb::error::PayloadError::Incomplete(_) => Code::Internal,
ActixPayloadError::OtherError(error) => match error { aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::EncodingCorrupted => Code::Internal, aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::UnknownLength => Code::Internal, aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal, aweb::error::PayloadError::Io(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal, _ => todo!(),
_ => todo!(),
},
}, },
PayloadError::Json(err) => match err { PayloadError::Json(err) => match err {
JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge, JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
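
The error-handling hunk above wraps the raw actix-web payload error in a dedicated enum so that an incomplete body maps to a bad-request code with a clear message instead of an internal error. A minimal sketch of that wrapper pattern, assuming the thiserror crate; UpstreamError, Code and the messages are stand-ins, not the actix-web or Meilisearch types:

use thiserror::Error;

// Stand-in for the third-party payload error.
#[derive(Debug, Error)]
enum UpstreamError {
    #[error("payload reached EOF before completing")]
    Incomplete,
    #[error("payload too large")]
    Overflow,
}

#[derive(Debug, PartialEq)]
enum Code {
    BadRequest,
    PayloadTooLarge,
    Internal,
}

#[derive(Debug, Error)]
enum WrappedPayloadError {
    // The truncated-body case gets its own variant and user-facing message.
    #[error("The provided payload is incomplete and cannot be parsed")]
    Incomplete,
    #[error(transparent)]
    Other(UpstreamError),
}

impl From<UpstreamError> for WrappedPayloadError {
    fn from(error: UpstreamError) -> Self {
        match error {
            UpstreamError::Incomplete => WrappedPayloadError::Incomplete,
            other => WrappedPayloadError::Other(other),
        }
    }
}

impl WrappedPayloadError {
    fn error_code(&self) -> Code {
        match self {
            // A truncated body is the client's problem, not a server bug.
            WrappedPayloadError::Incomplete => Code::BadRequest,
            WrappedPayloadError::Other(UpstreamError::Overflow) => Code::PayloadTooLarge,
            WrappedPayloadError::Other(_) => Code::Internal,
        }
    }
}

fn main() {
    let err: WrappedPayloadError = UpstreamError::Incomplete.into();
    assert_eq!(err.error_code(), Code::BadRequest);
    println!("{err}");
}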

View File

@@ -12,8 +12,6 @@ use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter}; use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::error::{Code, ResponseError};
use self::policies::AuthError;
pub struct GuardedData<P, D> { pub struct GuardedData<P, D> {
data: D, data: D,
filters: AuthFilter, filters: AuthFilter,
@@ -37,12 +35,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none(); let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, token, index).await? { match Self::authenticate(auth, token, index).await? {
Ok(filters) => match data { Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }), Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()), None => Err(AuthenticationError::IrretrievableState.into()),
}, },
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)), None => Err(AuthenticationError::InvalidToken.into()),
} }
} }
@@ -53,12 +51,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none(); let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, String::new(), None).await? { match Self::authenticate(auth, String::new(), None).await? {
Ok(filters) => match data { Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }), Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()), None => Err(AuthenticationError::IrretrievableState.into()),
}, },
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()), None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
} }
} }
@@ -66,7 +64,7 @@ impl<P, D> GuardedData<P, D> {
auth: Data<AuthController>, auth: Data<AuthController>,
token: String, token: String,
index: Option<String>, index: Option<String>,
) -> Result<Result<AuthFilter, AuthError>, ResponseError> ) -> Result<Option<AuthFilter>, ResponseError>
where where
P: Policy + 'static, P: Policy + 'static,
{ {
@@ -129,14 +127,13 @@ pub trait Policy {
auth: Data<AuthController>, auth: Data<AuthController>,
token: &str, token: &str,
index: Option<&str>, index: Option<&str>,
) -> Result<AuthFilter, policies::AuthError>; ) -> Option<AuthFilter>;
} }
pub mod policies { pub mod policies {
use actix_web::web::Data; use actix_web::web::Data;
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation}; use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use meilisearch_auth::{AuthController, AuthFilter, SearchRules}; use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
use meilisearch_types::error::{Code, ErrorCode};
// reexport actions in policies in order to be used in routes configuration. // reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_types::keys::{actions, Action}; pub use meilisearch_types::keys::{actions, Action};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -147,53 +144,11 @@ pub mod policies {
enum TenantTokenOutcome { enum TenantTokenOutcome {
NotATenantToken, NotATenantToken,
Invalid,
Expired,
Valid(Uuid, SearchRules), Valid(Uuid, SearchRules),
} }
#[derive(thiserror::Error, Debug)]
pub enum AuthError {
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
ExpiredTenantToken { exp: i64, now: i64 },
#[error("The provided API key is invalid.")]
InvalidApiKey,
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error(
"The API key used to generate this tenant token cannot acces the index `{index}`."
)]
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
#[error(
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
)]
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error("The provided tenant token is invalid.")]
InvalidTenantToken,
#[error("Could not decode tenant token, {0}.")]
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
#[error("Invalid action `{0}`.")]
InternalInvalidAction(u8),
}
impl From<jsonwebtoken::errors::Error> for AuthError {
fn from(error: jsonwebtoken::errors::Error) -> Self {
use jsonwebtoken::errors::ErrorKind;
match error.kind() {
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
_ => AuthError::CouldNotDecodeTenantToken(error),
}
}
}
impl ErrorCode for AuthError {
fn error_code(&self) -> Code {
match self {
AuthError::InternalInvalidAction(_) => Code::Internal,
_ => Code::InvalidApiKey,
}
}
}
fn tenant_token_validation() -> Validation { fn tenant_token_validation() -> Validation {
let mut validation = Validation::default(); let mut validation = Validation::default();
validation.validate_exp = false; validation.validate_exp = false;
@@ -203,15 +158,15 @@ pub mod policies {
} }
/// Extracts the key id used to sign the payload, without performing any validation. /// Extracts the key id used to sign the payload, without performing any validation.
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> { fn extract_key_id(token: &str) -> Option<Uuid> {
let mut validation = tenant_token_validation(); let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation(); validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret"); let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation)?; let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
// get token fields without validating it. // get token fields without validating it.
let Claims { api_key_uid, .. } = token_data.claims; let Claims { api_key_uid, .. } = token_data.claims;
Ok(api_key_uid) Some(api_key_uid)
} }
fn is_keys_action(action: u8) -> bool { fn is_keys_action(action: u8) -> bool {
@@ -232,102 +187,76 @@ pub mod policies {
auth: Data<AuthController>, auth: Data<AuthController>,
token: &str, token: &str,
index: Option<&str>, index: Option<&str>,
) -> Result<AuthFilter, AuthError> { ) -> Option<AuthFilter> {
// authenticate if token is the master key. // authenticate if token is the master key.
// Without a master key, all routes are accessible except the key-related routes. // Without a master key, all routes are accessible except the key-related routes.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) { if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Ok(AuthFilter::default()); return Some(AuthFilter::default());
} }
let (key_uuid, search_rules) = let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) { match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => { TenantTokenOutcome::Valid(key_uuid, search_rules) => {
(key_uuid, Some(search_rules)) (key_uuid, Some(search_rules))
} }
Ok(TenantTokenOutcome::NotATenantToken) TenantTokenOutcome::Expired => return None,
| Err(AuthError::InvalidTenantToken) => ( TenantTokenOutcome::Invalid => return None,
auth.get_optional_uid_from_encoded_key(token.as_bytes()) TenantTokenOutcome::NotATenantToken => {
.map_err(|_e| AuthError::InvalidApiKey)? (auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
.ok_or(AuthError::InvalidApiKey)?, }
None,
),
Err(e) => return Err(e),
}; };
// check that the indexes are allowed // check that the indexes are allowed
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?; let action = Action::from_repr(A)?;
let auth_filter = auth let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
.get_key_filters(key_uuid, search_rules) if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
.map_err(|_e| AuthError::InvalidApiKey)?; && index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
{
// First check if the index is authorized in the tenant token, this is a public return Some(auth_filter);
// information, we can return a nice error message.
if let Some(index) = index {
if !auth_filter.tenant_token_is_index_authorized(index) {
return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.tenant_token_list_index_authorized(),
});
}
if !auth_filter.api_key_is_index_authorized(index) {
if auth_filter.is_tenant_token() {
// If the error comes from a tenant token we cannot share the list
// of authorized indexes in the API key. This is not public information.
return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
});
} else {
// Otherwise we can share the list
// of authorized indexes in the API key.
return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.api_key_list_index_authorized(),
});
}
}
}
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
return Ok(auth_filter);
} }
Err(AuthError::InvalidApiKey) None
} }
} }
impl<const A: u8> ActionPolicy<A> { impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token( fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
auth: &AuthController,
token: &str,
) -> Result<TenantTokenOutcome, AuthError> {
// Only search action can be accessed by a tenant token. // Only search action can be accessed by a tenant token.
if A != actions::SEARCH { if A != actions::SEARCH {
return Ok(TenantTokenOutcome::NotATenantToken); return TenantTokenOutcome::NotATenantToken;
} }
let uid = extract_key_id(token)?; let uid = if let Some(uid) = extract_key_id(token) {
uid
} else {
return TenantTokenOutcome::NotATenantToken;
};
// Check if tenant token is valid. // Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) { let key = if let Some(key) = auth.generate_key(uid) {
key key
} else { } else {
return Err(AuthError::InvalidTenantToken); return TenantTokenOutcome::Invalid;
}; };
let data = decode::<Claims>( let data = if let Ok(data) = decode::<Claims>(
token, token,
&DecodingKey::from_secret(key.as_bytes()), &DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(), &tenant_token_validation(),
)?; ) {
data
} else {
return TenantTokenOutcome::Invalid;
};
// Check if token is expired. // Check if token is expired.
if let Some(exp) = data.claims.exp { if let Some(exp) = data.claims.exp {
let now = OffsetDateTime::now_utc().unix_timestamp(); if OffsetDateTime::now_utc().unix_timestamp() > exp {
if now > exp { return TenantTokenOutcome::Expired;
return Err(AuthError::ExpiredTenantToken { exp, now });
} }
} }
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules)) TenantTokenOutcome::Valid(uid, data.claims.search_rules)
} }
} }
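
The tenant-token hunk above disables the library's built-in expiry validation and checks exp by hand, so the error can report both the expiry and the current time. A small runnable sketch of that flow, assuming the jsonwebtoken, serde, time and thiserror crates; the Claims layout and the secret are illustrative, not the actual tenant-token format:

use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;

#[derive(Debug, Serialize, Deserialize)]
struct Claims {
    // Unix timestamp after which the token is no longer valid.
    exp: Option<i64>,
}

#[derive(Debug, thiserror::Error)]
enum TokenError {
    #[error("Token expired. Was valid up to `{exp}` and we're now `{now}`.")]
    Expired { exp: i64, now: i64 },
    #[error("Could not decode token, {0}.")]
    CouldNotDecode(#[from] jsonwebtoken::errors::Error),
}

fn check_token(token: &str, secret: &[u8]) -> Result<Claims, TokenError> {
    // Let the library verify the signature, but keep `exp` for a manual check.
    let mut validation = Validation::default();
    validation.validate_exp = false;
    validation.required_spec_claims = Default::default();
    let data = decode::<Claims>(token, &DecodingKey::from_secret(secret), &validation)?;

    if let Some(exp) = data.claims.exp {
        let now = OffsetDateTime::now_utc().unix_timestamp();
        if now > exp {
            return Err(TokenError::Expired { exp, now });
        }
    }
    Ok(data.claims)
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let secret = b"secret";
    let expired = Claims { exp: Some(OffsetDateTime::now_utc().unix_timestamp() - 60) };
    let token = encode(&Header::default(), &expired, &EncodingKey::from_secret(secret))?;
    match check_token(&token, secret) {
        Err(TokenError::Expired { .. }) => println!("rejected as expected"),
        other => println!("unexpected outcome: {other:?}"),
    }
    Ok(())
}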

View File

@@ -16,7 +16,6 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn; use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId; use meilisearch_types::milli::DocumentId;
use meilisearch_types::star_or::OptionStarOrList; use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
@@ -40,7 +39,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{ use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
}; };
use crate::search::{parse_filter, RetrieveVectors}; use crate::search::parse_filter;
use crate::Opt; use crate::Opt;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| { static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -95,8 +94,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub struct GetDocument { pub struct GetDocument {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
fields: OptionStarOrList<String>, fields: OptionStarOrList<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: Param<bool>,
} }
pub async fn get_document( pub async fn get_document(
@@ -110,20 +107,13 @@ pub async fn get_document(
debug!(parameters = ?params, "Get document"); debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?; let index_uid = IndexUid::try_from(index_uid)?;
let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner(); analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.merge_star_and_none(); let attributes_to_retrieve = fields.merge_star_and_none();
let features = index_scheduler.features();
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
analytics.get_fetch_documents(
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
&req,
);
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let document = let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?;
debug!(returns = ?document, "Get document"); debug!(returns = ?document, "Get document");
Ok(HttpResponse::Ok().json(document)) Ok(HttpResponse::Ok().json(document))
} }
@@ -163,8 +153,6 @@ pub struct BrowseQueryGet {
limit: Param<usize>, limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
fields: OptionStarOrList<String>, fields: OptionStarOrList<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>, filter: Option<String>,
} }
@@ -178,8 +166,6 @@ pub struct BrowseQuery {
limit: usize, limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)] #[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
fields: Option<Vec<String>>, fields: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)] #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>, filter: Option<Value>,
} }
@@ -199,7 +185,6 @@ pub async fn documents_by_query_post(
with_filter: body.filter.is_some(), with_filter: body.filter.is_some(),
limit: body.limit, limit: body.limit,
offset: body.offset, offset: body.offset,
retrieve_vectors: body.retrieve_vectors,
}, },
&req, &req,
); );
@@ -216,7 +201,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET"); debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner(); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
let filter = match filter { let filter = match filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
@@ -230,7 +215,6 @@ pub async fn get_documents(
offset: offset.0, offset: offset.0,
limit: limit.0, limit: limit.0,
fields: fields.merge_star_and_none(), fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter, filter,
}; };
@@ -239,7 +223,6 @@ pub async fn get_documents(
with_filter: query.filter.is_some(), with_filter: query.filter.is_some(),
limit: query.limit, limit: query.limit,
offset: query.offset, offset: query.offset,
retrieve_vectors: query.retrieve_vectors,
}, },
&req, &req,
); );
@@ -253,14 +236,10 @@ fn documents_by_query(
query: BrowseQuery, query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query; let BrowseQuery { offset, limit, fields, filter } = query;
let features = index_scheduler.features();
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let (total, documents) = let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
let ret = PaginationView::new(offset, limit, total as usize, documents); let ret = PaginationView::new(offset, limit, total as usize, documents);
@@ -600,44 +579,13 @@ fn some_documents<'a, 't: 'a>(
index: &'a Index, index: &'a Index,
rtxn: &'t RoTxn, rtxn: &'t RoTxn,
doc_ids: impl IntoIterator<Item = DocumentId> + 'a, doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> { ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| { Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> { ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?; Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
}
Ok(document)
}) })
})) }))
} }
@@ -648,7 +596,6 @@ fn retrieve_documents<S: AsRef<str>>(
limit: usize, limit: usize,
filter: Option<Value>, filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>, attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
) -> Result<(u64, Vec<Document>), ResponseError> { ) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let filter = &filter; let filter = &filter;
@@ -673,57 +620,53 @@ fn retrieve_documents<S: AsRef<str>>(
let (it, number_of_documents) = { let (it, number_of_documents) = {
let number_of_documents = candidates.len(); let number_of_documents = candidates.len();
( (
some_documents( some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
number_of_documents, number_of_documents,
) )
}; };
let documents: Vec<_> = it let documents: Result<Vec<_>, ResponseError> = it
.map(|document| { .map(|document| {
Ok(match &attributes_to_retrieve { Ok(match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values( Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?, &document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()).chain( attributes_to_retrieve.iter().map(|s| s.as_ref()),
(retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
),
), ),
None => document?, None => document?,
}) })
}) })
.collect::<Result<_, ResponseError>>()?; .collect();
Ok((number_of_documents, documents)) Ok((number_of_documents, documents?))
} }
fn retrieve_document<S: AsRef<str>>( fn retrieve_document<S: AsRef<str>>(
index: &Index, index: &Index,
doc_id: &str, doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>, attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
) -> Result<Document, ResponseError> { ) -> Result<Document, ResponseError> {
let txn = index.read_txn()?; let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index let internal_id = index
.external_documents_ids() .external_documents_ids()
.get(&txn, doc_id)? .get(&txn, doc_id)?
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = some_documents(index, &txn, Some(internal_id), retrieve_vectors)? let document = index
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next() .next()
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))??; .map(|(_, d)| d)
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = match &attributes_to_retrieve { let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values( Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document, &document,
attributes_to_retrieve attributes_to_retrieve.iter().map(|s| s.as_ref()),
.iter()
.map(|s| s.as_ref())
.chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
), ),
None => document, None => document,
}; };
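
The left-hand side of the documents hunk above post-processes each document according to a tri-state RetrieveVectors flag, either leaving, stripping, or re-injecting the _vectors field. A simplified sketch of that step using serde_json; the embeddings slice below stands in for the index lookup, and the output shape is only an approximation of the real _vectors format:

use serde_json::{json, Map, Value};

#[derive(Clone, Copy)]
enum RetrieveVectors {
    // Feature disabled: leave whatever is stored untouched.
    Ignore,
    // Feature enabled but vectors not requested: hide stored vectors.
    Hide,
    // Vectors requested: expose them under `_vectors`.
    Retrieve,
}

fn apply(document: &mut Map<String, Value>, mode: RetrieveVectors, embeddings: &[(String, Vec<f32>)]) {
    match mode {
        RetrieveVectors::Ignore => {}
        RetrieveVectors::Hide => {
            document.remove("_vectors");
        }
        RetrieveVectors::Retrieve => {
            // Start from any stored `_vectors` object, then add the embeddings
            // coming from the (stand-in) vector store.
            let mut vectors = match document.remove("_vectors") {
                Some(Value::Object(map)) => map,
                _ => Map::default(),
            };
            for (name, vector) in embeddings {
                vectors.insert(name.clone(), json!({ "embeddings": [vector], "regenerate": false }));
            }
            document.insert("_vectors".into(), Value::Object(vectors));
        }
    }
}

fn main() {
    let mut doc = json!({ "id": 1, "title": "Moana" });
    let embeddings = vec![("default".to_string(), vec![0.1_f32, 0.2, 0.3])];
    apply(doc.as_object_mut().unwrap(), RetrieveVectors::Retrieve, &embeddings);
    println!("{}", serde_json::to_string_pretty(&doc).unwrap());
}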

View File

@@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData; use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind; use crate::routes::indexes::search::search_kind;
use crate::search::{ use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
}; };
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
@@ -46,8 +46,6 @@ pub struct FacetSearchQuery {
pub matching_strategy: MatchingStrategy, pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>, pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
} }
pub async fn search( pub async fn search(
@@ -105,7 +103,6 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid, hybrid,
ranking_score_threshold,
} = value; } = value;
SearchQuery { SearchQuery {
@@ -115,7 +112,6 @@ impl From<FacetSearchQuery> for SearchQuery {
page: None, page: None,
hits_per_page: None, hits_per_page: None,
attributes_to_retrieve: None, attributes_to_retrieve: None,
retrieve_vectors: false,
attributes_to_crop: None, attributes_to_crop: None,
crop_length: DEFAULT_CROP_LENGTH(), crop_length: DEFAULT_CROP_LENGTH(),
attributes_to_highlight: None, attributes_to_highlight: None,
@@ -124,7 +120,6 @@ impl From<FacetSearchQuery> for SearchQuery {
show_ranking_score_details: false, show_ranking_score_details: false,
filter, filter,
sort: None, sort: None,
distinct: None,
facets: None, facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
@@ -133,7 +128,6 @@ impl From<FacetSearchQuery> for SearchQuery {
vector, vector,
attributes_to_search_on, attributes_to_search_on,
hybrid, hybrid,
ranking_score_threshold,
} }
} }
} }

View File

@@ -19,10 +19,9 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::search::{ use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
}; };
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
@@ -51,8 +50,6 @@ pub struct SearchQueryGet {
hits_per_page: Option<Param<usize>>, hits_per_page: Option<Param<usize>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
attributes_to_retrieve: Option<CS<String>>, attributes_to_retrieve: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
attributes_to_crop: Option<CS<String>>, attributes_to_crop: Option<CS<String>>,
#[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)] #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
@@ -63,8 +60,6 @@ pub struct SearchQueryGet {
filter: Option<String>, filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>, sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
distinct: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>, show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
@@ -87,21 +82,6 @@ pub struct SearchQueryGet {
pub hybrid_embedder: Option<String>, pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>, pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
}
} }
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)] #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@@ -157,13 +137,11 @@ impl From<SearchQueryGet> for SearchQuery {
page: other.page.as_deref().copied(), page: other.page.as_deref().copied(),
hits_per_page: other.hits_per_page.as_deref().copied(), hits_per_page: other.hits_per_page.as_deref().copied(),
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()), attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
retrieve_vectors: other.retrieve_vectors.0,
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()), attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
crop_length: other.crop_length.0, crop_length: other.crop_length.0,
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter, filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
distinct: other.distinct,
show_matches_position: other.show_matches_position.0, show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0, show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0, show_ranking_score_details: other.show_ranking_score_details.0,
@@ -174,7 +152,6 @@ impl From<SearchQueryGet> for SearchQuery {
matching_strategy: other.matching_strategy, matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid, hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
} }
} }
} }
@@ -228,12 +205,10 @@ pub async fn search_with_url_query(
let features = index_scheduler.features(); let features = index_scheduler.features();
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?; let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || { let search_result =
perform_search(&index, query, search_kind, retrieve_vector) tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
})
.await?;
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result); aggregate.succeed(search_result);
} }
@@ -270,13 +245,10 @@ pub async fn search_with_post(
let features = index_scheduler.features(); let features = index_scheduler.features();
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?; let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || { let search_result =
perform_search(&index, query, search_kind, retrieve_vectors) tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
})
.await?;
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result); aggregate.succeed(search_result);
if search_result.degraded { if search_result.degraded {
@@ -298,10 +270,11 @@ pub fn search_kind(
features: RoFeatures, features: RoFeatures,
) -> Result<SearchKind, ResponseError> { ) -> Result<SearchKind, ResponseError> {
if query.vector.is_some() { if query.vector.is_some() {
features.check_vector("Passing `vector` as a parameter")?; features.check_vector("Passing `vector` as a query parameter")?;
} }
if query.hybrid.is_some() { if query.hybrid.is_some() {
features.check_vector("Passing `hybrid` as a parameter")?; features.check_vector("Passing `hybrid` as a query parameter")?;
} }
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
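
The search-route hunks above guard the vector-related parameters behind a runtime feature check before any work is done. A hedged sketch of that guard pattern; RuntimeFeatures and the message wording are illustrative, not the index-scheduler API:

#[derive(Clone, Copy, Default)]
struct RuntimeFeatures {
    vector_store: bool,
}

#[derive(Debug)]
struct FeatureNotEnabledError(String);

impl RuntimeFeatures {
    fn check_vector(&self, disabled_action: &str) -> Result<(), FeatureNotEnabledError> {
        if self.vector_store {
            Ok(())
        } else {
            Err(FeatureNotEnabledError(format!(
                "{disabled_action} requires enabling the `vectorStore` experimental feature."
            )))
        }
    }
}

fn handle_search(features: RuntimeFeatures, has_vector: bool) -> Result<(), FeatureNotEnabledError> {
    if has_vector {
        // Fail fast before doing any work that depends on the feature.
        features.check_vector("Passing `vector` as a parameter")?;
    }
    Ok(())
}

fn main() {
    println!("{:?}", handle_search(RuntimeFeatures::default(), true));
    println!("{:?}", handle_search(RuntimeFeatures { vector_store: true }, true));
}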

View File

@@ -4,7 +4,11 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::{
InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
InvalidSimilarShowRankingScoreDetails,
};
use meilisearch_types::error::{ErrorCode as _, ResponseError}; use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
@@ -17,8 +21,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData; use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{ use crate::search::{
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind, add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
}; };
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -38,7 +42,9 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into()?; let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?;
let mut aggregate = SimilarAggregator::from_query(&query, &req); let mut aggregate = SimilarAggregator::from_query(&query, &req);
@@ -93,8 +99,6 @@ async fn similar(
features.check_vector("Using the similar API")?; features.check_vector("Using the similar API")?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
// Tenant token search_rules. // Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules); add_search_rules(&mut query.filter, search_rules);
@@ -105,10 +109,8 @@ async fn similar(
let (embedder_name, embedder) = let (embedder_name, embedder) =
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder))
perform_similar(&index, query, embedder_name, embedder, retrieve_vectors) .await?
})
.await?
}
#[derive(Debug, deserr::Deserr)]
@@ -122,35 +124,18 @@ pub struct SimilarQueryGet {
limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
attributes_to_retrieve: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
}
}
impl TryFrom<SimilarQueryGet> for SimilarQuery {
type Error = ResponseError; type Error = InvalidSimilarId;
fn try_from(
SimilarQueryGet {
@@ -158,12 +143,10 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
offset,
limit,
attributes_to_retrieve,
retrieve_vectors,
filter,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
}: SimilarQueryGet,
) -> Result<Self, Self::Error> {
let filter = match filter {
@@ -175,18 +158,14 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
};
Ok(SimilarQuery {
id: id.0.try_into().map_err(|code: InvalidSimilarId| { id: id.0.try_into()?,
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0,
limit: limit.0,
filter,
embedder,
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
retrieve_vectors: retrieve_vectors.0,
show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
})
}
}
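Both sides of the handler above hand perform_similar to tokio::task::spawn_blocking and await the join handle; only the formatting differs between the two versions. A self-contained sketch of that pattern, with a stand-in closure instead of the real search call:

use tokio::task::JoinError;

// CPU-bound work is moved off the async executor onto the blocking pool,
// then awaited; the summation is a placeholder for perform_similar.
async fn run_blocking_search() -> Result<u64, JoinError> {
    tokio::task::spawn_blocking(|| (0..1_000u64).sum::<u64>()).await
}

#[tokio::main]
async fn main() {
    assert_eq!(run_blocking_search().await.unwrap(), 499_500);
}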

View File

@@ -15,7 +15,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
use crate::search_queue::SearchQueue;
@@ -83,14 +83,11 @@ pub async fn multi_search_with_post(
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
.with_index(query_index)?;
let retrieve_vector =
RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || { let search_result =
perform_search(&index, query, search_kind, retrieve_vector) tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind))
}) .await
.await .with_index(query_index)?;
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),

View File

@@ -15,7 +15,6 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
@@ -60,8 +59,6 @@ pub struct SearchQuery {
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
@@ -78,8 +75,6 @@ pub struct SearchQuery {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@@ -92,44 +87,6 @@ pub struct SearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSearchRankingScoreThreshold)
} else {
Ok(RankingScoreThreshold(f))
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);
impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSimilarRankingScoreThreshold)
} else {
Ok(Self(f))
}
}
}
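The removed newtypes above exist to guarantee that a ranking score threshold stays within [0.0, 1.0] at deserialization time. A stripped-down sketch of that validation pattern, with the deserr integration omitted and only a plain TryFrom kept; the names are illustrative, not the real ones:

#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Threshold(f64);

#[derive(Debug, PartialEq)]
pub struct InvalidThreshold;

impl TryFrom<f64> for Threshold {
    type Error = InvalidThreshold;

    // Same check as the manual `f > 1.0 || f < 0.0` comparison in the diff,
    // written with the range form that clippy suggests.
    fn try_from(f: f64) -> Result<Self, Self::Error> {
        if (0.0..=1.0).contains(&f) {
            Ok(Threshold(f))
        } else {
            Err(InvalidThreshold)
        }
    }
}

fn main() {
    assert!(Threshold::try_from(0.5).is_ok());
    assert!(Threshold::try_from(-0.1).is_err());
    assert!(Threshold::try_from(1.5).is_err());
}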
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@@ -146,7 +103,6 @@ impl fmt::Debug for SearchQuery {
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@@ -155,14 +111,12 @@ impl fmt::Debug for SearchQuery {
show_ranking_score_details,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -180,9 +134,6 @@ impl fmt::Debug for SearchQuery {
if let Some(q) = q {
debug.field("q", &q);
}
if *retrieve_vectors {
debug.field("retrieve_vectors", &retrieve_vectors);
}
if let Some(v) = vector {
if v.len() < 10 {
debug.field("vector", &v);
@@ -205,9 +156,6 @@ impl fmt::Debug for SearchQuery {
if let Some(sort) = sort {
debug.field("sort", &sort);
}
if let Some(distinct) = distinct {
debug.field("distinct", &distinct);
}
if let Some(facets) = facets {
debug.field("facets", &facets);
}
@@ -240,9 +188,6 @@ impl fmt::Debug for SearchQuery {
debug.field("highlight_pre_tag", &highlight_pre_tag);
debug.field("highlight_post_tag", &highlight_post_tag);
debug.field("crop_marker", &crop_marker);
if let Some(ranking_score_threshold) = ranking_score_threshold {
debug.field("ranking_score_threshold", &ranking_score_threshold);
}
debug.finish()
}
@@ -383,8 +328,6 @@ pub struct SearchQueryWithIndex {
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
@@ -401,8 +344,6 @@ pub struct SearchQueryWithIndex {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@@ -415,8 +356,6 @@ pub struct SearchQueryWithIndex {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
impl SearchQueryWithIndex {
@@ -430,7 +369,6 @@ impl SearchQueryWithIndex {
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@@ -439,7 +377,6 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@@ -447,7 +384,6 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = self;
(
index_uid,
@@ -459,7 +395,6 @@ impl SearchQueryWithIndex {
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@@ -468,7 +403,6 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@@ -476,7 +410,6 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -499,14 +432,10 @@ pub struct SimilarQuery {
pub embedder: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
}
#[derive(Debug, Clone, PartialEq, Deserr)]
@@ -548,8 +477,6 @@ pub enum MatchingStrategy {
Last,
/// All query words are mandatory
All,
/// Remove query words from the most frequent to the least
Frequency,
}
impl Default for MatchingStrategy {
@@ -563,7 +490,6 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
match other {
MatchingStrategy::Last => Self::Last,
MatchingStrategy::All => Self::All,
MatchingStrategy::Frequency => Self::Frequency,
}
}
}
@@ -735,13 +661,6 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}
if let Some(distinct) = &query.distinct {
search.distinct(distinct.clone());
}
match search_kind {
SearchKind::KeywordOnly => {
@@ -752,15 +671,10 @@ fn prepare_search<'t>(
SearchKind::SemanticOnly { embedder_name, embedder } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
None => { None => embedder
let span = tracing::trace_span!(target: "search::vector", "embed_one"); .embed_one(query.q.clone().unwrap())
let _entered = span.enter(); .map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?,
embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}
};
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
@@ -788,16 +702,11 @@ fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy( search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
if query.show_ranking_score ScoringStrategy::Detailed
|| query.show_ranking_score_details } else {
|| query.ranking_score_threshold.is_some() ScoringStrategy::Skip
{ });
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
},
);
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@@ -842,7 +751,6 @@ pub fn perform_search(
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
retrieve_vectors: RetrieveVectors,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@@ -876,37 +784,32 @@ pub fn perform_search(
let SearchQuery {
q,
vector: _,
hybrid: _,
// already computed from prepare_search
offset: _,
limit,
page,
hits_per_page,
attributes_to_retrieve,
// use the enum passed as parameter
retrieve_vectors: _,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter: _,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
// already used in prepare_search
vector: _,
hybrid: _,
offset: _,
ranking_score_threshold: _,
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
distinct: _,
} = query;
let format = AttributesFormat {
attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight,
attributes_to_crop,
crop_length,
@@ -990,7 +893,6 @@ pub fn perform_search(
struct AttributesFormat {
attributes_to_retrieve: Option<BTreeSet<String>>,
retrieve_vectors: RetrieveVectors,
attributes_to_highlight: Option<HashSet<String>>,
attributes_to_crop: Option<Vec<String>>,
crop_length: usize,
@@ -1003,36 +905,6 @@ struct AttributesFormat {
show_ranking_score_details: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetrieveVectors {
/// Do not touch the `_vectors` field
///
/// this is the behavior when the vectorStore feature is disabled
Ignore,
/// Remove the `_vectors` field
///
/// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `false`
Hide,
/// Retrieve vectors from the DB and merge them into the `_vectors` field
///
/// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `true`
Retrieve,
}
impl RetrieveVectors {
pub fn new(
retrieve_vector: bool,
features: index_scheduler::RoFeatures,
) -> Result<Self, index_scheduler::Error> {
match (retrieve_vector, features.check_vector("Passing `retrieveVectors` as a parameter")) {
(true, Ok(())) => Ok(Self::Retrieve),
(true, Err(error)) => Err(error),
(false, Ok(())) => Ok(Self::Hide),
(false, Err(_)) => Ok(Self::Ignore),
}
}
}
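The removed RetrieveVectors::new above is a small decision matrix over the requested flag and the vectorStore feature check. A self-contained restatement with simplified types; the bool and the error string below are stand-ins for index_scheduler::RoFeatures and index_scheduler::Error, not the real types:

#[derive(Debug, PartialEq)]
enum RetrieveVectors { Ignore, Hide, Retrieve }

// `feature_enabled` models `features.check_vector(..)` returning Ok(()).
fn decide(requested: bool, feature_enabled: bool) -> Result<RetrieveVectors, &'static str> {
    match (requested, feature_enabled) {
        (true, true) => Ok(RetrieveVectors::Retrieve),
        (true, false) => Err("the vectorStore feature is disabled"),
        (false, true) => Ok(RetrieveVectors::Hide),
        (false, false) => Ok(RetrieveVectors::Ignore),
    }
}

fn main() {
    assert_eq!(decide(false, false), Ok(RetrieveVectors::Ignore));
    assert_eq!(decide(false, true), Ok(RetrieveVectors::Hide));
    assert_eq!(decide(true, true), Ok(RetrieveVectors::Retrieve));
    assert!(decide(true, false).is_err());
}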
fn make_hits(
index: &Index,
rtxn: &RoTxn<'_>,
@@ -1042,32 +914,10 @@ fn make_hits(
document_scores: Vec<Vec<ScoreDetails>>,
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
let displayed_ids = let displayed_ids = index
index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>()); .displayed_fields_ids(rtxn)?
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
(None, _) => false,
// displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
(Some(_), None) => true,
// displayed_ids is a finit list, so hide if `_vectors` is not part of it
(Some(map), Some(vectors_fid)) => !map.contains(&vectors_fid),
};
let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
if vectors_is_hidden {
RetrieveVectors::Hide
} else {
RetrieveVectors::Retrieve
}
} else {
format.retrieve_vectors
};
let displayed_ids =
displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let fids = |attrs: &BTreeSet<String>| {
let mut ids = BTreeSet::new();
for attr in attrs {
@@ -1090,7 +940,6 @@ fn make_hits(
.intersection(&displayed_ids)
.cloned()
.collect();
let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
let formatted_options = compute_formatted_options(
@@ -1124,48 +973,18 @@ fn make_hits(
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let mut documents = Vec::new();
let embedding_configs = index.embedding_configs(rtxn)?;
let documents_iter = index.documents(rtxn, documents_ids)?;
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) { for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
let add_vectors_fid =
vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);
// select the attributes to retrieve
let attributes_to_retrieve = to_retrieve_ids
.iter()
// skip the vectors_fid if RetrieveVectors::Hide
.filter(|fid| match vectors_fid {
Some(vectors_fid) => {
!(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
}
None => true,
})
// need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
.chain(add_vectors_fid.iter())
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
let mut document =
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
if retrieve_vectors == RetrieveVectors::Retrieve {
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, id)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings =
ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
vectors.insert(name, serde_json::to_value(embeddings)?);
}
document.insert("_vectors".into(), vectors.into());
}
let (matches_position, formatted) = format_fields(
&displayed_document,
&fields_ids_map,
@@ -1235,7 +1054,6 @@ pub fn perform_similar(
query: SimilarQuery,
embedder_name: String,
embedder: Arc<Embedder>,
retrieve_vectors: RetrieveVectors,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@@ -1247,10 +1065,8 @@ pub fn perform_similar(
filter: _,
embedder: _,
attributes_to_retrieve,
retrieve_vectors: _,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
} = query;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
@@ -1274,10 +1090,6 @@ pub fn perform_similar(
}
}
if let Some(ranking_score_threshold) = ranking_score_threshold {
similar.ranking_score_threshold(ranking_score_threshold.0);
}
let milli::SearchResult {
documents_ids,
matching_words: _,
@@ -1294,7 +1106,6 @@ pub fn perform_similar(
let format = AttributesFormat {
attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight: None,
attributes_to_crop: None,
crop_length: DEFAULT_CROP_LENGTH(),
@@ -1336,23 +1147,13 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
if let Some((lat, lng)) = if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"]))
{
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}
fn extract_geo_value(value: &Value) -> Option<f64> {
match value {
Value::Number(n) => n.as_f64(),
Value::String(s) => s.parse().ok(),
_ => None,
}
}
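The extract_geo_value helper above (present on only one side of this diff) accepts a coordinate either as a JSON number or as a numeric string. A runnable restatement using serde_json alone, matching the behaviour the string-coordinate test below exercises:

use serde_json::{json, Value};

// Numbers are taken as-is, numeric strings are parsed, anything else is None.
fn extract_geo_value(value: &Value) -> Option<f64> {
    match value {
        Value::Number(n) => n.as_f64(),
        Value::String(s) => s.parse().ok(),
        _ => None,
    }
}

fn main() {
    assert_eq!(extract_geo_value(&json!(50)), Some(50.0));
    assert_eq!(extract_geo_value(&json!("3")), Some(3.0));
    assert_eq!(extract_geo_value(&json!(null)), None);
}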
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
@@ -1726,54 +1527,4 @@ mod test {
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
#[test]
fn test_insert_geo_distance_with_coords_as_string() {
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": 3
}
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": 50,
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
}
}

View File

@@ -40,9 +40,8 @@ pub struct Permit {
impl Drop for Permit {
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
std::mem::drop(tokio::spawn(async move { sender.send(()).await })); let _ = futures::executor::block_on(self.sender.send(()));
}
}

View File

@@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": null, json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -119,8 +119,7 @@ async fn error_access_expired_key() {
thread::sleep(time::Duration::new(1, 0));
for (method, route) in AUTHORIZATIONS.keys() {
let (mut response, code) = server.dummy_request(method, route).await; let (response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -150,8 +149,7 @@ async fn error_access_unauthorized_index() {
// filter `products` index routes
.filter(|(_, route)| route.starts_with("/indexes/products"))
{
let (mut response, code) = server.dummy_request(method, route).await; let (response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -178,8 +176,7 @@ async fn error_access_unauthorized_action() {
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
let (mut response, code) = server.dummy_request(method, route).await; let (response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -283,7 +280,7 @@ async fn access_authorized_no_index_restriction() {
route,
action
);
assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action); assert_ne!(code, 403);
}
}
}

View File

@@ -1,10 +1,7 @@
use actix_web::test;
use http::StatusCode;
use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*;
use uuid::Uuid;
use crate::common::{Server, Value}; use crate::common::Server;
use crate::json;
#[actix_rt::test]
@@ -439,262 +436,3 @@ async fn patch_api_keys_unknown_field() {
}
"###);
}
async fn send_request_with_custom_auth(
app: impl actix_web::dev::Service<
actix_http::Request,
Response = actix_web::dev::ServiceResponse<impl actix_web::body::MessageBody>,
Error = actix_web::Error,
>,
url: &str,
auth: &str,
) -> (Value, StatusCode) {
let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
(response, status_code)
}
#[actix_rt::test]
async fn invalid_auth_format() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_api_key() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_tenant_token() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
// The tenant token won't be recognized at all if we're not on a search route
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// The error messages are not ideal but that's expected since we cannot _yet_ use deserr
let claims = json!({ "searchRules": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, InvalidSignature.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// ~~ For the next tests we first need a valid API key
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(
&Header::default(),
&claims,
&EncodingKey::from_secret(key.as_bytes()),
)
.unwrap();
// Try to access an index that is not authorized by the tenant token
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// Try to access an index that *is* authorized by the tenant token but not by the api key used to generate the tt
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key used to generate this tenant token cannot acces the index `doggo`.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}

View File

@@ -53,8 +53,7 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({ json!({"message": "The provided API key is invalid.",
"message": null,
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -192,9 +191,7 @@ macro_rules! compute_forbidden_search {
server.use_api_key(&web_token);
let index = server.index("sales");
index
.search(json!({}), |mut response, code| { .search(json!({}), |response, code| {
// We don't assert anything on the message since it may change between cases
response["message"] = serde_json::json!(null);
assert_eq!(
response,
INVALID_RESPONSE.clone(),
@@ -498,8 +495,7 @@ async fn error_access_forbidden_routes() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
if !actions.contains("search") {
let (mut response, code) = server.dummy_request(method, route).await; let (response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -533,16 +529,14 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -591,8 +585,7 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}

View File

@@ -109,11 +109,9 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
fn invalid_response(query_index: Option<usize>) -> Value {
let message = if let Some(query_index) = query_index {
json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")) format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
} else {
// if it's anything else we simply return null and will tests all the "The provided API key is invalid.".to_string()
// error messages somewhere else
json!(null)
};
json!({"message": message,
"code": "invalid_api_key",
@@ -416,10 +414,7 @@ macro_rules! compute_forbidden_single_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await; let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -474,13 +469,10 @@ macro_rules! compute_forbidden_multiple_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (mut response, code) = server.multi_search(json!({"queries" : [ let (response, code) = server.multi_search(json!({"queries" : [
{"indexUid": "sales"},
{"indexUid": "products"},
]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -1081,20 +1073,18 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (mut response, code) = server let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_ne!(response, invalid_response(None));
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (mut response, code) = server let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}
@@ -1144,9 +1134,8 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (mut response, code) = let (response, code) =
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}

View File

@@ -182,10 +182,14 @@ impl Index<'_> {
self.service.get(url).await
}
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) { pub async fn get_document(
&self,
id: u64,
options: Option<GetDocumentOptions>,
) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options { if let Some(fields) = options.and_then(|o| o.fields) {
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap(); let _ = write!(url, "?fields={}", fields.join(","));
}
self.service.get(url).await
}
@@ -201,11 +205,18 @@ impl Index<'_> {
}
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!( let mut url = format!("/indexes/{}/documents?", urlencode(self.uid.as_ref()));
"/indexes/{}/documents{}", if let Some(limit) = options.limit {
urlencode(self.uid.as_ref()), let _ = write!(url, "limit={}&", limit);
yaup::to_string(&options).unwrap() }
);
if let Some(offset) = options.offset {
let _ = write!(url, "offset={}&", offset);
}
if let Some(attributes_to_retrieve) = options.attributes_to_retrieve {
let _ = write!(url, "fields={}&", attributes_to_retrieve.join(","));
}
self.service.get(url).await
}
@@ -365,7 +376,7 @@ impl Index<'_> {
}
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query); let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -402,7 +413,7 @@ impl Index<'_> {
}
pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query); let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -424,14 +435,13 @@ impl Index<'_> {
}
}
#[derive(Debug, Default, serde::Serialize)] pub struct GetDocumentOptions {
#[serde(rename_all = "camelCase")]
pub struct GetAllDocumentsOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>, pub fields: Option<Vec<&'static str>>,
pub retrieve_vectors: bool, }
#[derive(Debug, Default)]
pub struct GetAllDocumentsOptions {
pub limit: Option<usize>,
pub offset: Option<usize>,
pub attributes_to_retrieve: Option<Vec<&'static str>>,
} }

View File

@@ -6,7 +6,7 @@ pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::GetAllDocumentsOptions; pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
@@ -42,12 +42,6 @@ impl std::ops::Deref for Value {
}
}
impl std::ops::DerefMut for Value {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl PartialEq<serde_json::Value> for Value {
fn eq(&self, other: &serde_json::Value) -> bool {
&self.0 == other
@@ -71,7 +65,7 @@ impl Display for Value {
write!(
f,
"{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" }) json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
)
}
}

View File

@@ -183,58 +183,6 @@ async fn add_single_document_gzip_encoded() {
}
"###);
}
#[actix_rt::test]
async fn add_single_document_gzip_encoded_with_incomplete_error() {
let document = json!("kefir");
// this is a what is expected and should work
let server = Server::new().await;
let app = server.init_web_app().await;
// post
let document = serde_json::to_string(&document).unwrap();
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// put
let req = test::TestRequest::put()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
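The removed test above deliberately sends a plain, uncompressed payload while declaring `content-encoding: gzip`, which is why the server answers with `bad_request`. For contrast, here is a hedged sketch of how a client would produce a well-formed gzip-encoded body (the `flate2` crate is an assumption; the route and headers come from the test itself):

use std::io::Write;

use flate2::write::GzEncoder;
use flate2::Compression;

fn gzip_payload(json: &str) -> Vec<u8> {
    // Compress the JSON body; the request must then carry both
    // `content-type: application/json` and `content-encoding: gzip`.
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(json.as_bytes()).expect("writing to a Vec cannot fail");
    encoder.finish().expect("finishing the gzip stream cannot fail")
}

fn main() {
    let body = gzip_payload("\"kefir\"");
    // POSTing this body to /indexes/dog/documents would be a complete payload.
    println!("compressed body is {} bytes", body.len());
}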
/// Here we try document request with every encoding /// Here we try document request with every encoding
#[actix_rt::test] #[actix_rt::test]
@@ -1092,52 +1040,6 @@ async fn document_addition_with_primary_key() {
"###); "###);
} }
#[actix_rt::test]
async fn document_addition_with_huge_int_primary_key() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"primary": 14630868576586246730u64,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents, Some("primary")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response,
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_document(14630868576586246730u64, None).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response),
@r###"
{
"primary": 14630868576586246730,
"content": "foo"
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn replace_document() { async fn replace_document() {
let server = Server::new().await; let server = Server::new().await;

View File

@@ -719,7 +719,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!(null)).await; let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value type: expected an object, but found null", "message": "Invalid value type: expected an object, but found null",
"code": "bad_request", "code": "bad_request",
@@ -730,7 +730,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`", "message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_offset", "code": "invalid_document_offset",
@@ -741,7 +741,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`", "message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_limit", "code": "invalid_document_limit",
@@ -752,7 +752,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`", "message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_document_fields", "code": "invalid_document_fields",
@@ -763,7 +763,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await; let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.", "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter", "code": "invalid_document_filter",
@@ -774,7 +774,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter", "code": "invalid_document_filter",
@@ -786,7 +786,7 @@ async fn fetch_document_by_filter() {
let (response, code) = let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await; index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese", "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter", "code": "invalid_document_filter",
@@ -795,70 +795,3 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
} }
#[actix_rt::test]
async fn retrieve_vectors() {
let server = Server::new().await;
let index = server.index("doggo");
// GET ALL DOCUMENTS BY QUERY
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
}
"###);
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await;
snapshot!(response, @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// FETCH ALL DOCUMENTS BY POST
let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(response, @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
"code": "invalid_document_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
}
"###);
let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await;
snapshot!(response, @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// GET A SINGLE DOCUMENT
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await;
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
}
"###);
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(response, @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@@ -4,7 +4,7 @@ use meili_snap::*;
use urlencoding::encode as urlencode; use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder; use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server, Value}; use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server, Value};
use crate::json; use crate::json;
// TODO: partial test since we are testing error, and error is not yet fully implemented in // TODO: partial test since we are testing error, and error is not yet fully implemented in
@@ -59,7 +59,8 @@ async fn get_document() {
}) })
); );
let (response, code) = index.get_document(0, Some(json!({ "fields": ["id"] }))).await; let (response, code) =
index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["id"]) })).await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
response, response,
@@ -68,8 +69,9 @@ async fn get_document() {
}) })
); );
let (response, code) = let (response, code) = index
index.get_document(0, Some(json!({ "fields": ["nested.content"] }))).await; .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["nested.content"]) }))
.await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
response, response,
@@ -209,7 +211,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let (response, code) = index let (response, code) = index
.get_all_documents(GetAllDocumentsOptions { .get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["name"]), attributes_to_retrieve: Some(vec!["name"]),
..Default::default() ..Default::default()
}) })
.await; .await;
@@ -223,19 +225,9 @@ async fn test_get_all_documents_attributes_to_retrieve() {
assert_eq!(response["limit"], json!(20)); assert_eq!(response["limit"], json!(20));
assert_eq!(response["total"], json!(77)); assert_eq!(response["total"], json!(77));
let (response, code) = index.get_all_documents_raw("?fields=").await;
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 20);
for results in response["results"].as_array().unwrap() {
assert_eq!(results.as_object().unwrap().keys().count(), 0);
}
assert_eq!(response["offset"], json!(0));
assert_eq!(response["limit"], json!(20));
assert_eq!(response["total"], json!(77));
let (response, code) = index let (response, code) = index
.get_all_documents(GetAllDocumentsOptions { .get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["wrong"]), attributes_to_retrieve: Some(vec![]),
..Default::default() ..Default::default()
}) })
.await; .await;
@@ -250,7 +242,22 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let (response, code) = index let (response, code) = index
.get_all_documents(GetAllDocumentsOptions { .get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["name", "tags"]), attributes_to_retrieve: Some(vec!["wrong"]),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 20);
for results in response["results"].as_array().unwrap() {
assert_eq!(results.as_object().unwrap().keys().count(), 0);
}
assert_eq!(response["offset"], json!(0));
assert_eq!(response["limit"], json!(20));
assert_eq!(response["total"], json!(77));
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec!["name", "tags"]),
..Default::default() ..Default::default()
}) })
.await; .await;
@@ -263,7 +270,10 @@ async fn test_get_all_documents_attributes_to_retrieve() {
} }
let (response, code) = index let (response, code) = index
.get_all_documents(GetAllDocumentsOptions { fields: Some(vec!["*"]), ..Default::default() }) .get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec!["*"]),
..Default::default()
})
.await; .await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 20); assert_eq!(response["results"].as_array().unwrap().len(), 20);
@@ -273,7 +283,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let (response, code) = index let (response, code) = index
.get_all_documents(GetAllDocumentsOptions { .get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["*", "wrong"]), attributes_to_retrieve: Some(vec!["*", "wrong"]),
..Default::default() ..Default::default()
}) })
.await; .await;
@@ -306,10 +316,12 @@ async fn get_document_s_nested_attributes_to_retrieve() {
assert_eq!(code, 202); assert_eq!(code, 202);
index.wait_task(1).await; index.wait_task(1).await;
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await; let (response, code) =
index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!(response, json!({})); assert_eq!(response, json!({}));
let (response, code) = index.get_document(1, Some(json!({ "fields": ["content"] }))).await; let (response, code) =
index.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
response, response,
@@ -321,7 +333,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
}) })
); );
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content.truc"] }))).await; let (response, code) = index
.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) }))
.await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
response, response,
@@ -329,7 +343,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
"content.truc": "foobar", "content.truc": "foobar",
}) })
); );
let (response, code) = index.get_document(1, Some(json!({ "fields": ["content.truc"] }))).await; let (response, code) = index
.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) }))
.await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
response, response,
@@ -524,207 +540,3 @@ async fn get_document_by_filter() {
} }
"###); "###);
} }
#[actix_rt::test]
async fn get_document_with_vectors() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
// by default you shouldn't see the `_vectors` object
let (documents, _code) = index.get_all_documents(Default::default()).await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir"
},
{
"id": 1,
"name": "echo"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) = index.get_document(0, None).await;
snapshot!(json_string!(documents), @r###"
{
"id": 0,
"name": "kefir"
}
"###);
// if we try to retrieve the vectors with the `fields` parameter they
// still shouldn't be displayed
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["name", "_vectors"]),
..Default::default()
})
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"name": "kefir"
},
{
"name": "echo"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) =
index.get_document(0, Some(json!({"fields": ["name", "_vectors"]}))).await;
snapshot!(json_string!(documents), @r###"
{
"name": "kefir"
}
"###);
// If we specify the retrieve vectors boolean and nothing else we should get the vectors
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(json_string!(documents), @r###"
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
}
"###);
// If we specify the retrieve vectors boolean and exclude vectors from the `fields` we should still get the vectors
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
retrieve_vectors: true,
fields: Some(vec!["name"]),
..Default::default()
})
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) =
index.get_document(0, Some(json!({"retrieveVectors": true, "fields": ["name"]}))).await;
snapshot!(json_string!(documents), @r###"
{
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
}
"###);
}
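A side note on the `_vectors` payload exercised by the removed test: each embedder entry is an object with `embeddings` (a list of vectors) and `regenerate`, rather than a bare array of floats. A minimal sketch of reading that shape (the struct name and setup are assumptions for illustration, not the project's own types; the JSON keys come from the snapshots above):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct VectorsEntry {
    // One or more embeddings per embedder.
    embeddings: Vec<Vec<f64>>,
    // Whether the embedding should be regenerated when the document changes.
    regenerate: bool,
}

fn main() {
    let raw = r#"{ "embeddings": [[0.0, 0.0, 0.0]], "regenerate": false }"#;
    let entry: VectorsEntry = serde_json::from_str(raw).expect("valid JSON");
    println!("{} embedding(s), regenerate = {}", entry.embeddings.len(), entry.regenerate);
}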

View File

@@ -1938,210 +1938,3 @@ async fn import_dump_v6_containing_experimental_features() {
}) })
.await; .await;
} }
// In this test we must generate the dump ourselves to ensure the
// `user provided` vectors are well set
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn generate_and_import_dump_containing_vectors() {
let temp = tempfile::tempdir().unwrap();
let mut opt = default_settings(temp.path());
let server = Server::new_with_options(opt.clone()).await.unwrap();
let (code, _) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let index = server.index("pets");
let (response, code) = index
.update_settings(json!(
{
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}",
}
}
}
))
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = index
.add_documents(
json!([
{"id": 0, "doggo": "kefir", "_vectors": { "doggo_embedder": vec![0; 384] }},
{"id": 1, "doggo": "echo", "_vectors": { "doggo_embedder": { "regenerate": false, "embeddings": vec![1; 384] }}},
{"id": 2, "doggo": "intel", "_vectors": { "doggo_embedder": { "regenerate": true, "embeddings": vec![2; 384] }}},
{"id": 3, "doggo": "bill", "_vectors": { "doggo_embedder": { "regenerate": true }}},
{"id": 4, "doggo": "max" },
]),
None,
)
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = server.create_dump().await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// ========= We made a dump, now we should clear the DB and try to import our dump
drop(server);
tokio::fs::remove_dir_all(&opt.db_path).await.unwrap();
let dump_name = format!("{}.dump", response["details"]["dumpUid"].as_str().unwrap());
let dump_path = opt.dump_dir.join(dump_name);
assert!(dump_path.exists(), "path: `{}`", dump_path.display());
opt.import_dump = Some(dump_path);
// NOTE: We shouldn't have to change the database path but I lost one hour
// because of a « bad path » error and that fixed it.
opt.db_path = temp.path().join("data.ms");
let mut server = Server::new_auth_with_options(opt, temp).await;
server.use_api_key("MASTER_KEY");
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
snapshot!(indexes["results"].as_array().unwrap().len(), @"1");
snapshot!(indexes["results"][0]["uid"], @r###""pets""###);
snapshot!(indexes["results"][0]["primaryKey"], @r###""id""###);
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let index = server.index("pets");
let (response, code) = index.settings().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
},
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}"
}
},
"searchCutoffMs": null
}
"###);
index
.search(json!({"retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"], { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###"
[
{
"id": 0,
"doggo": "kefir",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": false
}
}
},
{
"id": 1,
"doggo": "echo",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": false
}
}
},
{
"id": 2,
"doggo": "intel",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 3,
"doggo": "bill",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 4,
"doggo": "max",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
}
]
"###);
})
.await;
}

View File

@@ -1,25 +0,0 @@
---
source: meilisearch/tests/dumps/mod.rs
---
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}"
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -1,19 +0,0 @@
---
source: meilisearch/tests/dumps/mod.rs
---
{
"uid": 1,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 5,
"indexedDocuments": 5
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -13,7 +13,6 @@ mod snapshot;
mod stats; mod stats;
mod swap_indexes; mod swap_indexes;
mod tasks; mod tasks;
mod vector;
// Tests are isolated by features in different modules to allow better readability, test // Tests are isolated by features in different modules to allow better readability, test
// targetability, and improved incremental compilation times. // targetability, and improved incremental compilation times.

View File

@@ -107,39 +107,6 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Brown", "pattern": "stripped" },
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Black", "pattern": "stripped" },
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Blue", "pattern": "used" },
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": { "main": "Blue", "pattern": "stripped" },
}
])
});
static DOCUMENT_PRIMARY_KEY: &str = "id"; static DOCUMENT_PRIMARY_KEY: &str = "id";
static DOCUMENT_DISTINCT_KEY: &str = "product_id"; static DOCUMENT_DISTINCT_KEY: &str = "product_id";
@@ -272,35 +239,3 @@ async fn distinct_search_with_pagination_no_ranking() {
snapshot!(response["totalPages"], @"2"); snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6"); snapshot!(response["totalHits"], @"6");
} }
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new().await;
let index = server.index("tamo");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await;
let task = index.wait_task(task.uid()).await;
snapshot!(task, name: "succeed");
fn get_hits(response: &Value) -> Vec<String> {
let hits_array = response["hits"]
.as_array()
.unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap()));
hits_array
.iter()
.map(|h| h[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string())
.collect::<Vec<_>>()
}
let (response, code) =
index.search_post(json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"1");
snapshot!(response["totalHits"], @"3");
}

View File

@@ -71,7 +71,7 @@ async fn search_bad_offset() {
} }
"###); "###);
let (response, code) = index.search_get("?offset=doggo").await; let (response, code) = index.search_get("offset=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -99,7 +99,7 @@ async fn search_bad_limit() {
} }
"###); "###);
let (response, code) = index.search_get("?limit=doggo").await; let (response, code) = index.search_get("limit=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -127,7 +127,7 @@ async fn search_bad_page() {
} }
"###); "###);
let (response, code) = index.search_get("?page=doggo").await; let (response, code) = index.search_get("page=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() {
} }
"###); "###);
let (response, code) = index.search_get("?hitsPerPage=doggo").await; let (response, code) = index.search_get("hitsPerPage=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -167,74 +167,6 @@ async fn search_bad_hits_per_page() {
"###); "###);
} }
#[actix_rt::test]
async fn search_bad_attributes_to_retrieve() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"attributesToRetrieve": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.attributesToRetrieve`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_search_attributes_to_retrieve",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_retrieve"
}
"###);
// Can't make the `attributes_to_retrieve` fail with a get search since it'll accept anything as an array of strings.
}
#[actix_rt::test]
async fn search_bad_retrieve_vectors() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"retrieveVectors": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
let (response, code) = index.search_post(json!({"retrieveVectors": [true]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
let (response, code) = index.search_get("?retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
let (response, code) = index.search_get("?retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn search_bad_attributes_to_crop() { async fn search_bad_attributes_to_crop() {
let server = Server::new().await; let server = Server::new().await;
@@ -269,7 +201,7 @@ async fn search_bad_crop_length() {
} }
"###); "###);
let (response, code) = index.search_get("?cropLength=doggo").await; let (response, code) = index.search_get("cropLength=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -359,7 +291,7 @@ async fn search_bad_show_matches_position() {
} }
"###); "###);
let (response, code) = index.search_get("?showMatchesPosition=doggo").await; let (response, code) = index.search_get("showMatchesPosition=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -389,40 +321,6 @@ async fn search_bad_facets() {
// Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings. // Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
} }
#[actix_rt::test]
async fn search_bad_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_invalid_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn search_non_filterable_facets() { async fn search_non_filterable_facets() {
let server = Server::new().await; let server = Server::new().await;
@@ -442,7 +340,7 @@ async fn search_non_filterable_facets() {
} }
"###); "###);
let (response, code) = index.search_get("?facets=doggo").await; let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -472,7 +370,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
} }
"###); "###);
let (response, code) = index.search_get("?facets=doggo").await; let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -502,7 +400,7 @@ async fn search_non_filterable_facets_no_filterable() {
} }
"###); "###);
let (response, code) = index.search_get("?facets=doggo").await; let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -532,7 +430,7 @@ async fn search_non_filterable_facets_multiple_facets() {
} }
"###); "###);
let (response, code) = index.search_get("?facets=doggo,neko").await; let (response, code) = index.search_get("facets=doggo,neko").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -607,7 +505,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`", "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy", "code": "invalid_search_matching_strategy",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy" "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@@ -625,11 +523,11 @@ async fn search_bad_matching_strategy() {
} }
"###); "###);
let (response, code) = index.search_get("?matchingStrategy=doggo").await; let (response, code) = index.search_get("matchingStrategy=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`", "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy", "code": "invalid_search_matching_strategy",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy" "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@@ -1140,66 +1038,3 @@ async fn search_on_unknown_field_plus_joker() {
) )
.await; .await;
} }
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new().await;
let index = server.index("tamo");
let (task, _) = index.create(None).await;
let task = index.wait_task(task.uid()).await;
snapshot!(task, name: "task-succeed");
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
index.wait_task(task.uid()).await;
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
index.wait_task(task.uid()).await;
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
}
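The removed error test above encodes the rule that `distinct` at search time must name an attribute that is already filterable. A hedged sketch of the two payloads involved, with the field name taken from the distinct tests earlier in this diff and only `serde_json` assumed:

use serde_json::json;

fn main() {
    // Sent first (e.g. via a settings update) so the attribute becomes filterable.
    let settings = json!({ "filterableAttributes": ["color.main"] });
    // Only then may a search request use that attribute as `distinct`.
    let search = json!({ "page": 1, "hitsPerPage": 3, "distinct": "color.main" });
    println!("{settings}\n{search}");
}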

View File

@@ -117,70 +117,3 @@ async fn geo_bounding_box_with_string_and_number() {
) )
.await; .await;
} }
#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;
// Sort the documents so the second one comes first
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
},
"_geoDistance": 0
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@@ -124,61 +124,32 @@ async fn simple_search() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}}}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
snapshot!(response["semanticHitCount"], @"0"); snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"2"); snapshot!(response["semanticHitCount"], @"2");
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3"); snapshot!(response["semanticHitCount"], @"3");
} }
#[actix_rt::test]
async fn limit_offset() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"0");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"1");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test] #[actix_rt::test]
async fn simple_search_hf() { async fn simple_search_hf() {
let server = Server::new().await; let server = Server::new().await;
@@ -233,10 +204,10 @@ async fn distribution_shift() {
let server = Server::new().await; let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
let (response, code) = index.search_post(search.clone()).await; let (response, code) = index.search_post(search.clone()).await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
let (response, code) = index let (response, code) = index
.update_settings(json!({ .update_settings(json!({
@@ -257,7 +228,7 @@ async fn distribution_shift() {
let (response, code) = index.search_post(search).await; let (response, code) = index.search_post(search).await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.1920928955078125e-7}]"###);
} }
#[actix_rt::test] #[actix_rt::test]
@@ -268,23 +239,20 @@ async fn highlighter() {
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2}, "hybrid": {"semanticRatio": 0.2},
"retrieveVectors": true, "attributesToHighlight": [
"attributesToHighlight": [ "desc"
"desc",
"_vectors",
], ],
"highlightPreTag": "**BEGIN**", "highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**", "highlightPostTag": "**END**"
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
snapshot!(response["semanticHitCount"], @"0"); snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8}, "hybrid": {"semanticRatio": 0.8},
"retrieveVectors": true,
"showRankingScore": true, "showRankingScore": true,
"attributesToHighlight": [ "attributesToHighlight": [
"desc" "desc"
@@ -294,14 +262,13 @@ async fn highlighter() {
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3"); snapshot!(response["semanticHitCount"], @"3");
// no highlighting on full semantic // no highlighting on full semantic
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0}, "hybrid": {"semanticRatio": 1.0},
"retrieveVectors": true,
"showRankingScore": true, "showRankingScore": true,
"attributesToHighlight": [ "attributesToHighlight": [
"desc" "desc"
@@ -311,7 +278,7 @@ async fn highlighter() {
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3"); snapshot!(response["semanticHitCount"], @"3");
} }
@@ -394,12 +361,12 @@ async fn single_document() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0}"###); snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0}"###);
snapshot!(response["semanticHitCount"], @"1"); snapshot!(response["semanticHitCount"], @"1");
} }
@@ -410,25 +377,25 @@ async fn query_combination() {
// search without query and vector, but with hybrid => still placeholder // search without query and vector, but with hybrid => still placeholder
let (response, code) = index let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###); snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
snapshot!(response["semanticHitCount"], @"null"); snapshot!(response["semanticHitCount"], @"null");
// same with a different semantic ratio // same with a different semantic ratio
let (response, code) = index let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###); snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
snapshot!(response["semanticHitCount"], @"null"); snapshot!(response["semanticHitCount"], @"null");
// wrong vector dimensions // wrong vector dimensions
let (response, code) = index let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
.await; .await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
@@ -443,34 +410,34 @@ async fn query_combination() {
// full vector // full vector
let (response, code) = index let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.6581138968467712}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.6581138968467712}]"###);
snapshot!(response["semanticHitCount"], @"3"); snapshot!(response["semanticHitCount"], @"3");
// full keyword, without a query // full keyword, without a query
let (response, code) = index let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###); snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
snapshot!(response["semanticHitCount"], @"null"); snapshot!(response["semanticHitCount"], @"null");
// query + vector, full keyword => keyword // query + vector, full keyword => keyword
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###);
snapshot!(response["semanticHitCount"], @"null"); snapshot!(response["semanticHitCount"], @"null");
// query + vector, no hybrid keyword => error // query + vector, no hybrid keyword => error
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true}))
.await; .await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
@@ -486,7 +453,7 @@ async fn query_combination() {
// full vector, without a vector => error // full vector, without a vector => error
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
) )
.await; .await;
@@ -503,93 +470,11 @@ async fn query_combination() {
// hybrid without a vector => full keyword // hybrid without a vector => full keyword
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###);
snapshot!(response["semanticHitCount"], @"0"); snapshot!(response["semanticHitCount"], @"0");
} }
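// `retrieveVectors: true` adds a `_vectors` object (embeddings plus the `regenerate` flag) to each hit;
// once `_vectors` is removed from `displayedAttributes`, it is no longer returned.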
#[actix_rt::test]
async fn retrieve_vectors() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
}
]
"###);
// remove `_vectors` from displayed attributes
let (response, code) =
index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2"
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3"
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1"
}
]
"###);
}

View File

@@ -1,128 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
index.add_documents(documents.clone(), None).await;
index.wait_task(0).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "1",
},
{
"title": "Captain Planet",
"id": "2",
},
{
"title": "Captain Marvel",
"id": "3",
},
{
"title": "a Captain Marvel ersatz",
"id": "4"
},
{
"title": "He's not part of the Marvel Cinematic Universe",
"id": "5"
},
{
"title": "a Shazam ersatz, but better than Captain Planet",
"id": "6"
},
{
"title": "Capitain CAAAAAVEEERNE!!!!",
"id": "7"
}
])
});
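// The same query should return different hit sets depending on `matchingStrategy`:
// `last`, `all`, or `frequency`.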
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
})
.await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
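// Same three strategies, but the query contains a typo ("Capitain" instead of "Captain").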
#[actix_rt::test]
async fn search_with_typo() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
})
.await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
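// Same three strategies with a query word ("Supercopter") that matches no document;
// with `all`, this yields no hits at all.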
#[actix_rt::test]
async fn search_with_unknown_word() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
})
.await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @"[]");
})
.await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}

View File

@@ -7,7 +7,6 @@ mod facet_search;
mod formatted; mod formatted;
mod geo; mod geo;
mod hybrid; mod hybrid;
mod matching_strategy;
mod multi; mod multi;
mod pagination; mod pagination;
mod restrict_searchable; mod restrict_searchable;
@@ -48,31 +47,6 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
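// Small corpus of "Batman" titles used by the ranking-score tests below.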
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
@@ -301,7 +275,7 @@ async fn negative_special_cases_search() {
index.add_documents(documents, None).await; index.add_documents(documents, None).await;
index.wait_task(0).await; index.wait_task(0).await;
index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await; index.update_settings(json!({"synonyms": { "escape": ["glass"] }})).await;
index.wait_task(1).await; index.wait_task(1).await;
// There is a synonym for escape -> glass but we don't want "escape", only the derivatives: glass
@@ -985,213 +959,6 @@ async fn test_score_details() {
.await; .await;
} }
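// `showRankingScore: true` adds a `_rankingScore` field to every hit, in descending order of score.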
#[actix_rt::test]
async fn test_score() {
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": "Badman the dark knight returns 1",
"showRankingScore": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.9746605609456898
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.8055252965383685
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.16666666666666666
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.07702020202020202
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.07702020202020202
}
]
"###);
},
)
.await;
}
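// `rankingScoreThreshold` keeps only hits whose `_rankingScore` reaches the threshold;
// raising it from 0.0 to 1.0 shrinks the result set from 5 hits to none.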
#[actix_rt::test]
async fn test_score_threshold() {
let query = "Badman dark returns 1";
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.11553030303030302
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.11553030303030302
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.2
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.5
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.8
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 1.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
// nobody is perfect
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn test_degraded_score_details() { async fn test_degraded_score_details() {
let server = Server::new().await; let server = Server::new().await;
@@ -1290,38 +1057,21 @@ async fn experimental_feature_vector_store() {
index.add_documents(json!(documents), None).await; index.add_documents(json!(documents), None).await;
index.wait_task(0).await; index.wait_task(0).await;
index let (response, code) = index
.search(json!({ .search_post(json!({
"vector": [1.0, 2.0, 3.0], "vector": [1.0, 2.0, 3.0],
"showRankingScore": true "showRankingScore": true
}), |response, code|{ }))
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
index
.search(json!({
"retrieveVectors": true,
"showRankingScore": true
}), |response, code|{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await; .await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a query parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.set_features(json!({"vectorStore": true})).await; let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(code, @"200 OK");
@@ -1354,7 +1104,6 @@ async fn experimental_feature_vector_store() {
.search_post(json!({ .search_post(json!({
"vector": [1.0, 2.0, 3.0], "vector": [1.0, 2.0, 3.0],
"showRankingScore": true, "showRankingScore": true,
"retrieveVectors": true,
})) }))
.await; .await;
@@ -1366,16 +1115,11 @@ async fn experimental_feature_vector_store() {
"title": "Shazam!", "title": "Shazam!",
"id": "287947", "id": "287947",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 1.0,
[ 2.0,
1.0, 3.0
2.0, ]
3.0
]
],
"regenerate": false
}
}, },
"_rankingScore": 1.0 "_rankingScore": 1.0
}, },
@@ -1383,16 +1127,11 @@ async fn experimental_feature_vector_store() {
"title": "Captain Marvel", "title": "Captain Marvel",
"id": "299537", "id": "299537",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 1.0,
[ 2.0,
1.0, 54.0
2.0, ]
54.0
]
],
"regenerate": false
}
}, },
"_rankingScore": 0.9129111766815186 "_rankingScore": 0.9129111766815186
}, },
@@ -1400,16 +1139,11 @@ async fn experimental_feature_vector_store() {
"title": "Gläss", "title": "Gläss",
"id": "450465", "id": "450465",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ -100.0,
[ 340.0,
-100.0, 90.0
340.0, ]
90.0
]
],
"regenerate": false
}
}, },
"_rankingScore": 0.8106412887573242 "_rankingScore": 0.8106412887573242
}, },
@@ -1417,16 +1151,11 @@ async fn experimental_feature_vector_store() {
"title": "How to Train Your Dragon: The Hidden World", "title": "How to Train Your Dragon: The Hidden World",
"id": "166428", "id": "166428",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ -100.0,
[ 231.0,
-100.0, 32.0
231.0, ]
32.0
]
],
"regenerate": false
}
}, },
"_rankingScore": 0.7412010431289673 "_rankingScore": 0.7412010431289673
}, },
@@ -1434,16 +1163,11 @@ async fn experimental_feature_vector_store() {
"title": "Escape Room", "title": "Escape Room",
"id": "522681", "id": "522681",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 10.0,
[ -23.0,
10.0, 32.0
-23.0, ]
32.0
]
],
"regenerate": false
}
}, },
"_rankingScore": 0.6972063183784485 "_rankingScore": 0.6972063183784485
} }

View File

@@ -1,20 +0,0 @@
---
source: meilisearch/tests/search/distinct.rs
---
{
"uid": 1,
"indexUid": "tamo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"filterableAttributes": [
"color.main"
]
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -1,18 +0,0 @@
---
source: meilisearch/tests/search/errors.rs
---
{
"uid": 0,
"indexUid": "tamo",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -87,68 +87,6 @@ async fn similar_bad_id() {
"###); "###);
} }
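// `rankingScoreThreshold` on the similar route must be a float between 0.0 and 1.0;
// a wrong type or an out-of-range value is rejected with `invalid_similar_ranking_score_threshold`.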
#[actix_rt::test]
async fn similar_bad_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn similar_invalid_id() { async fn similar_invalid_id() {
let server = Server::new().await; let server = Server::new().await;
@@ -241,7 +179,7 @@ async fn similar_bad_offset() {
} }
"###); "###);
let (response, code) = index.similar_get("?id=287947&offset=doggo").await; let (response, code) = index.similar_get("id=287947&offset=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -283,7 +221,7 @@ async fn similar_bad_limit() {
} }
"###); "###);
let (response, code) = index.similar_get("?id=287946&limit=doggo").await; let (response, code) = index.similar_get("id=287946&limit=doggo").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@@ -756,54 +694,3 @@ async fn filter_reserved_geo_point_string() {
}) })
.await; .await;
} }
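// `retrieveVectors` must be a boolean: strings, arrays, and unparsable query-string values
// are all rejected with `invalid_similar_retrieve_vectors`.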
#[actix_rt::test]
async fn similar_bad_retrieve_vectors() {
let server = Server::new().await;
server.set_features(json!({"vectorStore": true})).await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
let (response, code) = index.similar_get("?retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
let (response, code) = index.similar_get("?retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
}

View File

@@ -78,7 +78,7 @@ async fn basic() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { .similar(json!({"id": 143}), |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
[ [
@@ -87,16 +87,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "522681", "id": "522681",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.1,
[ 0.6,
0.10000000149011612, 0.8
0.6000000238418579, ]
0.800000011920929
]
],
"regenerate": false
}
} }
}, },
{ {
@@ -104,16 +99,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "299537", "id": "299537",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.6,
[ 0.8,
0.6000000238418579, -0.2
0.800000011920929, ]
-0.20000000298023224
]
],
"regenerate": false
}
} }
}, },
{ {
@@ -121,16 +111,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "166428", "id": "166428",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.7,
[ 0.7,
0.699999988079071, -0.4
0.699999988079071, ]
-0.4000000059604645
]
],
"regenerate": false
}
} }
}, },
{ {
@@ -138,16 +123,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "287947", "id": "287947",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.8,
[ 0.4,
0.800000011920929, -0.5
0.4000000059604645, ]
-0.5
]
],
"regenerate": false
}
} }
} }
] ]
@@ -156,7 +136,7 @@ async fn basic() {
.await; .await;
index index
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { .similar(json!({"id": "299537"}), |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
[ [
@@ -165,16 +145,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "166428", "id": "166428",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.7,
[ 0.7,
0.699999988079071, -0.4
0.699999988079071, ]
-0.4000000059604645
]
],
"regenerate": false
}
} }
}, },
{ {
@@ -182,16 +157,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "287947", "id": "287947",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.8,
[ 0.4,
0.800000011920929, -0.5
0.4000000059604645, ]
-0.5
]
],
"regenerate": false
}
} }
}, },
{ {
@@ -199,16 +169,11 @@ async fn basic() {
"release_year": 2019, "release_year": 2019,
"id": "522681", "id": "522681",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.1,
[ 0.6,
0.10000000149011612, 0.8
0.6000000238418579, ]
0.800000011920929
]
],
"regenerate": false
}
} }
}, },
{ {
@@ -216,16 +181,11 @@ async fn basic() {
"release_year": 1930, "release_year": 1930,
"id": "143", "id": "143",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ -0.5,
[ 0.3,
-0.5, 0.85
0.30000001192092896, ]
0.8500000238418579
]
],
"regenerate": false
}
} }
} }
] ]
@@ -234,285 +194,6 @@ async fn basic() {
.await; .await;
} }
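// `rankingScoreThreshold` on the similar route: raising it from 0 to 0.9 shrinks the hits from 4 to none.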
#[actix_rt::test]
async fn ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
},
"_rankingScore": 0.2819308042526245
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
0.800000011920929,
0.4000000059604645,
-0.5
]
],
"regenerate": false
}
},
"_rankingScore": 0.1662663221359253
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
},
"_rankingScore": 0.2819308042526245
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
},
"_rankingScore": 0.39060014486312866
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn filter() { async fn filter() {
let server = Server::new().await; let server = Server::new().await;
@@ -546,97 +227,71 @@ async fn filter() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar( .similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), snapshot!(code, @"200 OK");
|response, code| { snapshot!(json_string!(response["hits"]), @r###"
snapshot!(code, @"200 OK"); [
snapshot!(json_string!(response["hits"]), @r###" {
[ "title": "Captain Marvel",
{ "release_year": 2019,
"title": "Captain Marvel", "id": "299537",
"release_year": 2019, "_vectors": {
"id": "299537", "manual": [
"_vectors": { 0.6,
"manual": { 0.8,
"embeddings": [ -0.2
[ ]
0.6000000238418579, }
0.800000011920929, },
-0.20000000298023224 {
] "title": "How to Train Your Dragon: The Hidden World",
], "release_year": 2019,
"regenerate": false "id": "166428",
} "_vectors": {
} "manual": [
}, 0.7,
{ 0.7,
"title": "How to Train Your Dragon: The Hidden World", -0.4
"release_year": 2019, ]
"id": "166428", }
"_vectors": { },
"manual": { {
"embeddings": [ "title": "Shazam!",
[ "release_year": 2019,
0.699999988079071, "id": "287947",
0.699999988079071, "_vectors": {
-0.4000000059604645 "manual": [
] 0.8,
], 0.4,
"regenerate": false -0.5
} ]
} }
}, }
{ ]
"title": "Shazam!", "###);
"release_year": 2019, })
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
0.800000011920929,
0.4000000059604645,
-0.5
]
],
"regenerate": false
}
}
}
]
"###);
},
)
.await; .await;
index index
.similar( .similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), snapshot!(code, @"200 OK");
|response, code| { snapshot!(json_string!(response["hits"]), @r###"
snapshot!(code, @"200 OK"); [
snapshot!(json_string!(response["hits"]), @r###" {
[ "title": "All Quiet on the Western Front",
{ "release_year": 1930,
"title": "All Quiet on the Western Front", "id": "143",
"release_year": 1930, "_vectors": {
"id": "143", "manual": [
"_vectors": { -0.5,
"manual": { 0.3,
"embeddings": [ 0.85
[ ]
-0.5, }
0.30000001192092896, }
0.8500000238418579 ]
] "###);
], })
"regenerate": false
}
}
}
]
"###);
},
)
.await; .await;
} }
@@ -673,7 +328,7 @@ async fn limit_and_offset() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { .similar(json!({"id": 143, "limit": 1}), |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
[ [
@@ -682,16 +337,11 @@ async fn limit_and_offset() {
"release_year": 2019, "release_year": 2019,
"id": "522681", "id": "522681",
"_vectors": { "_vectors": {
"manual": { "manual": [
"embeddings": [ 0.1,
[ 0.6,
0.10000000149011612, 0.8
0.6000000238418579, ]
0.800000011920929
]
],
"regenerate": false
}
} }
} }
] ]
@@ -700,32 +350,24 @@ async fn limit_and_offset() {
.await; .await;
index index
.similar( .similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), snapshot!(code, @"200 OK");
|response, code| { snapshot!(json_string!(response["hits"]), @r###"
snapshot!(code, @"200 OK"); [
snapshot!(json_string!(response["hits"]), @r###" {
[ "title": "Captain Marvel",
{ "release_year": 2019,
"title": "Captain Marvel", "id": "299537",
"release_year": 2019, "_vectors": {
"id": "299537", "manual": [
"_vectors": { 0.6,
"manual": { 0.8,
"embeddings": [ -0.2
[ ]
0.6000000238418579, }
0.800000011920929, }
-0.20000000298023224 ]
] "###);
], })
"regenerate": false
}
}
}
]
"###);
},
)
.await; .await;
} }

View File

@@ -31,7 +31,6 @@ macro_rules! verify_snapshot {
} }
#[actix_rt::test] #[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn perform_snapshot() { async fn perform_snapshot() {
let temp = tempfile::tempdir().unwrap(); let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap(); let snapshot_dir = tempfile::tempdir().unwrap();

View File

@@ -2,7 +2,6 @@ mod errors;
mod webhook; mod webhook;
use meili_snap::insta::assert_json_snapshot; use meili_snap::insta::assert_json_snapshot;
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339; use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime; use time::OffsetDateTime;
@@ -739,9 +738,11 @@ async fn test_summarized_index_creation() {
async fn test_summarized_index_deletion() { async fn test_summarized_index_deletion() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
let (ret, _code) = index.delete().await; index.delete().await;
let task = index.wait_task(ret.uid()).await; index.wait_task(0).await;
snapshot!(task, let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###" @r###"
{ {
"uid": 0, "uid": 0,
@@ -766,34 +767,12 @@ async fn test_summarized_index_deletion() {
"###); "###);
// are the details correctly set when documents are actually deleted. // are the details correctly set when documents are actually deleted.
// /!\ We need to wait for the document addition to be processed otherwise, if the test runs too slow, index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
// both tasks may get autobatched and the deleted documents count will be wrong. index.delete().await;
let (ret, _code) = index.wait_task(2).await;
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; let (task, _) = index.get_task(2).await;
let task = index.wait_task(ret.uid()).await; assert_json_snapshot!(task,
snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###" @r###"
{ {
"uid": 2, "uid": 2,
@@ -813,25 +792,22 @@ async fn test_summarized_index_deletion() {
"###); "###);
// What happens when you delete an index that doesn't exist. // What happens when you delete an index that doesn't exist.
let (ret, _code) = index.delete().await; index.delete().await;
let task = index.wait_task(ret.uid()).await; index.wait_task(2).await;
snapshot!(task, let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###" @r###"
{ {
"uid": 3, "uid": 2,
"indexUid": "test", "indexUid": "test",
"status": "failed", "status": "succeeded",
"type": "indexDeletion", "type": "indexDeletion",
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"deletedDocuments": 0 "deletedDocuments": 1
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}, },
"error": null,
"duration": "[duration]", "duration": "[duration]",
"enqueuedAt": "[date]", "enqueuedAt": "[date]",
"startedAt": "[date]", "startedAt": "[date]",

View File

@@ -1,588 +0,0 @@
mod settings;
use meili_snap::{json_string, snapshot};
use crate::common::index::Index;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
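// Adding documents with `_vectors.manual` stores user-provided embeddings, re-sending a document
// with `"manual": null` clears them, and deleting a document drops its vectors entirely.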
#[actix_rt::test]
async fn add_remove_user_provided() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [10, 10, 10] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
10.0,
10.0,
10.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (value, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 1,
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}
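// Shared fixture: a `doggo` index with a 3-dimension `userProvided` embedder and documents
// covering the accepted `_vectors` shapes (single vector, several vectors, and the explicit
// `regenerate`/`embeddings` object form).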
async fn generate_default_user_provided_documents(server: &Server) -> Index {
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
{"id": 2, "name": "billou", "_vectors": { "manual": [[2, 2, 2], [2, 2, 3]] }},
{"id": 3, "name": "intel", "_vectors": { "manual": { "regenerate": false, "embeddings": [3, 3, 3] }}},
{"id": 4, "name": "max", "_vectors": { "manual": { "regenerate": false, "embeddings": [[4, 4, 4], [4, 4, 5]] }}},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
index
}
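// Malformed `_vectors` payloads (missing `regenerate`, wrong value types, ragged embedding arrays)
// make the task fail with `invalid_vectors_type`, while well-formed payloads succeed.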
#[actix_rt::test]
async fn user_provided_embeddings_error() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
// First case, we forget to specify the `regenerate`
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Second case, we don't specify anything
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Third case, we specify something wrong in place of regenerate
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 4,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 5,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 6,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 7,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 10,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 11,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 12,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn clear_documents() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
let (value, _code) = index.clear_all_documents().await;
index.wait_task(value.uid()).await;
// Make sure the documents DB has been cleared
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0,
"semanticHitCount": 0
}
"###);
}
#[actix_rt::test]
async fn add_remove_one_vector_4588() {
// https://github.com/meilisearch/meilisearch/issues/4588
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, name: "settings-processed");
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-added");
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"
{
"hits": [
{
"id": 0,
"name": "kefir"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"semanticHitCount": 1
}
"###);
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}
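The tests above collectively pin down which `_vectors` payload shapes pass validation. As a quick reference, here is a sketch (not part of the test file) of documents that the succeeding cases above accept, using the same hypothetical `manual` embedder with 3 dimensions:
use serde_json::json;

fn main() {
    // Explicit form: `embeddings` is null, an array of numbers, or an array of arrays
    // of numbers, optionally accompanied by a `regenerate` flag.
    let explicit = json!({
        "id": 0,
        "name": "kefir",
        "_vectors": { "manual": { "embeddings": [[23.0, 0.1, -12.0]], "regenerate": false } }
    });

    // Shorthand form: a single embedding given directly as an array of numbers,
    // as used in add_remove_one_vector_4588.
    let shorthand = json!({ "id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] } });

    // Setting the embedder entry to null clears the stored vectors for that document.
    let removal = json!({ "id": 0, "name": "kefir", "_vectors": { "manual": null } });

    println!("{explicit}\n{shorthand}\n{removal}");
}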

View File

@@ -1,228 +0,0 @@
use meili_snap::{json_string, snapshot};
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
use crate::vector::generate_default_user_provided_documents;
#[actix_rt::test]
async fn update_embedder() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": { "manual": {}},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 2,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
let ret = server.wait_task(response.uid()).await;
snapshot!(ret, @r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 2
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn reset_embedder_documents() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
let (response, code) = index.delete_settings().await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
// Make sure the documents are still present
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: None,
offset: None,
retrieve_vectors: false,
fields: None,
})
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir"
},
{
"id": 1,
"name": "echo"
},
{
"id": 2,
"name": "billou"
},
{
"id": 3,
"name": "intel"
},
{
"id": 4,
"name": "max"
}
],
"offset": 0,
"limit": 20,
"total": 5
}
"###);
// Make sure we are still able to retrieve their vectors
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 2,
"name": "billou",
"_vectors": {
"manual": {
"embeddings": [
[
2.0,
2.0,
2.0
],
[
2.0,
2.0,
3.0
]
],
"regenerate": false
}
}
},
{
"id": 3,
"name": "intel",
"_vectors": {
"manual": {
"embeddings": [
[
3.0,
3.0,
3.0
]
],
"regenerate": false
}
}
},
{
"id": 4,
"name": "max",
"_vectors": {
"manual": {
"embeddings": [
[
4.0,
4.0,
4.0
],
[
4.0,
4.0,
5.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 5
}
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
snapshot!(json_string!(documents), @r###"
{
"message": "Cannot find embedder with name `default`.",
"code": "invalid_embedder",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_embedder"
}
"###);
}

View File

@@ -1,19 +0,0 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -1,19 +0,0 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -1,23 +0,0 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 0,
"indexUid": "doggo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -1,6 +1,6 @@
 [package]
 name = "milli"
-edition = "2021"
+edition = "2018"
 publish = false
 version.workspace = true
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.9.0"
 bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
-charabia = { version = "0.8.11", default-features = false }
+charabia = { version = "0.8.10", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.11"
 deserr = "0.6.1"
@@ -44,7 +44,7 @@ once_cell = "1.19.0"
 ordered-float = "4.2.0"
 rand_pcg = { version = "0.3.1", features = ["serde1"] }
 rayon = "1.8.0"
-roaring = { version = "0.10.2", features = ["serde"] }
+roaring = "0.10.2"
 rstar = { version = "0.11.0", features = ["serde"] }
 serde = { version = "1.0.195", features = ["derive"] }
 serde_json = { version = "1.0.111", features = ["preserve_order"] }
@@ -71,15 +71,15 @@ csv = "1.3.0"
 candle-core = { version = "0.4.1" }
 candle-transformers = { version = "0.4.1" }
 candle-nn = { version = "0.4.1" }
-tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
+tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default_features = false, features = [
 "onig",
 ] }
-hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
+hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
 "online",
 ] }
 tiktoken-rs = "0.5.8"
 liquid = "0.26.4"
-arroy = "0.4.0"
+arroy = "0.3.1"
 rand = "0.8.5"
 tracing = "0.1.40"
 ureq = { version = "2.9.7", features = ["json"] }

View File

@@ -59,7 +59,6 @@ fn main() -> Result<(), Box<dyn Error>> {
 false,
 universe,
 &None,
-&None,
 GeoSortStrategy::default(),
 0,
 20,
@@ -67,7 +66,6 @@ fn main() -> Result<(), Box<dyn Error>> {
 &mut DefaultSearchLogger,
 logger,
 TimeBudget::max(),
-None,
 )?;
 if let Some((logger, dir)) = detailed_logger {
 logger.finish(&mut ctx, Path::new(dir))?;

View File

@@ -95,7 +95,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchCursor<R> {
 /// `next_document` advance the document reader until all the documents have been read.
 pub fn next_enriched_document(
 &mut self,
-) -> Result<Option<EnrichedDocument<'_>>, DocumentsBatchCursorError> {
+) -> Result<Option<EnrichedDocument>, DocumentsBatchCursorError> {
 let document = self.documents.next_document()?;
 let document_id = match self.external_ids.move_on_next()? {
 Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?,

View File

@@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
 const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
 /// Helper function to convert an obkv reader into a JSON object.
-pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
+pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
 obkv.iter()
 .map(|(field_id, value)| {
 let field_name = index
@@ -64,7 +64,7 @@ impl DocumentsBatchIndex {
 self.0.len()
 }
-pub fn iter(&self) -> bimap::hash::Iter<'_, FieldId, String> {
+pub fn iter(&self) -> bimap::hash::Iter<FieldId, String> {
 self.0.iter()
 }
@@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
 self.0.get_by_right(name).cloned()
 }
-pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
+pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
 let mut map = Object::new();
 for (k, v) in document.iter() {

View File

@@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
 pub fn document_id(
 &self,
-document: &obkv::KvReader<'_, FieldId>,
+document: &obkv::KvReader<FieldId>,
 fields: &impl FieldIdMapper,
 ) -> Result<StdResult<String, DocumentIdExtractionError>> {
 match self {
@@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
 Some(s) => Ok(s.to_string()),
 None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
 },
-Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
+Value::Number(number) if number.is_i64() => Ok(number.to_string()),
 content => Err(UserError::InvalidDocumentId { document_id: content }),
 }
 }
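The behavioural change in this hunk is the numeric guard on document ids: the `is_i64()` guard rejects unsigned values above `i64::MAX`, while the `!is_f64()` guard accepts any integral JSON number and only turns away floats. A small stand-alone illustration with serde_json (the values are made up):
use serde_json::json;

fn main() {
    let big = json!(18_446_744_073_709_551_615u64); // u64::MAX, not representable as i64
    let float = json!(12.5);

    // The `is_i64()` guard rejects this large unsigned id.
    assert!(!big.is_i64());
    // The `!is_f64()` guard accepts it, because it is an integer, not a float.
    assert!(!big.is_f64());

    // Floats fail both guards and fall through to InvalidDocumentId.
    assert!(float.is_f64() && !float.is_i64());
}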

View File

@@ -76,7 +76,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
 /// `next_document` advance the document reader until all the documents have been read.
 pub fn next_document(
 &mut self,
-) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
+) -> Result<Option<KvReader<FieldId>>, DocumentsBatchCursorError> {
 match self.cursor.move_on_next()? {
 Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
 Ok(Some(KvReader::new(value)))
@@ -108,7 +108,7 @@ impl From<serde_json::Error> for DocumentsBatchCursorError {
 impl error::Error for DocumentsBatchCursorError {}
 impl fmt::Display for DocumentsBatchCursorError {
-fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 match self {
 DocumentsBatchCursorError::Grenad(e) => e.fmt(f),
 DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f),

View File

@@ -56,7 +56,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
 Ok(Ok(()))
 }
-fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
 write!(f, "a documents, or a sequence of documents.")
 }
 }

View File

@@ -119,8 +119,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
 InvalidVectorDimensions { expected: usize, found: usize },
 #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
 InvalidVectorsMapType { document_id: String, value: Value },
-#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
-InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
 #[error("{0}")]
 InvalidFilter(String),
 #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@@ -136,17 +134,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
 }
 )]
 InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
-#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
-.field,
-match .valid_fields.is_empty() {
-true => "This index does not have configured filterable attributes.".to_string(),
-false => format!("Available filterable attributes are: `{}{}`.",
-valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
-.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
-),
-}
-)]
-InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
 #[error("Attribute `{}` is not facet-searchable. {}",
 .field,
 match .valid_fields.is_empty() {
@@ -283,9 +270,8 @@ impl From<arroy::Error> for Error {
 arroy::Error::DatabaseFull
 | arroy::Error::InvalidItemAppend
 | arroy::Error::UnmatchingDistance { .. }
-| arroy::Error::NeedBuild(_)
-| arroy::Error::MissingKey { .. }
-| arroy::Error::MissingMetadata(_) => {
+| arroy::Error::MissingNode
+| arroy::Error::MissingMetadata => {
 Error::InternalError(InternalError::ArroyError(value))
 }
 }
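For reference, the removed `InvalidVectorsEmbedderConf` variant is the source of the `invalid_vectors_type` messages shown in the test snapshots earlier: the `#[error]` attribute prefixes the document id and then appends the text of the deserr `JsonError`. A rough illustration of that composition with plain strings (not the real deserr types):
fn main() {
    // Stand-ins for the two pieces carried by the real error variant.
    let document_id = r#""0""#;
    let error = "Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`";

    // Mirrors #[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")].
    let message = format!("Bad embedder configuration in the document with id: `{document_id}`. {error}");
    assert!(message.contains("expected null or an array"));
    println!("{message}");
}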

View File

@@ -24,21 +24,17 @@ impl ExternalDocumentsIds {
 }
 /// Returns `true` if hard and soft external documents lists are empty.
-pub fn is_empty(&self, rtxn: &RoTxn<'_>) -> heed::Result<bool> {
+pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
 self.0.is_empty(rtxn).map_err(Into::into)
 }
-pub fn get<A: AsRef<str>>(
-&self,
-rtxn: &RoTxn<'_>,
-external_id: A,
-) -> heed::Result<Option<u32>> {
+pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
 self.0.get(rtxn, external_id.as_ref())
 }
 /// An helper function to debug this type, returns an `HashMap` of both,
 /// soft and hard fst maps, combined.
-pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashMap<String, u32>> {
+pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
 let mut map = HashMap::default();
 for result in self.0.iter(rtxn)? {
 let (external, internal) = result?;
@@ -55,11 +51,7 @@ impl ExternalDocumentsIds {
 ///
 /// - If attempting to delete a document that doesn't exist
 /// - If attempting to create a document that already exists
-pub fn apply(
-&self,
-wtxn: &mut RwTxn<'_>,
-operations: Vec<DocumentOperation>,
-) -> heed::Result<()> {
+pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
 for DocumentOperation { external_id, internal_id, kind } in operations {
 match kind {
 DocumentOperationKind::Create => {
@@ -77,7 +69,7 @@ impl ExternalDocumentsIds {
 }
 /// Returns an iterator over all the external ids.
-pub fn iter<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<RoIter<'t, Str, BEU32>> {
+pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
 self.0.iter(rtxn)
 }
 }

View File

@@ -11,7 +11,7 @@ pub enum FacetType {
 }
 impl fmt::Display for FacetType {
-fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 match self {
 FacetType::String => f.write_str("string"),
 FacetType::Number => f.write_str("number"),
@@ -37,7 +37,7 @@ impl FromStr for FacetType {
 pub struct InvalidFacetType;
 impl fmt::Display for InvalidFacetType {
-fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 f.write_str(r#"Invalid facet type, must be "string" or "number""#)
 }
 }

View File

@@ -4,7 +4,6 @@ use std::collections::HashMap;
 use serde::{Deserialize, Serialize};
-use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
 use crate::{FieldId, FieldsIdsMap, Weight};
 #[derive(Debug, Default, Serialize, Deserialize)]
@@ -24,13 +23,7 @@ impl FieldidsWeightsMap {
 /// Should only be called in the case there are NO searchable attributes.
 /// All the fields will be inserted in the order of the fields ids map with a weight of 0.
 pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
-FieldidsWeightsMap {
-map: fid_map
-.iter()
-.filter(|(_fid, name)| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
-.map(|(fid, _name)| (fid, 0))
-.collect(),
-}
+FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() }
 }
 /// Removes a field id from the map, returning the associated weight previously in the map.

View File

@@ -41,16 +41,6 @@ impl FieldsIdsMap {
 }
 }
-/// Get the ids of a field and all its nested fields based on its name.
-pub fn nested_ids(&self, name: &str) -> Vec<FieldId> {
-self.names_ids
-.range(name.to_string()..)
-.take_while(|(key, _)| key.starts_with(name))
-.filter(|(key, _)| crate::is_faceted_by(key, name))
-.map(|(_name, id)| *id)
-.collect()
-}
 /// Get the id of a field based on its name.
 pub fn id(&self, name: &str) -> Option<FieldId> {
 self.names_ids.get(name).copied()
@@ -136,32 +126,4 @@ mod tests {
 assert_eq!(iter.next(), Some((3, "title")));
 assert_eq!(iter.next(), None);
 }
-#[test]
-fn nested_fields() {
-let mut map = FieldsIdsMap::new();
-assert_eq!(map.insert("id"), Some(0));
-assert_eq!(map.insert("doggo"), Some(1));
-assert_eq!(map.insert("doggo.name"), Some(2));
-assert_eq!(map.insert("doggolution"), Some(3));
-assert_eq!(map.insert("doggo.breed.name"), Some(4));
-assert_eq!(map.insert("description"), Some(5));
-insta::assert_debug_snapshot!(map.nested_ids("doggo"), @r###"
-[
-1,
-4,
-2,
-]
-"###);
-insta::assert_debug_snapshot!(map.nested_ids("doggo.breed"), @r###"
-[
-4,
-]
-"###);
-insta::assert_debug_snapshot!(map.nested_ids("_vector"), @"[]");
-}
 }
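The removed `nested_ids` helper relies on the lexicographic ordering of `names_ids` plus the `crate::is_faceted_by` check, which is why `doggo.name` and `doggo.breed.name` match the prefix `doggo` while `doggolution` does not. A rough sketch of what such a check has to do (an assumption about its behaviour, not a copy of the milli implementation):
/// True when `field` is `facet` itself or a dotted sub-field of it.
fn is_faceted_by(field: &str, facet: &str) -> bool {
    field.starts_with(facet)
        && field[facet.len()..].chars().next().map_or(true, |c| c == '.')
}

fn main() {
    assert!(is_faceted_by("doggo", "doggo"));
    assert!(is_faceted_by("doggo.breed.name", "doggo"));
    assert!(!is_faceted_by("doggolution", "doggo")); // a prefix match alone is not enough
}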

View File

@@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
 impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
 type EItem = (u16, &'a str);
-fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
+fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
 let mut bytes = Vec::with_capacity(s.len() + 2);
 bytes.extend_from_slice(&n.to_be_bytes());
 bytes.extend_from_slice(s.as_bytes());

View File

@@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
 impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
 type EItem = (u32, &'a str);
-fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
+fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
 let mut bytes = Vec::with_capacity(s.len() + 4);
 bytes.extend_from_slice(&n.to_be_bytes());
 bytes.extend_from_slice(s.as_bytes());

View File

@@ -35,7 +35,7 @@ where
 fn bytes_encode(
 (field_id, document_id, value): &'a Self::EItem,
-) -> Result<Cow<'a, [u8]>, BoxedError> {
+) -> Result<Cow<[u8]>, BoxedError> {
 let mut bytes = Vec::with_capacity(32);
 bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
 bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes

View File

@@ -47,12 +47,6 @@ pub struct FacetGroupValue {
 pub bitmap: RoaringBitmap,
 }
-#[derive(Debug)]
-pub struct FacetGroupLazyValue<'b> {
-pub size: u8,
-pub bitmap_bytes: &'b [u8],
-}
 pub struct FacetGroupKeyCodec<T> {
 _phantom: PhantomData<T>,
 }
@@ -75,7 +69,6 @@ where
 Ok(Cow::Owned(v))
 }
 }
 impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
 where
 T: BytesDecode<'a>,
@@ -91,7 +84,6 @@ where
 }
 pub struct FacetGroupValueCodec;
 impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
 type EItem = FacetGroupValue;
@@ -101,23 +93,11 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
 Ok(Cow::Owned(v))
 }
 }
 impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
 type DItem = FacetGroupValue;
 fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
 let size = bytes[0];
 let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
 Ok(FacetGroupValue { size, bitmap })
 }
 }
-pub struct FacetGroupLazyValueCodec;
-impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
-type DItem = FacetGroupLazyValue<'a>;
-fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
-Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
-}
-}
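The removed `FacetGroupLazyValueCodec` exists so callers can keep the bitmap as raw bytes and only pay for the `CboRoaringBitmapCodec` deserialization when a group is actually needed. A minimal sketch of that usage, assuming the types shown above are in scope (this is not an actual milli call site):
use roaring::RoaringBitmap;

// Hypothetical helper: skip the bitmap entirely for groups below `min_size`.
fn bitmap_if_large(
    lazy: FacetGroupLazyValue<'_>,
    min_size: u8,
) -> Result<Option<RoaringBitmap>, Box<dyn std::error::Error>> {
    if lazy.size < min_size {
        return Ok(None); // no deserialization cost at all
    }
    Ok(Some(CboRoaringBitmapCodec::deserialize_from(lazy.bitmap_bytes)?))
}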

View File

@@ -24,7 +24,7 @@ impl<'a> BytesDecode<'a> for OrderedF64Codec {
 impl heed::BytesEncode<'_> for OrderedF64Codec {
 type EItem = f64;
-fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
+fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
 let mut buffer = [0u8; 16];
 // write the globally ordered float
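For context on that comment: order-preserving f64 keys are typically built by flipping the sign bit of non-negative values and inverting every bit of negative ones, so that plain byte comparison matches numeric order. A generic sketch of the trick (an assumption about the technique in general, not the exact milli encoding, whose 16-byte buffer above suggests it stores more than just these 8 bytes):
/// Map an f64 to 8 bytes whose lexicographic order matches the numeric order.
fn ordered_f64_bytes(f: f64) -> [u8; 8] {
    let bits = f.to_bits();
    let ordered = if bits >> 63 == 0 {
        bits ^ (1 << 63) // non-negative: set the sign bit so it sorts above all negatives
    } else {
        !bits // negative: invert everything so smaller numbers sort first
    };
    ordered.to_be_bytes()
}

fn main() {
    let keys: Vec<[u8; 8]> = [-10.5, -0.0, 0.0, 1.0, 42.0].map(ordered_f64_bytes).to_vec();
    let mut sorted = keys.clone();
    sorted.sort();
    assert_eq!(keys, sorted); // already in ascending byte order
}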

Some files were not shown because too many files have changed in this diff.